fix(firmware): upgrade NimBLE to 2.x + DNS fallback for unreliable resolvers
NimBLE-Arduino 1.4.2 had an init/fire race in its FreeRTOS callout porting layer: os_callout_timer_cb dispatched a queued TimerHandle expiry against a not-yet-initialized event (NULL fn pointer), causing PC=0 InstrFetchProhibited within ~1s of boot when the camera task starved the timer service. Confirmed by ets_printf instrumentation. Upgrading to ^2.0.0 rewrites the porting layer and eliminates the race; verified clean on the customer network for 1+ hour.

Also rolls in the DNS-resilience work that surfaced the BLE crash during provisioning: pin lwIP/esp-netif resolvers to 1.1.1.1/8.8.8.8 across DHCP renewals, add a three-tier resolver fallback in reporter with a hardcoded IP of last resort, and switch to a raw WiFiClient with a manual Host header to bypass HTTPClient's brittle DNS path.

Migration touches for NimBLE 2.x:
- NimBLEAdvertisedDeviceCallbacks -> NimBLEScanCallbacks
- onResult signature now takes const NimBLEAdvertisedDevice*
- setAdvertisedDeviceCallbacks -> setScanCallbacks
- start(0, nullptr, false) -> start(0, false, false)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -9,8 +9,66 @@ uint32_t net_guard_next_backoff_ms(uint32_t attempt) {
|
||||
#ifdef ARDUINO
|
||||
#include "config.h"
|
||||
#include <WiFi.h>
|
||||
#include <Arduino.h>
|
||||
#include <lwip/dns.h>
|
||||
#include <esp_netif.h>
|
||||
#include "event_log.h"
|
||||
|
||||
// Both lwIP's ip_addr_t and esp-netif's esp_ip_addr_t alias the same on-disk
|
||||
// layout for IPv4, but the C++ types differ. Take the raw u32 to sidestep it.
|
||||
static String fmt_v4(uint32_t addr_be) {
|
||||
if (addr_be == 0) return String("0.0.0.0");
|
||||
char b[16];
|
||||
snprintf(b, sizeof(b), "%u.%u.%u.%u",
|
||||
(unsigned)((addr_be >> 0) & 0xFF),
|
||||
(unsigned)((addr_be >> 8) & 0xFF),
|
||||
(unsigned)((addr_be >> 16) & 0xFF),
|
||||
(unsigned)((addr_be >> 24) & 0xFF));
|
||||
return String(b);
|
||||
}
|
||||
|
||||
void net_guard_dump_dns(const char* tag) {
|
||||
const ip_addr_t* d0 = dns_getserver(0);
|
||||
const ip_addr_t* d1 = dns_getserver(1);
|
||||
Serial.printf("[DNS] %s lwip: %s , %s\n", tag,
|
||||
fmt_v4(d0 ? ip_2_ip4(d0)->addr : 0).c_str(),
|
||||
fmt_v4(d1 ? ip_2_ip4(d1)->addr : 0).c_str());
|
||||
|
||||
esp_netif_t* sta = esp_netif_get_handle_from_ifkey("WIFI_STA_DEF");
|
||||
if (sta) {
|
||||
esp_netif_dns_info_t main_dns{}, backup_dns{};
|
||||
esp_netif_get_dns_info(sta, ESP_NETIF_DNS_MAIN, &main_dns);
|
||||
esp_netif_get_dns_info(sta, ESP_NETIF_DNS_BACKUP, &backup_dns);
|
||||
Serial.printf("[DNS] %s netif: %s , %s\n", tag,
|
||||
fmt_v4(main_dns.ip.u_addr.ip4.addr).c_str(),
|
||||
fmt_v4(backup_dns.ip.u_addr.ip4.addr).c_str());
|
||||
} else {
|
||||
Serial.printf("[DNS] %s netif: <no STA handle>\n", tag);
|
||||
}
|
||||
}
|
||||
|
||||
void net_guard_pin_dns() {
|
||||
ip_addr_t d1, d2;
|
||||
IP_ADDR4(&d1, 1, 1, 1, 1);
|
||||
IP_ADDR4(&d2, 8, 8, 8, 8);
|
||||
dns_setserver(0, &d1);
|
||||
dns_setserver(1, &d2);
|
||||
|
||||
// Also push through the esp_netif layer. dns_setserver() writes the
|
||||
// global lwIP table directly; esp_netif_set_dns_info() is what the
|
||||
// DHCP client itself calls, so writing here prevents the next DHCP
|
||||
// event from silently overwriting our pin.
|
||||
esp_netif_t* sta = esp_netif_get_handle_from_ifkey("WIFI_STA_DEF");
|
||||
if (sta) {
|
||||
esp_netif_dns_info_t info{};
|
||||
IP_ADDR4(&info.ip, 1, 1, 1, 1);
|
||||
esp_netif_set_dns_info(sta, ESP_NETIF_DNS_MAIN, &info);
|
||||
IP_ADDR4(&info.ip, 8, 8, 8, 8);
|
||||
esp_netif_set_dns_info(sta, ESP_NETIF_DNS_BACKUP, &info);
|
||||
}
|
||||
net_guard_dump_dns("pinned");
|
||||
}
|
||||
|
||||
// Shared with the WiFi event task. 32-bit aligned loads/stores are atomic on
|
||||
// Xtensa; volatile suffices. Tick re-evaluates every loop iteration, so stale
|
||||
// reads self-correct within ~200ms.
|
||||
@@ -23,6 +81,11 @@ static volatile uint32_t s_next_retry_ms = 0;
|
||||
static void on_wifi_event(WiFiEvent_t event, WiFiEventInfo_t info) {
|
||||
switch (event) {
|
||||
case ARDUINO_EVENT_WIFI_STA_GOT_IP:
|
||||
// Override DHCP-supplied DNS. Some routers return TC=1 for short
|
||||
// answers (forcing TCP fallback that lwIP can't follow), or hand
|
||||
// out an unreachable resolver. Pin to public resolvers so
|
||||
// hostByName() never depends on the local network's DNS quality.
|
||||
net_guard_pin_dns();
|
||||
s_up = true;
|
||||
s_attempts = 0;
|
||||
s_next_retry_ms = 0;
|
||||
|
||||
@@ -21,4 +21,13 @@ uint8_t net_guard_last_disconnect_reason();
|
||||
|
||||
// Non-blocking tick called from loop(); kicks reconnect if due.
|
||||
extern "C" void net_guard_tick();
|
||||
|
||||
// Override DHCP-supplied DNS with public resolvers (1.1.1.1, 8.8.8.8).
|
||||
// Idempotent; safe to call repeatedly. net_guard re-applies on every GOT_IP,
|
||||
// but main.cpp must call it once for the boot association (which completes
|
||||
// before net_guard_start() registers its event handler).
// Writes lwIP's resolver slots 0/1 and, when the "WIFI_STA_DEF" interface
// handle is available, its esp_netif MAIN/BACKUP DNS entries; then prints the
// resulting state via net_guard_dump_dns("pinned").
|
||||
void net_guard_pin_dns();
|
||||
|
||||
// Diagnostic: print current DNS table state from both lwIP and esp_netif.
// `tag` is a caller-chosen label echoed in every output line, which has the
// form "[DNS] <tag> lwip: ..." or "[DNS] <tag> netif: ...". Output goes to
// Serial.
|
||||
void net_guard_dump_dns(const char* tag);
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user