fix(firmware): upgrade NimBLE to 2.x + DNS fallback for unreliable resolvers

NimBLE-Arduino 1.4.2 had an init/fire race in its FreeRTOS callout porting
layer where os_callout_timer_cb dispatched a queued TimerHandle expiry
against a not-yet-initialized event (NULL fn pointer), causing PC=0
InstrFetchProhibited within ~1s of boot when the camera task starved the
timer service. Confirmed by ets_printf instrumentation. Upgrading to
^2.0.0 rewrites the porting layer and eliminates the race; verified clean
on the customer network for 1+ hour.

Also rolls in DNS-resilience work that surfaced the BLE crash during
provisioning: pin lwIP/esp-netif resolvers to 1.1.1.1/8.8.8.8 across DHCP
renewals, add three-tier resolver fallback in reporter with a hardcoded
IP of last resort, and switch to raw WiFiClient with manual Host header
to bypass HTTPClient's brittle DNS path.

Migration touches for NimBLE 2.x:
- NimBLEAdvertisedDeviceCallbacks -> NimBLEScanCallbacks
- onResult signature now takes const NimBLEAdvertisedDevice*
- setAdvertisedDeviceCallbacks -> setScanCallbacks
- start(0, nullptr, false) -> start(0, false, false)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-01 11:34:17 -07:00
parent 461ed7d888
commit a585a56cff
9 changed files with 1434 additions and 31 deletions

View File

@@ -9,8 +9,66 @@ uint32_t net_guard_next_backoff_ms(uint32_t attempt) {
#ifdef ARDUINO
#include "config.h"
#include <WiFi.h>
#include <Arduino.h>
#include <lwip/dns.h>
#include <esp_netif.h>
#include "event_log.h"
// Render an IPv4 address, supplied as a bare 32-bit word, as dotted-quad text.
// lwIP's ip_addr_t and esp-netif's esp_ip_addr_t are distinct C++ types, so
// callers pass the raw u32 and this single formatter serves both.
// The least-significant byte of the word is printed as the first octet, i.e.
// the word is expected in network byte order as read by a little-endian core.
// An all-zero word naturally formats as "0.0.0.0".
static String fmt_v4(uint32_t addr_be) {
    const unsigned octets[4] = {
        (unsigned)(addr_be & 0xFF),
        (unsigned)((addr_be >> 8) & 0xFF),
        (unsigned)((addr_be >> 16) & 0xFF),
        (unsigned)((addr_be >> 24) & 0xFF),
    };
    char text[16];  // longest case "255.255.255.255" plus the terminating NUL
    snprintf(text, sizeof(text), "%u.%u.%u.%u",
             octets[0], octets[1], octets[2], octets[3]);
    return String(text);
}
void net_guard_dump_dns(const char* tag) {
const ip_addr_t* d0 = dns_getserver(0);
const ip_addr_t* d1 = dns_getserver(1);
Serial.printf("[DNS] %s lwip: %s , %s\n", tag,
fmt_v4(d0 ? ip_2_ip4(d0)->addr : 0).c_str(),
fmt_v4(d1 ? ip_2_ip4(d1)->addr : 0).c_str());
esp_netif_t* sta = esp_netif_get_handle_from_ifkey("WIFI_STA_DEF");
if (sta) {
esp_netif_dns_info_t main_dns{}, backup_dns{};
esp_netif_get_dns_info(sta, ESP_NETIF_DNS_MAIN, &main_dns);
esp_netif_get_dns_info(sta, ESP_NETIF_DNS_BACKUP, &backup_dns);
Serial.printf("[DNS] %s netif: %s , %s\n", tag,
fmt_v4(main_dns.ip.u_addr.ip4.addr).c_str(),
fmt_v4(backup_dns.ip.u_addr.ip4.addr).c_str());
} else {
Serial.printf("[DNS] %s netif: <no STA handle>\n", tag);
}
}
void net_guard_pin_dns() {
ip_addr_t d1, d2;
IP_ADDR4(&d1, 1, 1, 1, 1);
IP_ADDR4(&d2, 8, 8, 8, 8);
dns_setserver(0, &d1);
dns_setserver(1, &d2);
// Also push through the esp_netif layer. dns_setserver() writes the
// global lwIP table directly; esp_netif_set_dns_info() is what the
// DHCP client itself calls, so writing here prevents the next DHCP
// event from silently overwriting our pin.
esp_netif_t* sta = esp_netif_get_handle_from_ifkey("WIFI_STA_DEF");
if (sta) {
esp_netif_dns_info_t info{};
IP_ADDR4(&info.ip, 1, 1, 1, 1);
esp_netif_set_dns_info(sta, ESP_NETIF_DNS_MAIN, &info);
IP_ADDR4(&info.ip, 8, 8, 8, 8);
esp_netif_set_dns_info(sta, ESP_NETIF_DNS_BACKUP, &info);
}
net_guard_dump_dns("pinned");
}
// Shared with the WiFi event task. 32-bit aligned loads/stores are atomic on
// Xtensa; volatile suffices. Tick re-evaluates every loop iteration, so stale
// reads self-correct within ~200ms.
@@ -23,6 +81,11 @@ static volatile uint32_t s_next_retry_ms = 0;
static void on_wifi_event(WiFiEvent_t event, WiFiEventInfo_t info) {
switch (event) {
case ARDUINO_EVENT_WIFI_STA_GOT_IP:
// Override DHCP-supplied DNS. Some routers return TC=1 for short
// answers (forcing TCP fallback that lwIP can't follow), or hand
// out an unreachable resolver. Pin to public resolvers so
// hostByName() never depends on the local network's DNS quality.
net_guard_pin_dns();
s_up = true;
s_attempts = 0;
s_next_retry_ms = 0;

View File

@@ -21,4 +21,13 @@ uint8_t net_guard_last_disconnect_reason();
// Non-blocking tick called from loop(); kicks reconnect if due.
extern "C" void net_guard_tick();
// Override DHCP-supplied DNS with public resolvers (1.1.1.1 primary,
// 8.8.8.8 backup). Writes the lwIP DNS server table and, when the station
// netif handle is available, the esp_netif DNS info as well, then prints the
// resulting state to Serial under the tag "pinned".
// Takes no arguments and returns nothing.
// Idempotent; safe to call repeatedly. net_guard re-applies on every GOT_IP,
// but main.cpp must call it once for the boot association (which completes
// before net_guard_start() registers its event handler).
void net_guard_pin_dns();
// Diagnostic: print current DNS table state from both lwIP and esp_netif
// to Serial, one "[DNS] <tag> ..." line per layer.
// `tag` is a caller-chosen label embedded in each line; it is formatted with
// %s, so it must be a valid NUL-terminated string (not nullptr).
void net_guard_dump_dns(const char* tag);
#endif