fix(firmware): upgrade NimBLE to 2.x + DNS fallback for unreliable resolvers

NimBLE-Arduino 1.4.2 had an init/fire race in its FreeRTOS callout porting
layer where os_callout_timer_cb dispatched a queued TimerHandle expiry
against a not-yet-initialized event (NULL fn pointer), causing PC=0
InstrFetchProhibited within ~1s of boot when the camera task starved the
timer service. Confirmed by ets_printf instrumentation. Upgrading to
^2.0.0 rewrites the porting layer and eliminates the race; verified clean
on the customer network for 1+ hour.

Also rolls in DNS-resilience work that surfaced the BLE crash during
provisioning: pin lwIP/esp-netif resolvers to 1.1.1.1/8.8.8.8 across DHCP
renewals, add three-tier resolver fallback in reporter with a hardcoded
IP of last resort, and switch to raw WiFiClient with manual Host header
to bypass HTTPClient's brittle DNS path.

Migration touches for NimBLE 2.x:
- NimBLEAdvertisedDeviceCallbacks -> NimBLEScanCallbacks
- onResult signature now takes const NimBLEAdvertisedDevice*
- setAdvertisedDeviceCallbacks -> setScanCallbacks
- start(0, nullptr, false) -> start(0, false, false)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-01 11:34:17 -07:00
parent 461ed7d888
commit a585a56cff
9 changed files with 1434 additions and 31 deletions

View File

@@ -19,6 +19,15 @@
#define BUTTON_PIN 37
#define FACTORY_RESET_HOLD_MS 5000
// BLE scanning disabled in production until the NimBLE-Arduino 1.4.2 timer
// race is resolved. Symptom: FreeRTOS timer task dispatches an
// os_callout_timer_cb whose callback fn is NULL, causing PC=0 fetch and
// Historical note: NimBLE-Arduino 1.4.2 had an init/fire race in its FreeRTOS
// callout porting layer that caused a NULL-fn dispatch (PC=0,
// InstrFetchProhibited) within ~1s of boot when the camera task starved the
// timer service. Fixed by upgrading to 2.x (see platformio.ini).
#define BLE_SCANNING_ENABLED 1
#define CAM_FPS 5
#define CAM_INTERVAL_MS (1000 / CAM_FPS)
#define REPORT_INTERVAL_S 3600
@@ -67,16 +76,7 @@ static void task_camera(void*) {
if (camera_capture_96(frame)) {
if (xSemaphoreTake(s_cv_mutex, pdMS_TO_TICKS(100)) == pdTRUE) {
CVResult r = cv_process(g_cv, frame, g_cfg.line_offset);
for (const auto& t : g_cv.tracks) {
if (t.id > last_logged_track_id) {
last_logged_track_id = t.id;
Serial.printf("[CV] spawn id=%d y=%.1f\n", t.id, t.spawn_y);
}
}
if (r.fg_count > 0) {
Serial.printf("[F] n=%d y=%d..%d c=%.1f\n",
r.fg_count, r.fg_min_y, r.fg_max_y, r.fg_centroid_y);
}
(void)last_logged_track_id;
if (r.entries_delta) Serial.printf("[CV] entry +%d (total %d) first=%.1f min=%.1f max=%.1f last=%.1f dur=%d\n",
r.entries_delta, g_cv.entries,
r.fire_first_c, r.fire_min_c, r.fire_max_c, r.fire_last_c, r.fire_duration);
@@ -119,7 +119,9 @@ static void task_reporter(void*) {
last_report_ts = now;
// Deinit BLE to free ~25KB heap for SSL handshakes
#if BLE_SCANNING_ENABLED
ble_scanner_deinit();
#endif
led_set(true); // on = uploading
CameraHourlyRecord cam_rec;
@@ -129,18 +131,26 @@ static void task_reporter(void*) {
xSemaphoreGive(s_cv_mutex);
} else {
// Failed to acquire — skip this cycle, will report next hour
#if BLE_SCANNING_ENABLED
ble_scanner_reinit();
#endif
led_set(false);
continue;
}
#if !BLE_SCANNING_ENABLED
BLEHourlyRecord ble_rec = {period_start, period_end, 0, 0};
#else
BLEHourlyRecord ble_rec = ble_scanner_collect(period_start, period_end);
#endif
reporter_submit_camera(g_cfg, cam_rec);
reporter_submit_ble(g_cfg, ble_rec);
bool hb_ok = reporter_heartbeat(g_cfg, millis() / 1000, WiFi.RSSI());
#if BLE_SCANNING_ENABLED
ble_scanner_reinit();
#endif
led_set(false);
static uint8_t consecutive_misses = 0;
@@ -202,6 +212,11 @@ void setup() {
ESP.restart();
}
// Boot connect happens before net_guard registers its WiFi event handler,
// so the GOT_IP-driven DNS override there won't fire for this association.
// Pin DNS now; net_guard re-applies it on every subsequent reconnect.
net_guard_pin_dns();
net_guard_start(g_cfg);
led_set(false); // off = connected
@@ -220,17 +235,29 @@ void setup() {
reporter_init();
#if BLE_SCANNING_ENABLED
ble_scanner_start();
#endif
// OTA update support
ArduinoOTA.setHostname(g_cfg.device_id.c_str());
#if !BLE_SCANNING_ENABLED
ArduinoOTA.onStart([]() { });
#else
ArduinoOTA.onStart([]() { ble_scanner_pause(); });
#endif
ArduinoOTA.onEnd([]() {
#if BLE_SCANNING_ENABLED
ble_scanner_resume();
#endif
event_log_write(EVT_REBOOT, REBOOT_OTA, 0);
ESP.restart();
});
#if !BLE_SCANNING_ENABLED
ArduinoOTA.onError([](ota_error_t e) { });
#else
ArduinoOTA.onError([](ota_error_t e) { ble_scanner_resume(); });
#endif
ArduinoOTA.begin();
s_cv_mutex = xSemaphoreCreateMutex();