fix(firmware): upgrade NimBLE to 2.x + DNS fallback for unreliable resolvers
NimBLE-Arduino 1.4.2 had an init/fire race in its FreeRTOS callout porting layer: os_callout_timer_cb dispatched a queued TimerHandle expiry against a not-yet-initialized event (NULL fn pointer), causing a PC=0 InstrFetchProhibited crash within ~1s of boot when the camera task starved the timer service. Confirmed by ets_printf instrumentation. Upgrading to ^2.0.0 rewrites the porting layer and eliminates the race; verified clean on the customer network for 1+ hour.

Also rolls in the DNS-resilience work that surfaced the BLE crash during provisioning:
- pin lwIP/esp-netif resolvers to 1.1.1.1/8.8.8.8 across DHCP renewals
- add a three-tier resolver fallback in reporter, with a hardcoded IP of last resort
- switch to raw WiFiClient with a manual Host header to bypass HTTPClient's brittle DNS path

Migration touches for NimBLE 2.x:
- NimBLEAdvertisedDeviceCallbacks -> NimBLEScanCallbacks
- onResult signature now takes const NimBLEAdvertisedDevice*
- setAdvertisedDeviceCallbacks -> setScanCallbacks
- start(0, nullptr, false) -> start(0, false, false)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -19,6 +19,15 @@
|
||||
#define BUTTON_PIN 37
|
||||
#define FACTORY_RESET_HOLD_MS 5000
|
||||
|
||||
// BLE scanning was disabled in production while the NimBLE-Arduino 1.4.2 timer
|
||||
// race was unresolved. Symptom: the FreeRTOS timer task dispatched an
|
||||
// os_callout_timer_cb whose callback fn was NULL, causing a PC=0 fetch and crash.
|
||||
// Historical note: NimBLE-Arduino 1.4.2 had an init/fire race in its FreeRTOS
|
||||
// callout porting layer that caused a NULL-fn dispatch (PC=0,
|
||||
// InstrFetchProhibited) within ~1s of boot when the camera task starved the
|
||||
// timer service. Fixed by upgrading to 2.x (see platformio.ini).
|
||||
#define BLE_SCANNING_ENABLED 1
|
||||
|
||||
#define CAM_FPS 5
|
||||
#define CAM_INTERVAL_MS (1000 / CAM_FPS)
|
||||
#define REPORT_INTERVAL_S 3600
|
||||
@@ -67,16 +76,7 @@ static void task_camera(void*) {
|
||||
if (camera_capture_96(frame)) {
|
||||
if (xSemaphoreTake(s_cv_mutex, pdMS_TO_TICKS(100)) == pdTRUE) {
|
||||
CVResult r = cv_process(g_cv, frame, g_cfg.line_offset);
|
||||
for (const auto& t : g_cv.tracks) {
|
||||
if (t.id > last_logged_track_id) {
|
||||
last_logged_track_id = t.id;
|
||||
Serial.printf("[CV] spawn id=%d y=%.1f\n", t.id, t.spawn_y);
|
||||
}
|
||||
}
|
||||
if (r.fg_count > 0) {
|
||||
Serial.printf("[F] n=%d y=%d..%d c=%.1f\n",
|
||||
r.fg_count, r.fg_min_y, r.fg_max_y, r.fg_centroid_y);
|
||||
}
|
||||
(void)last_logged_track_id;
|
||||
if (r.entries_delta) Serial.printf("[CV] entry +%d (total %d) first=%.1f min=%.1f max=%.1f last=%.1f dur=%d\n",
|
||||
r.entries_delta, g_cv.entries,
|
||||
r.fire_first_c, r.fire_min_c, r.fire_max_c, r.fire_last_c, r.fire_duration);
|
||||
@@ -119,7 +119,9 @@ static void task_reporter(void*) {
|
||||
last_report_ts = now;
|
||||
|
||||
// Deinit BLE to free ~25KB heap for SSL handshakes
|
||||
#if BLE_SCANNING_ENABLED
|
||||
ble_scanner_deinit();
|
||||
#endif
|
||||
led_set(true); // on = uploading
|
||||
|
||||
CameraHourlyRecord cam_rec;
|
||||
@@ -129,18 +131,26 @@ static void task_reporter(void*) {
|
||||
xSemaphoreGive(s_cv_mutex);
|
||||
} else {
|
||||
// Failed to acquire — skip this cycle, will report next hour
|
||||
#if BLE_SCANNING_ENABLED
|
||||
ble_scanner_reinit();
|
||||
#endif
|
||||
led_set(false);
|
||||
continue;
|
||||
}
|
||||
|
||||
#if !BLE_SCANNING_ENABLED
|
||||
BLEHourlyRecord ble_rec = {period_start, period_end, 0, 0};
|
||||
#else
|
||||
BLEHourlyRecord ble_rec = ble_scanner_collect(period_start, period_end);
|
||||
#endif
|
||||
|
||||
reporter_submit_camera(g_cfg, cam_rec);
|
||||
reporter_submit_ble(g_cfg, ble_rec);
|
||||
bool hb_ok = reporter_heartbeat(g_cfg, millis() / 1000, WiFi.RSSI());
|
||||
|
||||
#if BLE_SCANNING_ENABLED
|
||||
ble_scanner_reinit();
|
||||
#endif
|
||||
led_set(false);
|
||||
|
||||
static uint8_t consecutive_misses = 0;
|
||||
@@ -202,6 +212,11 @@ void setup() {
|
||||
ESP.restart();
|
||||
}
|
||||
|
||||
// Boot connect happens before net_guard registers its WiFi event handler,
|
||||
// so the GOT_IP-driven DNS override there won't fire for this association.
|
||||
// Pin DNS now; net_guard re-applies it on every subsequent reconnect.
|
||||
net_guard_pin_dns();
|
||||
|
||||
net_guard_start(g_cfg);
|
||||
led_set(false); // off = connected
|
||||
|
||||
@@ -220,17 +235,29 @@ void setup() {
|
||||
|
||||
reporter_init();
|
||||
|
||||
#if BLE_SCANNING_ENABLED
|
||||
ble_scanner_start();
|
||||
#endif
|
||||
|
||||
// OTA update support
|
||||
ArduinoOTA.setHostname(g_cfg.device_id.c_str());
|
||||
#if !BLE_SCANNING_ENABLED
|
||||
ArduinoOTA.onStart([]() { });
|
||||
#else
|
||||
ArduinoOTA.onStart([]() { ble_scanner_pause(); });
|
||||
#endif
|
||||
ArduinoOTA.onEnd([]() {
|
||||
#if BLE_SCANNING_ENABLED
|
||||
ble_scanner_resume();
|
||||
#endif
|
||||
event_log_write(EVT_REBOOT, REBOOT_OTA, 0);
|
||||
ESP.restart();
|
||||
});
|
||||
#if !BLE_SCANNING_ENABLED
|
||||
ArduinoOTA.onError([](ota_error_t e) { });
|
||||
#else
|
||||
ArduinoOTA.onError([](ota_error_t e) { ble_scanner_resume(); });
|
||||
#endif
|
||||
ArduinoOTA.begin();
|
||||
|
||||
s_cv_mutex = xSemaphoreCreateMutex();
|
||||
|
||||
Reference in New Issue
Block a user