Files
DoorCounter/firmware/src/main.cpp
Peter Woolery a585a56cff fix(firmware): upgrade NimBLE to 2.x + DNS fallback for unreliable resolvers
NimBLE-Arduino 1.4.2 had an init/fire race in its FreeRTOS callout porting
layer where os_callout_timer_cb dispatched a queued TimerHandle expiry
against a not-yet-initialized event (NULL fn pointer), causing PC=0
InstrFetchProhibited within ~1s of boot when the camera task starved the
timer service. Confirmed by ets_printf instrumentation. Upgrading to
^2.0.0 rewrites the porting layer and eliminates the race; verified clean
on the customer network for 1+ hour.

Also rolls in DNS-resilience work that surfaced the BLE crash during
provisioning: pin lwIP/esp-netif resolvers to 1.1.1.1/8.8.8.8 across DHCP
renewals, add three-tier resolver fallback in reporter with a hardcoded
IP of last resort, and switch to raw WiFiClient with manual Host header
to bypass HTTPClient's brittle DNS path.

Migration touches for NimBLE 2.x:
- NimBLEAdvertisedDeviceCallbacks -> NimBLEScanCallbacks
- onResult signature now takes const NimBLEAdvertisedDevice*
- setAdvertisedDeviceCallbacks -> setScanCallbacks
- start(0, nullptr, false) -> start(0, false, false)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 11:34:17 -07:00

289 lines
9.6 KiB
C++

// firmware/src/main.cpp
#include <Arduino.h>
#include <WiFi.h>
#include <ArduinoOTA.h>
#include "config.h"
#include "provisioning.h"
#include "camera.h"
#include "cv.h"
#include "ble_scanner.h"
#include "reporter.h"
#include "event_log.h"
#include "net_guard.h"
#include <esp_system.h>
#include <esp_task_wdt.h>
// Status LED on GPIO2 (TimerCamera-F built-in LED) — verify against board schematic.
#define LED_PIN 2
// Factory-reset button on GPIO37 (BOOT button). Configured with the internal
// pull-up in setup(), so a pressed button reads LOW.
#define BUTTON_PIN 37
// How long (ms) the button must be held before the saved WiFi config is
// cleared and the device restarts: 5 seconds.
#define FACTORY_RESET_HOLD_MS 5000
// BLE scanning is enabled. Set BLE_SCANNING_ENABLED to 0 to compile out the
// BLE scanner calls in this file.
// Historical note: NimBLE-Arduino 1.4.2 had an init/fire race in its FreeRTOS
// callout porting layer: the FreeRTOS timer task could dispatch an
// os_callout_timer_cb whose callback fn was still NULL, causing a PC=0 fetch
// (InstrFetchProhibited) within ~1s of boot when the camera task starved the
// timer service. Fixed by upgrading to 2.x (see platformio.ini).
// Compile-time switch tested with #if throughout this file; 1 = BLE scanner
// calls are compiled in, 0 = they are compiled out.
#define BLE_SCANNING_ENABLED 1
// Target camera frame rate and the matching delay between captures.
// Integer division: 1000 / 5 = 200 ms.
#define CAM_FPS 5
#define CAM_INTERVAL_MS (1000 / CAM_FPS)
// Seconds between reports (one hour).
#define REPORT_INTERVAL_S 3600
#define BOOT_REPORT_DELAY_S 60 // first report fires 60s after NTP sync
// Device configuration, filled by config_load() in setup().
static DeviceConfig g_cfg;
// CV state and counters; the camera and reporter tasks access it while
// holding s_cv_mutex.
static CVState g_cv;
// Mutex guarding g_cv. Created in setup() before the tasks are started.
static SemaphoreHandle_t s_cv_mutex = nullptr;
// Drive the status LED pin: HIGH when `on` is true, LOW otherwise.
static void led_set(bool on) {
    if (on) {
        digitalWrite(LED_PIN, HIGH);
    } else {
        digitalWrite(LED_PIN, LOW);
    }
}
// Non-blocking-ish detection blink. Saves and restores the current LED state
// so it doesn't clobber upload/no-wifi indicators. Total duration: ~60ms per
// pulse + 80ms gap between pulses.
static void led_blink_pattern(int pulses) {
bool prev = digitalRead(LED_PIN);
for (int i = 0; i < pulses; i++) {
led_set(true);
vTaskDelay(pdMS_TO_TICKS(60));
led_set(false);
if (i < pulses - 1) vTaskDelay(pdMS_TO_TICKS(80));
}
led_set(prev);
}
// Factory-reset poll. Returns immediately when the button is not pressed
// (pin reads non-LOW). While the button stays pressed, spins in 50ms steps,
// feeding the task watchdog each step; once the press has lasted
// FACTORY_RESET_HOLD_MS, logs the reboot reason, clears the saved WiFi
// config and restarts the device. Releasing early just returns.
static void check_factory_reset() {
    if (digitalRead(BUTTON_PIN) == LOW) {
        const uint32_t press_start_ms = millis();
        for (;;) {
            if (digitalRead(BUTTON_PIN) != LOW) {
                break;  // released before the hold time elapsed
            }
            const uint32_t held_ms = millis() - press_start_ms;
            if (held_ms >= FACTORY_RESET_HOLD_MS) {
                event_log_write(EVT_REBOOT, REBOOT_FACTORY_RESET, 0);
                config_clear_wifi();
                ESP.restart();
            }
            delay(50);
            esp_task_wdt_reset();
        }
    }
}
// Camera + CV task — runs on core 1 at 5 fps
static void task_camera(void*) {
static uint8_t frame[CV_PIXELS]; // static: avoids 9KB on task stack
int last_logged_track_id = 0; // diagnostic: log each new track once
esp_task_wdt_add(nullptr);
while (true) {
if (camera_capture_96(frame)) {
if (xSemaphoreTake(s_cv_mutex, pdMS_TO_TICKS(100)) == pdTRUE) {
CVResult r = cv_process(g_cv, frame, g_cfg.line_offset);
(void)last_logged_track_id;
if (r.entries_delta) Serial.printf("[CV] entry +%d (total %d) first=%.1f min=%.1f max=%.1f last=%.1f dur=%d\n",
r.entries_delta, g_cv.entries,
r.fire_first_c, r.fire_min_c, r.fire_max_c, r.fire_last_c, r.fire_duration);
if (r.exits_delta) Serial.printf("[CV] exit +%d (total %d) first=%.1f min=%.1f max=%.1f last=%.1f dur=%d\n",
r.exits_delta, g_cv.exits,
r.fire_first_c, r.fire_min_c, r.fire_max_c, r.fire_last_c, r.fire_duration);
xSemaphoreGive(s_cv_mutex);
if (r.entries_delta) led_blink_pattern(1);
if (r.exits_delta) led_blink_pattern(2);
}
}
vTaskDelay(pdMS_TO_TICKS(CAM_INTERVAL_MS));
esp_task_wdt_reset();
}
}
// Hourly reporter task — created pinned to core 0.
//
// Polls the wall clock every 10s. Once the clock looks NTP-synced it schedules
// the first report BOOT_REPORT_DELAY_S later and subsequent reports every
// REPORT_INTERVAL_S. A report consists of: snapshot + reset of the CV
// counters, BLE record collection, camera/BLE submission and a heartbeat.
// Consecutive heartbeat failures are counted and the device reboots after
// kMaxHeartbeatMisses of them.
static void task_reporter(void*) {
    // Reboot after this many heartbeat failures in a row.
    static const uint8_t kMaxHeartbeatMisses = 6;
    static uint8_t consecutive_misses = 0;
    uint32_t last_report_ts = 0;  // 0 = not initialized yet

    esp_task_wdt_add(nullptr);
    while (true) {
        vTaskDelay(pdMS_TO_TICKS(10000));  // check every 10s
        esp_task_wdt_reset();

        uint32_t now = (uint32_t)(time(nullptr));
        if (now < 1700000000UL) continue;  // NTP not synced

        // First valid timestamp — schedule boot report 60s from now
        if (last_report_ts == 0) {
            event_log_write(EVT_NTP_SYNC, (uint16_t)(millis() / 1000), 0);
            last_report_ts = now - (REPORT_INTERVAL_S - BOOT_REPORT_DELAY_S);
            continue;
        }
        if ((now - last_report_ts) < REPORT_INTERVAL_S) continue;

        uint32_t period_start = last_report_ts;
        uint32_t period_end = now;

        // Snapshot the CV counters before touching BLE or the LED. If the
        // mutex cannot be taken, nothing has been changed yet and
        // last_report_ts is NOT advanced, so the next 10s poll retries with
        // the same period_start and the eventual record still covers the
        // whole interval in which its counts were accumulated.
        CameraHourlyRecord cam_rec;
        if (xSemaphoreTake(s_cv_mutex, pdMS_TO_TICKS(500)) != pdTRUE) {
            continue;
        }
        cam_rec = {period_start, period_end, g_cv.entries, g_cv.exits};
        cv_reset_counts(g_cv);
        xSemaphoreGive(s_cv_mutex);
        last_report_ts = now;

        // Deinit BLE to free ~25KB heap for SSL handshakes
#if BLE_SCANNING_ENABLED
        ble_scanner_deinit();
#endif
        led_set(true);  // on = uploading

#if !BLE_SCANNING_ENABLED
        BLEHourlyRecord ble_rec = {period_start, period_end, 0, 0};
#else
        BLEHourlyRecord ble_rec = ble_scanner_collect(period_start, period_end);
#endif

        // This task is subscribed to the task watchdog (30s, panic on
        // trigger — see setup()). The three calls below go out over the
        // network back to back, so feed the watchdog around each one instead
        // of relying on the single feed at the top of the loop.
        esp_task_wdt_reset();
        reporter_submit_camera(g_cfg, cam_rec);
        esp_task_wdt_reset();
        reporter_submit_ble(g_cfg, ble_rec);
        esp_task_wdt_reset();
        bool hb_ok = reporter_heartbeat(g_cfg, millis() / 1000, WiFi.RSSI());
        esp_task_wdt_reset();

#if BLE_SCANNING_ENABLED
        ble_scanner_reinit();
#endif
        led_set(false);

        if (hb_ok) {
            consecutive_misses = 0;
        } else {
            consecutive_misses++;
            event_log_write(EVT_HEARTBEAT_MISS, consecutive_misses, 0);
            Serial.printf("[WDG] heartbeat miss %u/%u\n",
                          (unsigned)consecutive_misses, (unsigned)kMaxHeartbeatMisses);
            if (consecutive_misses >= kMaxHeartbeatMisses) {
                event_log_write(EVT_REBOOT, REBOOT_HEARTBEAT_MISS, 0);
                delay(200);  // let NVS commit before reboot
                ESP.restart();
            }
        }
    }
}
void setup() {
Serial.begin(115200);
pinMode(LED_PIN, OUTPUT);
pinMode(BUTTON_PIN, INPUT_PULLUP);
led_set(true); // on = booting
event_log_init();
event_log_write(EVT_BOOT, (uint16_t)esp_reset_reason(), 0);
if (!config_load(g_cfg)) {
Serial.println("FATAL: device_id/location_id/hmac_secret not provisioned");
event_log_write(EVT_REBOOT, REBOOT_FATAL_CONFIG, 0);
// Blink fast for 3s so a physically-present operator can see it,
// then reboot so EVT_BOOT history on the next heartbeat surfaces
// the failure — though in this case the device can't heartbeat
// without config, so the real signal is the fast-blink-then-reboot
// cycle visible on the LED.
uint32_t t0 = millis();
while (millis() - t0 < 3000) { led_set(!digitalRead(LED_PIN)); delay(100); }
ESP.restart();
}
// Connect to WiFi
if (!config_has_wifi()) {
provisioning_run();
event_log_write(EVT_REBOOT, REBOOT_WIFI_REPROV, 0);
ESP.restart();
}
WiFi.begin(g_cfg.wifi_ssid.c_str(), g_cfg.wifi_pass.c_str());
uint32_t wifi_start = millis();
while (WiFi.status() != WL_CONNECTED && millis() - wifi_start < 15000) {
check_factory_reset();
delay(200);
}
if (WiFi.status() != WL_CONNECTED) {
// Saved creds failed — re-provision
provisioning_run();
event_log_write(EVT_REBOOT, REBOOT_WIFI_REPROV, 0);
ESP.restart();
}
// Boot connect happens before net_guard registers its WiFi event handler,
// so the GOT_IP-driven DNS override there won't fire for this association.
// Pin DNS now; net_guard re-applies it on every subsequent reconnect.
net_guard_pin_dns();
net_guard_start(g_cfg);
led_set(false); // off = connected
// NTP sync (UTC)
configTime(0, 0, "pool.ntp.org", "time.nist.gov");
cv_init(g_cv);
if (!camera_init()) {
Serial.println("FATAL: camera init failed");
event_log_write(EVT_REBOOT, REBOOT_FATAL_CAMERA, 0);
uint32_t t0 = millis();
while (millis() - t0 < 3000) { led_set(!digitalRead(LED_PIN)); delay(100); }
ESP.restart();
}
reporter_init();
#if BLE_SCANNING_ENABLED
ble_scanner_start();
#endif
// OTA update support
ArduinoOTA.setHostname(g_cfg.device_id.c_str());
#if !BLE_SCANNING_ENABLED
ArduinoOTA.onStart([]() { });
#else
ArduinoOTA.onStart([]() { ble_scanner_pause(); });
#endif
ArduinoOTA.onEnd([]() {
#if BLE_SCANNING_ENABLED
ble_scanner_resume();
#endif
event_log_write(EVT_REBOOT, REBOOT_OTA, 0);
ESP.restart();
});
#if !BLE_SCANNING_ENABLED
ArduinoOTA.onError([](ota_error_t e) { });
#else
ArduinoOTA.onError([](ota_error_t e) { ble_scanner_resume(); });
#endif
ArduinoOTA.begin();
s_cv_mutex = xSemaphoreCreateMutex();
// Task watchdog: 30s timeout, panic on trigger so we reboot and log
// via esp_reset_reason() in EVT_BOOT on the next boot.
esp_task_wdt_init(30, /*panic=*/true);
esp_task_wdt_add(nullptr); // subscribe the Arduino loopTask
xTaskCreatePinnedToCore(task_camera, "cam", 8192, nullptr, 2, nullptr, 1);
xTaskCreatePinnedToCore(task_reporter, "rep", 8192, nullptr, 1, nullptr, 0);
}
// Arduino main loop, one pass roughly every 200ms: feed the task watchdog,
// service OTA, poll the factory-reset button and tick net_guard. On a change
// of net_guard's up/down state the LED is switched (on while the network is
// NOT up) and, when the network has just come back, reporter_flush() is called.
void loop() {
    static bool s_prev_net_up = true;

    esp_task_wdt_reset();
    ArduinoOTA.handle();
    check_factory_reset();
    net_guard_tick();

    const bool net_up = net_guard_is_up();
    if (net_up != s_prev_net_up) {
        if (net_up) {
            led_set(false);
            reporter_flush(g_cfg);
        } else {
            led_set(true);  // LED on when NOT up
        }
        s_prev_net_up = net_up;
    }

    delay(200);
}