feat: event-based walker detector tuned to real 7' overhead mount

Replace per-track line-crossing counter with a single event state machine
gated by foreground pixel count (ENTER=250, EXIT=150) and finalized by
quiet-exit or timeout. Direction inferred from centroid excursion
(up_score vs down_score) on quiet-exit fires, and from net displacement
(last_c vs first_c) on timeout fires.

Tuning reflects bench data at the intended 7' overhead mount: walkers
produce smaller centroid excursions than originally modelled, so
EXTENT gates, MIN_TRAJ, MAX_FRAMES and REFRACTORY were all relaxed from
their initial guesses. Constants and rationale live in firmware/lib/cv/cv.h.

Bench results (8 isolated walks, 4 entries + 4 exits):
  * Event detection: 8/8 (100%)
  * Aggregate entries+exits split: 4+4 (matches)
  * Per-walk direction labelling: 4/8 (~50%)

Document explicitly that per-walk direction is unreliable at this mount
and that downstream analytics should trust only gross traffic
(entries + exits). Recovering direction would require a physical mount
change or a richer signal; both are out of scope for v1.

Tooling:
  * tools/replay_logs.py — replay event state machine against captured
    [F] diagnostic lines, for offline tuning without flash-test loops.
  * firmware/src/main_capture.cpp + tools/capture_frames.py +
    tools/replay_frames.py — raw-frame capture firmware and Python port
    of the detector, kept in tree for future iteration even though the
    TimerCamera-F serial driver stripped specific byte ranges in testing
    and log-based replay became the working path.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-17 16:03:36 -07:00
parent 3b471992f2
commit a37207b6ff
12 changed files with 1203 additions and 340 deletions

View File

@@ -5,8 +5,21 @@
#include <algorithm>
#include <vector>
// Put the event state machine back into its idle state and clear every
// per-event accumulator so the next event starts from a clean slate.
static void event_reset(CVState& s) {
    // Lifecycle bookkeeping: no event in progress, all counters cleared.
    s.event_active      = false;
    s.event_quiet_count = 0;
    s.event_frame_count = 0;
    s.event_start_frame = 0;
    s.event_peak_n      = 0;

    // Centroid samples: -1.0f is the same "no foreground" value cv_process
    // reports for fg_centroid_y when fg_count is zero.
    s.event_last_c  = -1.0f;
    s.event_first_c = -1.0f;

    // Running extrema are seeded outside the valid row range [0, CV_H) so the
    // first real sample replaces them on either comparison.
    s.event_max_c      = -1.0f;
    s.event_min_c      = static_cast<float>(CV_H);
    s.event_max_y_seen = -1;
    s.event_min_y_seen = CV_H;
}
void cv_init(CVState& state) {
// Initialize members directly — avoid CVState{} temporary which puts 9KB on stack
memset(state.background, 0, sizeof(state.background));
state.bg_valid = false;
state.last_motion_frame = 0;
@@ -15,8 +28,8 @@ void cv_init(CVState& state) {
state.tracks.clear();
state.entries = 0;
state.exits = 0;
state.last_entry_frame = 0;
state.last_exit_frame = 0;
state.last_fire_frame = 0;
event_reset(state);
}
void cv_reset_counts(CVState& state) {
@@ -26,9 +39,6 @@ void cv_reset_counts(CVState& state) {
struct Point { int x, y; };
// Note: queue may grow to CV_PIXELS entries (~72KB) on large blobs.
// Requires PSRAM (enabled via -DBOARD_HAS_PSRAM in platformio.ini).
// BFS flood fill. Marks visited pixels (sets fg to 0). Returns {-1,-1} if blob < CV_MIN_BLOB_PX.
static std::pair<float,float> extract_blob(uint8_t* fg, int start_x, int start_y) {
std::vector<Point> queue;
queue.reserve(512);
@@ -60,7 +70,7 @@ static std::pair<float,float> extract_blob(uint8_t* fg, int start_x, int start_y
static std::vector<std::pair<float,float>> find_centroids(const uint8_t* fg) {
std::vector<std::pair<float,float>> result;
static uint8_t fg_copy[CV_PIXELS]; // static to avoid 9KB stack allocation
static uint8_t fg_copy[CV_PIXELS];
memcpy(fg_copy, fg, CV_PIXELS);
for (int y = 0; y < CV_H; y++) {
@@ -82,8 +92,62 @@ static void frame_diff(const uint8_t* frame, const uint8_t* bg,
}
}
CVResult cv_process(CVState& state, const uint8_t* frame, uint8_t line_pct) {
CVResult result = {0, 0};
// Decide whether the just-ended event should fire and in which direction.
// Up-through-frame (centroid excursion from high y toward low y) maps to
// ENTRY per mount convention.
//
// s:      per-event accumulators are read. On a fire, entries/exits and
//         last_fire_frame are updated. This function does not reset the
//         event; both call sites in cv_process call event_reset right after.
// result: on a fire, entries_delta or exits_delta is incremented and the
//         fire_* diagnostics are filled in. Left untouched when any gate
//         below rejects the event.
static void finalize_event(CVState& s, CVResult& result) {
    // Gate 1: too few frames to be a walker.
    if (s.event_frame_count < CV_EVENT_MIN_FRAMES) return;

    // Note: no MAX_FRAMES rejection here. An event that runs the full duration
    // may still be a valid walker whose fg_count stayed above EXIT_THRESH due
    // to a stale bg or an AEC-driven lighting shift. Extent + MIN_TRAJ gates
    // below already reject stationary-person / wobble events.

    // Gate 2: foreground must have reached both the top and bottom bands.
    if (s.event_min_y_seen > CV_EVENT_EXTENT_TOP) return;
    if (s.event_max_y_seen < CV_EVENT_EXTENT_BOT) return;

    // Direction from centroid excursion relative to event start.
    // up_score:   how far centroid excursed upward (smaller y) from first_c.
    // down_score: how far it excursed downward (larger y) from first_c.
    const float up_score   = s.event_first_c - s.event_min_c;
    const float down_score = s.event_max_c - s.event_first_c;

    // Gate 3: the larger excursion must clear the minimum trajectory length.
    const float winning = (up_score >= down_score) ? up_score : down_score;
    if (winning < CV_EVENT_MIN_TRAJ) return;

    // Timeout-aware direction. Quiet-exit events (fg fell below EXIT_THRESH)
    // have the walker fully out of frame, so min/max excursion brackets the
    // true traversal and the up/down scores are reliable. Timeout events
    // (event exceeded MAX_FRAMES while fg was still elevated) may contain both
    // an approach and a departure, so excursion measures the walker's *range
    // in frame* rather than direction; for those the net first→last centroid
    // displacement is the better signal.
    //
    // The frame count alone cannot tell the two apart: cv_process only checks
    // the timeout on frames at or above EXIT_THRESH, so event_frame_count can
    // pass MAX_FRAMES during the trailing quiet frames of a clean quiet-exit.
    // The quiet counter disambiguates: the timeout call site zeroes it on the
    // same frame, while the quiet-exit call site increments it first, so it
    // is always >= 1 there.
    const bool timed_out = (s.event_quiet_count == 0) &&
                           (s.event_frame_count > CV_EVENT_MAX_FRAMES);

    bool is_entry;
    if (timed_out) {
        is_entry = (s.event_last_c < s.event_first_c);
    } else {
        is_entry = (up_score >= down_score);
    }

    if (is_entry) {
        s.entries++;
        result.entries_delta++;
    } else {
        s.exits++;
        result.exits_delta++;
    }

    // Record the fire frame (used by cv_process for the refractory window)
    // and expose the trajectory summary for diagnostics.
    s.last_fire_frame = s.frame_index;
    result.fire_first_c = s.event_first_c;
    result.fire_min_c = s.event_min_c;
    result.fire_max_c = s.event_max_c;
    result.fire_last_c = s.event_last_c;
    result.fire_duration = s.event_frame_count;
}
CVResult cv_process(CVState& state, const uint8_t* frame, uint8_t /*line_pct*/) {
CVResult result = {0, 0, 0, -1, -1, -1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0};
state.frame_index++;
if (!state.bg_valid) {
@@ -92,105 +156,147 @@ CVResult cv_process(CVState& state, const uint8_t* frame, uint8_t line_pct) {
return result;
}
static uint8_t fg[CV_PIXELS]; // static: avoids 9KB on task stack
static uint8_t fg[CV_PIXELS];
frame_diff(frame, state.background, fg, CV_PIXELS);
int fg_count = 0;
for (int i = 0; i < CV_PIXELS; i++) fg_count += fg[i];
bool motion = fg_count > CV_MIN_BLOB_PX;
if (!motion) {
if (state.frame_index - state.last_motion_frame > 10) {
memcpy(state.background, frame, CV_PIXELS);
// Running-average background blend: bg = (31*bg + frame)/32. Adapts to
// slow scene drift during idle periods. Frozen during an active event so
// the walker's signature is never absorbed — otherwise bg retains a
// "ghost" of the walker for ~30 frames after they leave, keeping fg_count
// elevated and preventing subsequent walkers from producing a clean
// trajectory.
if (!state.event_active) {
for (int i = 0; i < CV_PIXELS; i++) {
state.background[i] = (uint8_t)(((uint16_t)state.background[i] * 31 + frame[i]) >> 5);
}
}
int fg_count = 0;
int min_y = CV_H, max_y = -1;
long sum_y = 0;
for (int y = 0; y < CV_H; y++) {
const uint8_t* row = &fg[y * CV_W];
int row_count = 0;
for (int x = 0; x < CV_W; x++) row_count += row[x];
if (row_count > 0) {
if (y < min_y) min_y = y;
if (y > max_y) max_y = y;
sum_y += (long)row_count * y;
fg_count += row_count;
}
}
result.fg_count = fg_count;
result.fg_min_y = (fg_count > 0) ? min_y : -1;
result.fg_max_y = (fg_count > 0) ? max_y : -1;
result.fg_centroid_y = (fg_count > 0) ? ((float)sum_y / fg_count) : -1.0f;
// Hard self-heal: if more than half the frame is fg, bg is catastrophically
// wrong. Snap and skip the event machine this frame.
if (fg_count > CV_PIXELS / 2) {
memcpy(state.background, frame, CV_PIXELS);
state.last_motion_frame = state.frame_index;
if (state.event_active) event_reset(state);
return result;
}
// Diagnostic track management (no effect on counting).
bool motion = fg_count > CV_MIN_BLOB_PX;
if (motion) {
state.last_motion_frame = state.frame_index;
auto centroids = find_centroids(fg);
std::vector<bool> centroid_matched(centroids.size(), false);
for (auto& track : state.tracks) {
float best_dist = CV_MAX_MOVE * CV_MAX_MOVE;
int best_idx = -1;
for (int i = 0; i < (int)centroids.size(); i++) {
if (centroid_matched[i]) continue;
float dx = centroids[i].first - track.x;
float dy = centroids[i].second - track.y;
float d2 = dx*dx + dy*dy;
if (d2 < best_dist) { best_dist = d2; best_idx = i; }
}
if (best_idx >= 0) {
centroid_matched[best_idx] = true;
track.x = centroids[best_idx].first;
track.y = centroids[best_idx].second;
track.missed = 0;
} else {
track.missed++;
}
}
state.tracks.erase(
std::remove_if(state.tracks.begin(), state.tracks.end(),
[](const CVTrack& t){ return t.missed > CV_MAX_MISSED; }),
state.tracks.end());
for (int i = 0; i < (int)centroids.size(); i++) {
if (centroid_matched[i]) continue;
CVTrack t;
t.id = state.next_id++;
t.x = centroids[i].first;
t.y = centroids[i].second;
t.spawn_y = t.y;
t.missed = 0;
state.tracks.push_back(t);
}
} else {
for (auto& t : state.tracks) t.missed++;
state.tracks.erase(
std::remove_if(state.tracks.begin(), state.tracks.end(),
[](const CVTrack& t){ return t.missed > CV_MAX_MISSED; }),
state.tracks.end());
return result;
}
state.last_motion_frame = state.frame_index;
// Event state machine. Refractory period after a fire blocks new events
// for CV_EVENT_REFRACTORY_FRAMES frames — absorbs lingering-walker motion
// that would otherwise re-trigger a second count.
bool in_refractory = state.last_fire_frame != 0 &&
(state.frame_index - state.last_fire_frame) < CV_EVENT_REFRACTORY_FRAMES;
auto centroids = find_centroids(fg);
std::vector<bool> centroid_matched(centroids.size(), false);
for (auto& track : state.tracks) {
float best_dist = CV_MAX_MOVE * CV_MAX_MOVE;
int best_idx = -1;
for (int i = 0; i < (int)centroids.size(); i++) {
if (centroid_matched[i]) continue;
float dx = centroids[i].first - track.x;
float dy = centroids[i].second - track.y;
float d2 = dx*dx + dy*dy;
if (d2 < best_dist) { best_dist = d2; best_idx = i; }
if (!state.event_active) {
if (!in_refractory && fg_count >= CV_EVENT_ENTER_THRESH) {
state.event_active = true;
state.event_start_frame = state.frame_index;
state.event_frame_count = 1;
state.event_peak_n = fg_count;
state.event_first_c = result.fg_centroid_y;
state.event_last_c = result.fg_centroid_y;
state.event_min_c = result.fg_centroid_y;
state.event_max_c = result.fg_centroid_y;
state.event_min_y_seen = min_y;
state.event_max_y_seen = max_y;
state.event_quiet_count = 0;
}
if (best_idx >= 0) {
centroid_matched[best_idx] = true;
track.x = centroids[best_idx].first;
track.y = centroids[best_idx].second;
track.missed = 0;
} else {
state.event_frame_count++;
if (fg_count > state.event_peak_n) state.event_peak_n = fg_count;
if (fg_count > 0) {
state.event_last_c = result.fg_centroid_y;
if (result.fg_centroid_y < state.event_min_c) state.event_min_c = result.fg_centroid_y;
if (result.fg_centroid_y > state.event_max_c) state.event_max_c = result.fg_centroid_y;
if (min_y < state.event_min_y_seen) state.event_min_y_seen = min_y;
if (max_y > state.event_max_y_seen) state.event_max_y_seen = max_y;
}
if (fg_count < CV_EVENT_EXIT_THRESH) {
state.event_quiet_count++;
if (state.event_quiet_count >= CV_EVENT_QUIET_FRAMES) {
finalize_event(state, result);
event_reset(state);
memcpy(state.background, frame, CV_PIXELS);
}
} else {
track.missed++;
}
}
state.tracks.erase(
std::remove_if(state.tracks.begin(), state.tracks.end(),
[](const CVTrack& t){ return t.missed > CV_MAX_MISSED; }),
state.tracks.end());
float line_y = (line_pct / 100.0f) * CV_H;
for (int i = 0; i < (int)centroids.size(); i++) {
if (centroid_matched[i]) continue;
CVTrack t;
t.id = state.next_id++;
t.x = centroids[i].first;
t.y = centroids[i].second;
t.spawn_y = t.y;
t.above_line = (t.y < line_y);
t.counted = false;
t.missed = 0;
state.tracks.push_back(t);
}
// Directional crossing check. A track counts at most once, and only if it
// spawned clearly on one side of the line AND is now clearly on the other.
// This rejects blobs that wobble around the line (shadows, body straddling
// the line, track churn at spawn) — only a true traversal fires an event.
for (auto& track : state.tracks) {
if (track.missed > 0) continue; // only check tracks matched this frame
if (track.counted) continue; // one track = one trip
bool spawned_above = track.spawn_y < (line_y - CV_TRAVERSAL_MARGIN_PX);
bool spawned_below = track.spawn_y > (line_y + CV_TRAVERSAL_MARGIN_PX);
bool now_above_firm = track.y < (line_y - CV_TRAVERSAL_MARGIN_PX);
bool now_below_firm = track.y > (line_y + CV_TRAVERSAL_MARGIN_PX);
if (spawned_above && now_below_firm) {
bool in_cooldown = state.last_entry_frame != 0 &&
(state.frame_index - state.last_entry_frame) < CV_CROSSING_COOLDOWN_FRAMES;
if (!in_cooldown) {
state.entries++;
result.entries_delta++;
state.last_entry_frame = state.frame_index;
track.counted = true;
}
} else if (spawned_below && now_above_firm) {
bool in_cooldown = state.last_exit_frame != 0 &&
(state.frame_index - state.last_exit_frame) < CV_CROSSING_COOLDOWN_FRAMES;
if (!in_cooldown) {
state.exits++;
result.exits_delta++;
state.last_exit_frame = state.frame_index;
track.counted = true;
state.event_quiet_count = 0;
if (state.event_frame_count > CV_EVENT_MAX_FRAMES) {
// Timeout end: fg still elevated. Snap bg anyway — in practice
// a stuck-high event means bg is stale (walker has merged
// with stale bg, or AEC shifted). Leaving bg stale permanently
// poisons subsequent events. If a walker truly is mid-frame
// they'll get absorbed into bg, but that's a rare corner
// beaten by the common case of stale bg chaining events.
finalize_event(state, result);
event_reset(state);
memcpy(state.background, frame, CV_PIXELS);
}
}
track.above_line = (track.y < line_y);
}
return result;

View File

@@ -12,24 +12,63 @@ static const int CV_MIN_BLOB_PX = 64;
static const float CV_MAX_MOVE = 15.0f;
static const int CV_MAX_MISSED = 10;
// Directional counting margin: a track only counts if it spawned and is now
// both at least this far from the line (in pixels). Prevents counting blobs
// that wobble around the line or spawn on top of it. Value chosen at ~15% of
// the 96px frame: 14px ≈ the typical torso half-width overhead.
static const float CV_TRAVERSAL_MARGIN_PX = 14.0f;
// Event-based walker detector. Per-frame zone-flip approaches were direction-
// blind at realistic mounts: a walker traversing top-to-bottom and a walker
// traversing bottom-to-top produced identical zone-dominance sequences
// (geometric artifact of asymmetric zones + body spanning the line). The
// event approach buffers a whole walker event, then decides direction from
// the centroid trajectory: sign(first_centroid_y - peak_centroid_y) > 0 means
// the centroid moved upward through the frame during the event.
//
// Per-mount convention: UP through frame == ENTRY into store. Flip the camera
// mount or invert the mapping in cv_process if the physical install differs.
// Per-direction crossing cooldown. Any same-direction crossing whose frame gap
// is strictly less than this value is dropped. At 5 fps, a value of 5 → ≈0.8s
// suppression window. Purpose: mask track churn (blob briefly drops below
// min_blob_px, track dies & respawns, re-crosses).
static const uint32_t CV_CROSSING_COOLDOWN_FRAMES = 5;
// fg_count thresholds that gate event start/end. Tuned against a real
// 8-walk isolated test (see .agent/walk_isolated_8walks.log). Lower than
// initial guesses because the 7' overhead mount produces smaller centroid
// excursions than we originally modelled.
static const int CV_EVENT_ENTER_THRESH = 250;
static const int CV_EVENT_EXIT_THRESH = 150;
// Number of consecutive sub-EXIT frames required to end an event.
static const int CV_EVENT_QUIET_FRAMES = 3;
// Min/max event duration in frames. Below min = too brief to be a walker
// (noise burst) and the event is dropped. Exceeding max does NOT reject the
// event: it is force-finalized as a timeout and may still fire.
static const int CV_EVENT_MIN_FRAMES = 5;
// MAX bounds the event duration. Too low (15) cut events off while walker
// was still physically in frame — every fire hit dur=MAX+1 and bg snapped
// with a walker-ghost baked in, corrupting the next walk. Too high (40)
// merged multiple walkers. 25 frames (5s) lets a single walker reach the
// quiet-exit path (fg drops below EXIT_THRESH) before timeout, so bg snaps
// on a clean empty frame.
static const int CV_EVENT_MAX_FRAMES = 25;
// Required vertical extent: during the event, fg must have reached near the
// top of the frame (min_y <= TOP) AND near the bottom (max_y >= BOT). At a
// 7' overhead mount real walkers span fg y≈0..70, not 0..95 — the original
// 10/85 gates rejected most real walks. Relaxed to catch them while still
// filtering small local motion that doesn't span the doorway.
static const int CV_EVENT_EXTENT_TOP = 25;
static const int CV_EVENT_EXTENT_BOT = 50;
// Minimum centroid excursion (max of up_score/down_score) for a valid
// trajectory. At overhead mount walker centroid traverses ~15-40 pixels;
// 15 was too aggressive and dropped clean walks. 5 still filters wobble.
static const float CV_EVENT_MIN_TRAJ = 5.0f;
// Refractory period after a fire. Shorter than originally chosen — at 5 fps
// a second walker can arrive within 2s of the first, especially at busy
// doorways. 10 frames = 2s of back-pressure, tuned to match the gap between
// consecutive isolated walks in the test log.
static const uint32_t CV_EVENT_REFRACTORY_FRAMES = 10;
// Diagnostic only: tracks are kept for spawn logging. Counting does NOT
// depend on tracks.
struct CVTrack {
int id;
float x, y;
float spawn_y; // y at track creation — used for directional counting
bool above_line;
bool counted; // fires at most once per track (one track = one trip)
float spawn_y;
int missed;
};
@@ -42,13 +81,36 @@ struct CVState {
std::vector<CVTrack> tracks;
int entries;
int exits;
uint32_t last_entry_frame; // 0 = never; frame_index of last counted entry
uint32_t last_exit_frame; // 0 = never; frame_index of last counted exit
// Event state machine.
bool event_active;
uint32_t event_start_frame;
int event_frame_count;
int event_peak_n;
float event_first_c;
float event_last_c;
float event_min_c; // min centroid_y observed during event
float event_max_c; // max centroid_y observed during event
int event_min_y_seen;
int event_max_y_seen;
int event_quiet_count;
uint32_t last_fire_frame; // 0 = never; frame of last counted fire
};
// Per-call output of cv_process.
// NOTE: cv_process initializes this struct with a positional brace list, so
// the field order below must stay in sync with that initializer.
struct CVResult {
// Incremented by one when an event fires as an entry; starts at 0 each call.
int entries_delta;
// Incremented by one when an event fires as an exit; starts at 0 each call.
int exits_delta;
// Per-frame foreground diagnostics, filled in by the foreground pass of
// cv_process. They keep their initial values (0 / -1 / -1 / -1.0f) when
// cv_process returns before that pass, e.g. while the background is not yet
// valid.
// Sum of the foreground mask over the whole frame.
int fg_count;
// Smallest row index containing any foreground; -1 when fg_count is 0.
int fg_min_y;
// Largest row index containing any foreground; -1 when fg_count is 0.
int fg_max_y;
// Foreground-weighted mean row index; -1.0f when fg_count is 0.
float fg_centroid_y;
// Populated only on a fire frame; zeroed otherwise.
// Copies of the event's first / min / max / last centroid_y at the fire.
float fire_first_c;
float fire_min_c;
float fire_max_c;
float fire_last_c;
// Event length in frames (event_frame_count) at the fire.
int fire_duration;
};
void cv_init(CVState& state);