Skip to content

Commit 8a9e890

Browse files
authored
Merge pull request #393 from ruvnet/fix/esp32-node-id-clobber
fix(firmware): defensive node_id capture prevents runtime clobber (#390)
2 parents 6e015c4 + 425f0e6 commit 8a9e890

File tree

7 files changed

+58
-10
lines changed

7 files changed

+58
-10
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1010
### Fixed
1111
- **`provision.py` esptool v5 compat** (#391) — Stale `write_flash` (underscore) syntax in the dry-run manual-flash hint now uses `write-flash` (hyphenated) for esptool >= 5.x. The primary flash command was already correct.
1212
- **`provision.py` silent NVS wipe** (#391) — The script replaces the entire `csi_cfg` NVS namespace on every run, so partial invocations were silently erasing WiFi credentials and causing `Retrying WiFi connection (10/10)` in the field. Now refuses to run without `--ssid`, `--password`, and `--target-ip` unless `--force-partial` is passed. `--force-partial` prints a warning listing which keys will be wiped.
13+
- **Firmware: defensive `node_id` capture** (#232, #375, #385, #386, #390) — Users on multi-node deployments reported `node_id` reverting to the Kconfig default (`1`) in UDP frames and in the `csi_collector` init log, despite NVS loading the correct value. The root cause (memory corruption of `g_nvs_config`) has not been definitively isolated, but the UDP frame header is now tamper-proof: `csi_collector_init()` captures `g_nvs_config.node_id` into a module-local `s_node_id` once, and `csi_serialize_frame()` plus all other consumers (`edge_processing.c`, `wasm_runtime.c`, `display_ui.c`, `swarm_bridge_init`) read it via the new `csi_collector_get_node_id()` accessor. A canary logs `WARN` if `g_nvs_config.node_id` diverges from `s_node_id` at end-of-init, helping isolate the upstream corruption path. Validated on attached ESP32-S3 (COM8): NVS `node_id=2` propagates through boot log, capture log, init log, and byte[4] of every UDP frame.
1314

1415
### Docs
1516
- **CHANGELOG catch-up** (#367) — Added missing entries for v0.5.5, v0.6.0, and v0.7.0 releases.

firmware/esp32-csi-node/main/csi_collector.c

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,14 @@
2525
/* ADR-060: Access the global NVS config for MAC filter and channel override. */
2626
extern nvs_config_t g_nvs_config;
2727

28+
/* Defensive fix (#232, #375, #385, #386, #390): capture node_id at init-time
29+
* into a module-local static. Using the global g_nvs_config.node_id directly
30+
* at every callback is vulnerable to any memory corruption that clobbers the
31+
* struct (which users have reported reverting node_id to the Kconfig default
32+
* of 1). The local copy is set once at csi_collector_init() and then used
33+
* exclusively by csi_serialize_frame(). */
34+
static uint8_t s_node_id = 1;
35+
2836
/* ADR-057: Build-time guard — fail early if CSI is not enabled in sdkconfig.
2937
* Without this, the firmware compiles but crashes at runtime with:
3038
* "E (xxxx) wifi:CSI not enabled in menuconfig!"
@@ -117,8 +125,9 @@ size_t csi_serialize_frame(const wifi_csi_info_t *info, uint8_t *buf, size_t buf
117125
uint32_t magic = CSI_MAGIC;
118126
memcpy(&buf[0], &magic, 4);
119127

120-
/* Node ID (from NVS runtime config, not compile-time Kconfig) */
121-
buf[4] = g_nvs_config.node_id;
128+
/* Node ID (captured at init into s_node_id to survive memory corruption
129+
* that could clobber g_nvs_config.node_id - see #232/#375/#385/#390). */
130+
buf[4] = s_node_id;
122131

123132
/* Number of antennas */
124133
buf[5] = n_antennas;
@@ -215,6 +224,13 @@ static void wifi_promiscuous_cb(void *buf, wifi_promiscuous_pkt_type_t type)
215224

216225
void csi_collector_init(void)
217226
{
227+
/* Capture node_id into module-local static at init time. After this point
228+
* csi_serialize_frame() uses s_node_id exclusively, isolating the UDP
229+
* frame node_id field from any memory corruption of g_nvs_config. */
230+
s_node_id = g_nvs_config.node_id;
231+
ESP_LOGI(TAG, "Captured node_id=%u at init (defensive copy for #232/#375/#385/#390)",
232+
(unsigned)s_node_id);
233+
218234
/* ADR-060: Determine the CSI channel.
219235
* Priority: 1) NVS override (--channel), 2) connected AP channel, 3) Kconfig default. */
220236
uint8_t csi_channel = (uint8_t)CONFIG_CSI_WIFI_CHANNEL;
@@ -272,8 +288,24 @@ void csi_collector_init(void)
272288
g_nvs_config.filter_mac[4], g_nvs_config.filter_mac[5]);
273289
}
274290

275-
ESP_LOGI(TAG, "CSI collection initialized (node_id=%d, channel=%u)",
276-
g_nvs_config.node_id, (unsigned)csi_channel);
291+
ESP_LOGI(TAG, "CSI collection initialized (node_id=%u, channel=%u)",
292+
(unsigned)s_node_id, (unsigned)csi_channel);
293+
294+
/* Clobber-detection canary: if g_nvs_config.node_id no longer matches the
295+
* value we captured, something corrupted the struct between nvs_config_load
296+
* and here. This is the historic #232/#375 symptom. */
297+
if (g_nvs_config.node_id != s_node_id) {
298+
ESP_LOGW(TAG, "node_id clobber detected: captured=%u but g_nvs_config=%u "
299+
"(frames will use captured value %u). Please report to #390.",
300+
(unsigned)s_node_id, (unsigned)g_nvs_config.node_id,
301+
(unsigned)s_node_id);
302+
}
303+
}
304+
305+
/* Accessor for other modules that need the authoritative runtime node_id. */
306+
uint8_t csi_collector_get_node_id(void)
307+
{
308+
return s_node_id;
277309
}
278310

279311
/* ---- ADR-029: Channel hopping ---- */

firmware/esp32-csi-node/main/csi_collector.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,18 @@
2929
*/
3030
void csi_collector_init(void);
3131

32+
/**
33+
* Get the runtime node_id captured at csi_collector_init().
34+
*
35+
* This is a defensive copy of g_nvs_config.node_id taken at init time. Other
36+
* modules (edge_processing, wasm_runtime, display_ui) should prefer this
37+
* accessor over reading g_nvs_config.node_id directly, because the global
38+
* struct can be clobbered by memory corruption (see #232, #375, #385, #390).
39+
*
40+
* @return Node ID (0-255) as loaded from NVS or Kconfig default at boot.
41+
*/
42+
uint8_t csi_collector_get_node_id(void);
43+
3244
/**
3345
* Serialize CSI data into ADR-018 binary frame format.
3446
*

firmware/esp32-csi-node/main/display_ui.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#include "display_ui.h"
1010
#include "nvs_config.h"
11+
#include "csi_collector.h" /* csi_collector_get_node_id() - defensive #390 */
1112
#include "sdkconfig.h"
1213

1314
extern nvs_config_t g_nvs_config;
@@ -350,7 +351,7 @@ void display_ui_update(void)
350351
{
351352
char buf[48];
352353

353-
snprintf(buf, sizeof(buf), "Node: %d", g_nvs_config.node_id);
354+
snprintf(buf, sizeof(buf), "Node: %u", (unsigned)csi_collector_get_node_id());
354355
lv_label_set_text(s_sys_node, buf);
355356

356357
snprintf(buf, sizeof(buf), "Heap: %lu KB free",

firmware/esp32-csi-node/main/edge_processing.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
#include "edge_processing.h"
2121
#include "nvs_config.h"
22+
#include "csi_collector.h" /* csi_collector_get_node_id() - defensive #390 */
2223
#include "mmwave_sensor.h"
2324

2425
/* Runtime config — declared in main.c, loaded from NVS at boot. */
@@ -441,7 +442,7 @@ static void send_compressed_frame(const uint8_t *iq_data, uint16_t iq_len,
441442
uint32_t magic = EDGE_COMPRESSED_MAGIC;
442443
memcpy(&pkt[0], &magic, 4);
443444

444-
pkt[4] = g_nvs_config.node_id;
445+
pkt[4] = csi_collector_get_node_id(); /* #390: defensive copy */
445446
pkt[5] = channel;
446447
memcpy(&pkt[6], &iq_len, 2);
447448
memcpy(&pkt[8], &comp_len, 2);
@@ -557,7 +558,7 @@ static void send_vitals_packet(void)
557558
memset(&pkt, 0, sizeof(pkt));
558559

559560
pkt.magic = EDGE_VITALS_MAGIC;
560-
pkt.node_id = g_nvs_config.node_id;
561+
pkt.node_id = csi_collector_get_node_id(); /* #390: defensive copy */
561562

562563
pkt.flags = 0;
563564
if (s_presence_detected) pkt.flags |= 0x01;
@@ -647,7 +648,7 @@ static void send_feature_vector(void)
647648
memset(&pkt, 0, sizeof(pkt));
648649

649650
pkt.magic = EDGE_FEATURE_MAGIC;
650-
pkt.node_id = g_nvs_config.node_id;
651+
pkt.node_id = csi_collector_get_node_id(); /* #390: defensive copy */
651652
pkt.reserved = 0;
652653
pkt.seq = s_feature_seq++;
653654
pkt.timestamp_us = esp_timer_get_time();

firmware/esp32-csi-node/main/main.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ void app_main(void)
267267
strncpy(swarm_cfg.seed_url, g_nvs_config.seed_url, sizeof(swarm_cfg.seed_url) - 1);
268268
strncpy(swarm_cfg.seed_token, g_nvs_config.seed_token, sizeof(swarm_cfg.seed_token) - 1);
269269
strncpy(swarm_cfg.zone_name, g_nvs_config.zone_name, sizeof(swarm_cfg.zone_name) - 1);
270-
swarm_ret = swarm_bridge_init(&swarm_cfg, g_nvs_config.node_id);
270+
swarm_ret = swarm_bridge_init(&swarm_cfg, csi_collector_get_node_id());
271271
if (swarm_ret != ESP_OK) {
272272
ESP_LOGW(TAG, "Swarm bridge init failed: %s", esp_err_to_name(swarm_ret));
273273
}

firmware/esp32-csi-node/main/wasm_runtime.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "sdkconfig.h"
1414
#include "wasm_runtime.h"
1515
#include "nvs_config.h"
16+
#include "csi_collector.h" /* csi_collector_get_node_id() - defensive #390 */
1617

1718
extern nvs_config_t g_nvs_config;
1819

@@ -383,7 +384,7 @@ static void send_wasm_output(uint8_t slot_id)
383384
memset(&pkt, 0, sizeof(pkt));
384385

385386
pkt.magic = WASM_OUTPUT_MAGIC;
386-
pkt.node_id = g_nvs_config.node_id;
387+
pkt.node_id = csi_collector_get_node_id(); /* #390: defensive copy */
387388
pkt.module_id = slot_id;
388389
pkt.event_count = n_filtered;
389390

0 commit comments

Comments
 (0)