From 135a35a2af30470c5484855cf8953d65da8ff45b Mon Sep 17 00:00:00 2001 From: Ibrar Ahmed Date: Wed, 20 May 2026 17:42:21 +0500 Subject: [PATCH 1/3] Make heap apply retry loop configurable via spock.read_retry_count spock_apply_heap_update() and spock_apply_heap_delete() hardcoded the retry count at 5 when the local tuple could not be found. Expose this as a GUC (default 5, range 0..100, PGC_SIGHUP) so deployments hitting out-of-order arrivals can tune the loop without rebuilding. --- include/spock.h | 1 + src/spock.c | 21 ++++ src/spock_apply_heap.c | 5 +- tests/tap/t/030_read_retry_count_guc.pl | 130 ++++++++++++++++++++++++ 4 files changed, 155 insertions(+), 2 deletions(-) create mode 100644 tests/tap/t/030_read_retry_count_guc.pl diff --git a/include/spock.h b/include/spock.h index d2d0d705..98997341 100644 --- a/include/spock.h +++ b/include/spock.h @@ -50,6 +50,7 @@ extern int restart_delay_default; extern int restart_delay_on_exception; extern int spock_replay_queue_size; extern int spock_pause_timeout; +extern int spock_read_retry_count; extern bool check_all_uc_indexes; extern bool spock_enable_quiet_mode; extern int log_origin_change; diff --git a/src/spock.c b/src/spock.c index dc981768..fa1c28d1 100644 --- a/src/spock.c +++ b/src/spock.c @@ -151,6 +151,7 @@ int restart_delay_default; int restart_delay_on_exception; int spock_replay_queue_size; int spock_pause_timeout = 10; /* seconds to wait for apply workers to pause */ +int spock_read_retry_count = 5; /* heap update/delete: retries when local tuple is missing */ bool check_all_uc_indexes = false; bool spock_enable_quiet_mode = false; int log_origin_change = SPOCK_ORIGIN_NONE; @@ -1195,6 +1196,26 @@ _PG_init(void) NULL, NULL); + DefineCustomIntVariable("spock.read_retry_count", + "Number of times the apply worker re-reads the local " + "relation when a row targeted by a remote UPDATE or " + "DELETE is not yet visible", + "On each retry the apply worker waits for any " + "concurrently-applying 
transaction to finish, then " + "searches the local relation again. Set to 0 to disable " + "retries (the row-missing path runs immediately). " + "Used in spock_apply_heap_update and " + "spock_apply_heap_delete.", + &spock_read_retry_count, + 5, + 0, + 100, + PGC_SIGHUP, + 0, + NULL, + NULL, + NULL); + DefineCustomIntVariable("spock.exception_replay_queue_size", "Maximum in-memory size for the apply replay queue", "When the replay queue exceeds this size, subsequent " diff --git a/src/spock_apply_heap.c b/src/spock_apply_heap.c index 6eb833b7..93c77c0c 100644 --- a/src/spock_apply_heap.c +++ b/src/spock_apply_heap.c @@ -79,6 +79,7 @@ #include "spock_apply_heap.h" #include "spock_apply.h" #include "spock_exception_handler.h" +#include "spock.h" typedef struct ApplyExecutionData { @@ -969,7 +970,7 @@ spock_apply_heap_update(SpockRelation *rel, SpockTupleData *oldtup, idxused = edata->targetRel->idxoid; retry = 0; - while (retry < 5) + while (retry < spock_read_retry_count) { found = FindReplTupleInLocalRel(edata, relinfo->ri_RelationDesc, edata->targetRel->idxoid, @@ -1086,7 +1087,7 @@ spock_apply_heap_delete(SpockRelation *rel, SpockTupleData *oldtup) relinfo = edata->targetRelInfo; retry = 0; - while (retry < 5) + while (retry < spock_read_retry_count) { found = FindReplTupleInLocalRel(edata, relinfo->ri_RelationDesc, edata->targetRel->idxoid, diff --git a/tests/tap/t/030_read_retry_count_guc.pl b/tests/tap/t/030_read_retry_count_guc.pl new file mode 100644 index 00000000..5e0123a2 --- /dev/null +++ b/tests/tap/t/030_read_retry_count_guc.pl @@ -0,0 +1,130 @@ +use strict; +use warnings; +use Test::More; +use lib '.'; +use lib 't'; +use SpockTest qw( + create_cluster destroy_cluster + get_test_config system_or_bail + psql_or_bail scalar_query +); + +# ============================================================================= +# Test: 030_read_retry_count_guc.pl +# +# Verifies the spock.read_retry_count GUC: +# 1. is registered with the expected default (5) +# 2. 
exposes the expected pg_settings metadata (context, unit, min_val, max_val) +# 3. accepts ALTER SYSTEM SET + pg_reload_conf() updates at runtime +# 4. rejects values outside the documented [0, 100] range +# +# The GUC controls the retry loop in spock_apply_heap_update() and +# spock_apply_heap_delete() — the apply worker re-reads the local +# relation up to spock.read_retry_count times when a row targeted by a +# remote UPDATE/DELETE is not yet visible locally. +# ============================================================================= + +create_cluster(2, 'Create 2-node Spock cluster for read_retry_count GUC'); + +my $config = get_test_config(); +my $host = $config->{host}; +my $primary_port = $config->{node_ports}->[0]; + +# ----------------------------------------------------------------------------- +# 1. Default value is 5 +# ----------------------------------------------------------------------------- +my $default = scalar_query(1, "SHOW spock.read_retry_count"); +$default =~ s/\s+//g; +is($default, '5', + "spock.read_retry_count default is 5 (matches prior hardcoded behaviour)"); + +# ----------------------------------------------------------------------------- +# 2. 
The GUC is reported in pg_settings with the expected metadata +# ----------------------------------------------------------------------------- +my $context = scalar_query(1, + "SELECT context FROM pg_settings WHERE name = 'spock.read_retry_count'"); +$context =~ s/\s+//g; +is($context, 'sighup', + "spock.read_retry_count GUC context is PGC_SIGHUP (settable via reload)"); + +my $unit = scalar_query(1, + "SELECT coalesce(unit::text, '') FROM pg_settings WHERE name = 'spock.read_retry_count'"); +$unit =~ s/\s+//g; +is($unit, '', + "spock.read_retry_count is unit-less (raw retry count, not a time/size)"); + +my $min = scalar_query(1, + "SELECT min_val FROM pg_settings WHERE name = 'spock.read_retry_count'"); +$min =~ s/\s+//g; +is($min, '0', "spock.read_retry_count min_val is 0"); + +my $max = scalar_query(1, + "SELECT max_val FROM pg_settings WHERE name = 'spock.read_retry_count'"); +$max =~ s/\s+//g; +is($max, '100', "spock.read_retry_count max_val is 100"); + +# ----------------------------------------------------------------------------- +# 3. ALTER SYSTEM SET + pg_reload_conf() takes effect at runtime +# ----------------------------------------------------------------------------- +psql_or_bail(1, "ALTER SYSTEM SET spock.read_retry_count = 10"); +psql_or_bail(1, "SELECT pg_reload_conf()"); + +# Open a fresh psql session (the SIGHUP needs a new backend to pick up the +# value from the postmaster). scalar_query opens a new connection each call. 
+sleep(1); +my $after_set = scalar_query(1, "SHOW spock.read_retry_count"); +$after_set =~ s/\s+//g; +is($after_set, '10', + "spock.read_retry_count picks up new value (10) after ALTER SYSTEM + reload"); + +# Reset to default +psql_or_bail(1, "ALTER SYSTEM RESET spock.read_retry_count"); +psql_or_bail(1, "SELECT pg_reload_conf()"); +sleep(1); +my $after_reset = scalar_query(1, "SHOW spock.read_retry_count"); +$after_reset =~ s/\s+//g; +is($after_reset, '5', + "spock.read_retry_count returns to default (5) after ALTER SYSTEM RESET"); + +# ----------------------------------------------------------------------------- +# 4. Out-of-range values are rejected +# ----------------------------------------------------------------------------- +# Use system() so we can check the exit code without psql_or_bail dying. +my $pg_bin = $config->{pg_bin}; +my $dbname = $config->{db_name}; +my $db_user = $config->{db_user}; + +my $rc_neg = system( + "$pg_bin/psql -X -h $host -p $primary_port -d $dbname -U $db_user " + . "-v ON_ERROR_STOP=1 " + . "-c \"ALTER SYSTEM SET spock.read_retry_count = -1\" " + . ">/dev/null 2>&1"); +isnt($rc_neg, 0, "spock.read_retry_count rejects value below 0 (-1)"); + +my $rc_hi = system( + "$pg_bin/psql -X -h $host -p $primary_port -d $dbname -U $db_user " + . "-v ON_ERROR_STOP=1 " + . "-c \"ALTER SYSTEM SET spock.read_retry_count = 101\" " + . ">/dev/null 2>&1"); +isnt($rc_hi, 0, "spock.read_retry_count rejects value above 100 (101)"); + +# Boundary values must succeed +my $rc_zero = system( + "$pg_bin/psql -X -h $host -p $primary_port -d $dbname -U $db_user " + . "-v ON_ERROR_STOP=1 " + . "-c \"ALTER SYSTEM SET spock.read_retry_count = 0\" " + . ">/dev/null 2>&1"); +is($rc_zero, 0, "spock.read_retry_count accepts the lower boundary (0)"); + +my $rc_max = system( + "$pg_bin/psql -X -h $host -p $primary_port -d $dbname -U $db_user " + . "-v ON_ERROR_STOP=1 " + . "-c \"ALTER SYSTEM SET spock.read_retry_count = 100\" " + . 
">/dev/null 2>&1"); +is($rc_max, 0, "spock.read_retry_count accepts the upper boundary (100)"); + +# Cleanup so the destroy_cluster restart leaves no residue +psql_or_bail(1, "ALTER SYSTEM RESET spock.read_retry_count"); + +destroy_cluster('Destroy test cluster'); +done_testing(); From e8b3b2e848a8044e1384d6914e6486e28d6ae935 Mon Sep 17 00:00:00 2001 From: Ibrar Ahmed Date: Fri, 22 May 2026 23:44:20 +0500 Subject: [PATCH 2/3] spock.read_retry_count: drop function-name reference from GUC description --- src/spock.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/spock.c b/src/spock.c index fa1c28d1..828c5d13 100644 --- a/src/spock.c +++ b/src/spock.c @@ -1203,9 +1203,7 @@ _PG_init(void) "On each retry the apply worker waits for any " "concurrently-applying transaction to finish, then " "searches the local relation again. Set to 0 to disable " - "retries (the row-missing path runs immediately). " - "Used in spock_apply_heap_update and " - "spock_apply_heap_delete.", + "retries (the row-missing path runs immediately).", &spock_read_retry_count, 5, 0, From 7cc757e014b1fb800b91bac1dc53ccb7a953a327 Mon Sep 17 00:00:00 2001 From: Mason Sharp Date: Mon, 25 May 2026 12:28:17 -0700 Subject: [PATCH 3/3] document spock.read_retry_count GUC Add a configuring.md entry describing the retry behavior, the 0-disables-retries semantics, the 0-100 range, the default of 5, and the SIGHUP reload behavior. --- docs/configuring.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/docs/configuring.md b/docs/configuring.md index a2926dab..02afe5b9 100644 --- a/docs/configuring.md +++ b/docs/configuring.md @@ -215,6 +215,26 @@ liveness detection. Default: `300` (5 minutes). spock.apply_idle_timeout = 300 ``` +### `spock.read_retry_count` + +Number of times the apply worker re-reads the local relation when a row +targeted by a remote `UPDATE` or `DELETE` is not yet visible. 
On each retry +the apply worker waits for any concurrently-applying transaction to finish +and then searches the local relation again. If the row is still missing +after the configured number of retries, the apply worker falls through to +the standard row-missing handling path (conflict resolution and/or the +exception handler, depending on the configured +[`spock.exception_behaviour`](#spock-exception_behaviour)). + +Set to `0` to disable retries entirely (the row-missing path runs +immediately). Valid range is `0` to `100`. Default: `5`. Changes take +effect on `SIGHUP` (for example, `SELECT pg_reload_conf()`); a server +restart is not required. + +``` +spock.read_retry_count = 5 +``` + ### Logical Slot Failover (HA Standby) Spock creates logical replication slots on each provider node. For high