
# Package Namespace is hardcoded. Modules must live in
# PGBuild::Modules

=comment

Copyright (c) 2026, Andrew Dunstan

See accompanying License file for license details

=cut

# Test that WAL generated by the .0 release of a major version replays
# correctly on the current STABLE binary. This catches backwards-
# compatibility regressions in WAL replay code, such as the self-deadlock
# in RecordNewMultiXact introduced by commit 0852643e1c6.
#
# The .0 binary is built once and cached. Each run generates fresh WAL
# (the .0 regression tests, a small DML workload, and two batches of 2500
# multixacts made with the savepoint trick, so that an SLRU page boundary
# is crossed after the base backup) and verifies that a STABLE standby can
# replay it.

package PGBuild::Modules::TestReplayXversion;

use PGBuild::Log;
use PGBuild::Options;
use PGBuild::Utils qw(:DEFAULT $tmpdir $steps_completed $devnull);

# rmtree must be requested explicitly: an import list suppresses
# File::Path's default exports.
use File::Path qw(mkpath rmtree);
use POSIX ':sys_wait_h';

use strict;
use warnings;

our ($VERSION); $VERSION = 'REL_20';

my $hooks = {
    'need-run'   => \&need_run,
    'locale-end' => \&installcheck,
};

# Module entry point, called with (buildroot, branch, conf, pgsql).
# Registers the hooks only for REL_<n>_STABLE branches with n >= 14;
# for any other branch the module stays inactive.
sub setup
{
    my $class = __PACKAGE__;

    my $buildroot = shift;
    my $branch    = shift;
    my $conf      = shift;
    my $pgsql     = shift;    # part of the calling convention; not used here

    return unless $branch =~ /^REL_(\d+)_STABLE$/;
    my $major = $1;
    return if $major < 14;

    my $animal      = $conf->{animal};
    my $replay_root = "$buildroot/replay.$animal";
    mkdir $replay_root unless -d $replay_root;

    my $self = {
        buildroot   => $buildroot,
        pgbranch    => $branch,
        bfconf      => $conf,
        dot0_tag    => "REL_${major}_0",
        replay_root => $replay_root,
    };
    bless($self, $class);

    register_module_hooks($self, $hooks);
    return;
}

# 'need-run' hook: request a run when the cached .0 installation is
# missing, but only if it could actually be built. A STABLE branch exists
# months before its REL_x_0 tag does; forcing a run on every cycle during
# that period would achieve nothing.
sub need_run
{
    my $self         = shift;
    my $need_run_ref = shift;
    my $dot0_inst    = "$self->{replay_root}/$self->{pgbranch}/inst-dot0";

    # same cache test that installcheck() uses
    return if -e "$dot0_inst/bin/postgres";

    my $gitrepo = find_git_repo($self);
    return unless $gitrepo && dot0_tag_exists($gitrepo, $self->{dot0_tag});

    $$need_run_ref = 1;
    return;
}

# 'locale-end' hook: for locale C only, build the .0 release if it is not
# cached yet, then run the replay test and report the outcome.
sub installcheck
{
    my $self   = shift;
    my $locale = shift;
    return unless $locale eq 'C';

    return unless step_wanted('replay-xversion-check');

    local %ENV = %ENV;

    # Add replay_xversion to PG_TEST_EXTRA so that tests can be gated on
    # this token.
    # NOTE(review): %ENV is localized just above, so this setting is only
    # seen by processes started from this hook and is undone on return.
    # TAP tests run by other build steps will not see it - confirm whether
    # it should be set somewhere with a wider scope.
    my $extra = $ENV{PG_TEST_EXTRA} // '';
    $ENV{PG_TEST_EXTRA} = "$extra replay_xversion"
      unless $extra =~ /\breplay_xversion\b/;

    my $tdir = $tmpdir;
    $tdir =~ s!\\!/!g;
    $ENV{PGHOST} = $tdir;

    my $this_branch = $self->{pgbranch};
    my $dot0_tag    = $self->{dot0_tag};
    my $replay_loc  = "$self->{replay_root}/$this_branch";

    mkpath $replay_loc unless -d $replay_loc;

    # Phase 1: build .0 if not already cached
    my $dot0_inst = "$replay_loc/inst-dot0";
    unless (-e "$dot0_inst/bin/postgres")
    {
        print time_str(), "building $dot0_tag for replay check\n"
          if $verbose;

        my $ok = build_dot0($self);

        # tag not yet released - silently skip
        return if $ok < 0;

        my @saveout;
        my $savelog = PGBuild::Log->new('replay-xversion-save');
        foreach my $f (qw(configure build install))
        {
            $savelog->add_log("$replay_loc/$f.log")
              if -s "$replay_loc/$f.log";
        }
        push(@saveout, $savelog->log_string);
        writelog('replay-xversion-save', \@saveout);
        print "======== replay xversion save log ===========\n", @saveout
          if ($verbose > 1);
        send_result('ReplayXversionSave', 1, \@saveout) unless $ok;
        $steps_completed .= " ReplayXversionSave";
    }

    # Phase 2: test replay
    print time_str(),
      "checking WAL replay from $dot0_tag to $this_branch ...\n"
      if $verbose;

    my ($status, @testout) = test_replay($self);

    my $testlog =
      PGBuild::Log->new("replay-xversion-$dot0_tag-$this_branch");
    foreach my $lf (glob("$replay_loc/*.log"))
    {
        $testlog->add_log($lf) if -s $lf;
    }
    push(@testout, $testlog->log_string);
    writelog("replay-xversion-$dot0_tag-$this_branch", \@testout);
    print "======== replay xversion test log ===========\n", @testout
      if ($verbose > 1);
    send_result("ReplayXversion-$dot0_tag-$this_branch", $status, \@testout)
      if $status;
    $steps_completed .= " ReplayXversion-$dot0_tag-$this_branch";
    return;
}

# Return true if $dot0_tag resolves to an object in the repository at
# $gitrepo.
sub dot0_tag_exists
{
    my ($gitrepo, $dot0_tag) = @_;
    system(
        qq{git -C "$gitrepo" rev-parse --verify "$dot0_tag^{}" >$devnull 2>&1}
    );
    return $? == 0;
}

# Build and install the .0 release into <replay_loc>/inst-dot0.
#
# Returns 1 on success, 0 on failure and -1 if the .0 tag does not exist
# yet (the caller skips the check in that case). The configure, build and
# install output goes to <replay_loc>/{configure,build,install}.log.
sub build_dot0
{
    my $self       = shift;
    my $replay_loc = "$self->{replay_root}/$self->{pgbranch}";
    my $dot0_inst  = "$replay_loc/inst-dot0";
    my $dot0_src   = "$replay_loc/src-dot0";
    my $dot0_tag   = $self->{dot0_tag};

    my $gitrepo = find_git_repo($self);
    unless ($gitrepo)
    {
        print "cannot find git repo for $dot0_tag checkout\n";
        return 0;
    }

    # verify the tag exists; return -1 (skip) if not yet released
    unless (dot0_tag_exists($gitrepo, $dot0_tag))
    {
        print "tag $dot0_tag not found, skipping replay check\n";
        return -1;
    }

    # extract source via git archive (no .git overhead)
    rmtree($dot0_src) if -d $dot0_src;
    mkpath($dot0_src);
    system(
        qq{git -C "$gitrepo" archive "$dot0_tag" | tar -x -C "$dot0_src"});

    # $? only reflects tar, the last command of the pipeline, so also
    # check that the tree really was extracted
    return 0 if $? || !-f "$dot0_src/configure";

    rmtree($dot0_inst) if -d $dot0_inst;
    mkpath($dot0_inst);

    my $make = $self->{bfconf}->{make}      || 'make';
    my $jobs = $self->{bfconf}->{make_jobs} || 1;

    my @steps = (
        [
            'configure',
            qq{./configure --prefix="$dot0_inst" }
              . qq{--enable-cassert --enable-debug}
        ],
        [ 'build',   qq{$make -j$jobs} ],
        [ 'install', qq{$make install} ],
    );

    foreach my $step (@steps)
    {
        my ($name, $cmd) = @$step;
        system(qq{cd "$dot0_src" && $cmd >"$replay_loc/$name.log" 2>&1});
        next unless $?;

        # don't leave a partial installation behind that a later run
        # could mistake for a usable cache
        rmtree($dot0_inst);
        return 0;
    }

    # keep regression test files for use as WAL workload; if this fails
    # the replay test simply runs without the regression workload
    rename("$dot0_src/src/test/regress", "$dot0_inst/regress")
      or print "could not keep $dot0_tag regression tests: $!\n";
    rmtree($dot0_src);
    return 1;
}

# Locate a git repository to take the .0 sources from: the pgmirror.git
# directory under the build root if there is one, otherwise the source
# directory, provided it has a .git entry. Returns nothing if neither is
# found.
sub find_git_repo
{
    my $self   = shift;
    my $mirror = "$self->{buildroot}/pgmirror.git";
    return $mirror if -d $mirror;

    my $srcdir = $from_source
      || "$self->{buildroot}/$self->{pgbranch}/pgsql";

    # -e rather than -d: .git is not always a directory
    return $srcdir if -e "$srcdir/.git";

    return;
}

# Generate SQL that creates ~$count multixacts in a single session using
# the savepoint trick: each SAVEPOINT gets a fresh sub-xid, and locking
# the same row with a different mode from the subtransaction forces a new
# MultiXactId. ROLLBACK TO undoes the sub-xid lock so the next iteration
# starts clean. $count defaults to 2500.
sub gen_multixact_sql
{
    my $count = shift || 2500;
    my $iteration =
      "SAVEPOINT a; SELECT * FROM mx_gen WHERE i = 1 FOR UPDATE; ROLLBACK TO a;";
    my @lines = (
        "CREATE TABLE IF NOT EXISTS mx_gen(i int PRIMARY KEY);",
        "INSERT INTO mx_gen VALUES (1) ON CONFLICT DO NOTHING;",
        "BEGIN;",
        "SELECT * FROM mx_gen WHERE i = 1 FOR NO KEY UPDATE;",
        ($iteration) x $count,
        "COMMIT;",
    );
    return join("\n", @lines) . "\n";
}

# Run the replay test: start a .0 primary, generate WAL on it, take a base
# backup, generate more WAL, then start a STABLE standby from the backup
# and wait (up to about two minutes) for it to replay up to the primary's
# flush LSN.
#
# Returns a list: a status (0 = success, 1 = failure) followed by message
# lines for the log. Both clusters are stopped and removed before
# returning from any point after they have been started.
sub test_replay
{
    my $self = shift;

    my $replay_loc   = "$self->{replay_root}/$self->{pgbranch}";
    my $dot0_inst    = "$replay_loc/inst-dot0";
    my $stable_inst  = "$self->{buildroot}/$self->{pgbranch}/inst";
    my $primary_data = "$tmpdir/replay-primary";
    my $standby_data = "$tmpdir/replay-standby";

    my $dport;
    {
        no warnings 'once';
        $dport = $main::buildport;
    }
    my $primary_port = $dport + 200;
    my $standby_port = $dport + 201;

    my $tdir = $tmpdir;
    $tdir =~ s!\\!/!g;

    # clean up leftovers
    rmtree($primary_data) if -d $primary_data;
    rmtree($standby_data) if -d $standby_data;

    # remove stale log files from previous runs
    unlink(
        glob(
            "$replay_loc/initdb.log $replay_loc/primary*.log "
              . "$replay_loc/standby*.log $replay_loc/basebackup.log "
              . "$replay_loc/mx*.log $replay_loc/workload.log "
              . "$replay_loc/regress.log"
        )
    );

    # --- set up .0 environment for the primary ---
    $ENV{LD_LIBRARY_PATH}   = "$dot0_inst/lib";
    $ENV{DYLD_LIBRARY_PATH} = "$dot0_inst/lib";

    # initdb
    system( qq{"$dot0_inst/bin/initdb" -A trust -U buildfarm }
          . qq{"$primary_data" >"$replay_loc/initdb.log" 2>&1});
    return (1, "initdb with $self->{dot0_tag} failed\n") if $?;

    # configure for streaming replication
    open(my $pgconf, '>>', "$primary_data/postgresql.conf")
      or return (1, "opening postgresql.conf: $!\n");
    print $pgconf "\n# TestReplayXversion\n";
    print $pgconf "listen_addresses = ''\n";
    print $pgconf "unix_socket_directories = '$tdir'\n";
    print $pgconf "port = $primary_port\n";
    print $pgconf "wal_level = replica\n";
    print $pgconf "max_wal_senders = 2\n";
    print $pgconf "wal_keep_size = '1GB'\n";
    close($pgconf);

    open(my $hba, '>>', "$primary_data/pg_hba.conf")
      or return (1, "opening pg_hba.conf: $!\n");
    print $hba "local replication all trust\n";
    close($hba);

    # start .0 primary
    system( qq{"$dot0_inst/bin/pg_ctl" -D "$primary_data" }
          . qq{-l "$replay_loc/primary.log" -w start }
          . qq{>"$replay_loc/primary-ctl.log" 2>&1});
    if ($?)
    {
        # "-w start" can fail by timing out with the postmaster still
        # coming up, so stop it rather than only removing its directory
        stop_and_clean($dot0_inst, $primary_data);
        return (1, "primary start failed\n");
    }

    # --- generate workload and take backup between page boundaries ---
    # To catch the SimpleLruWriteAll self-deadlock on PG 14-16, the
    # backup checkpoint must capture nextMulti between two SLRU page
    # boundaries (each page holds 2048 multixact IDs). Then the standby
    # replays the second page-boundary crossing without having seen any
    # XLOG_MULTIXACT_ZERO_OFF_PAGE records, entering the code path that
    # calls SimpleLruWriteAll while already holding the SLRU lock.

    my $psql =
      qq{"$dot0_inst/bin/psql" -h "$tdir" -p $primary_port -U buildfarm};

    # run .0 regression tests for diverse WAL coverage; they are only a
    # workload here, so their result is not checked
    my $regress = "$dot0_inst/regress";
    if (-x "$regress/pg_regress")
    {
        # keep pg_regress's output files out of the current directory
        my $regress_out = "$tmpdir/replay-regress";
        rmtree($regress_out) if -d $regress_out;
        mkpath($regress_out);

        # --max-concurrent-tests is a limit on the size of a schedule
        # group (larger groups are an error), so it stays at 20; the load
        # on the server is limited with --max-connections instead, to
        # keep checkpoint time short.
        run_with_timeout(
            180,
            qq{"$regress/pg_regress" }
              . qq{--inputdir="$regress" --outputdir="$regress_out" }
              . qq{--bindir="$dot0_inst/bin" --dlpath="$regress" }
              . qq{--host="$tdir" --port=$primary_port --user=buildfarm }
              . qq{--dbname=regression --max-concurrent-tests=20 }
              . qq{--max-connections=4 }
              . qq{--schedule="$regress/parallel_schedule"},
            "$replay_loc/regress.log"
        );
        rmtree($regress_out);
    }

    # basic DML workload for additional WAL coverage
    my $workload_sql = <<'END_SQL';
CREATE TABLE replay_dml(id serial PRIMARY KEY, data text);
INSERT INTO replay_dml SELECT g, repeat('x', 100) FROM generate_series(1,5000) g;
UPDATE replay_dml SET data = 'updated' WHERE id % 3 = 0;
DELETE FROM replay_dml WHERE id % 7 = 0;
CREATE INDEX ON replay_dml(data);
VACUUM replay_dml;
END_SQL

    write_and_run_sql($psql, "$tmpdir/workload.sql", $workload_sql,
        "$replay_loc/workload.log");

    # create the table and generate 2500 multixacts (past the first page
    # boundary at 2048); the ZERO_OFF_PAGE records for pages 0 and 1 are
    # now in the WAL, but will be BEFORE the backup checkpoint
    my $mx_pre = gen_multixact_sql(2500);
    write_and_run_sql($psql, "$tmpdir/mx_pre.sql", $mx_pre,
        "$replay_loc/mx-pre.log");

    # take base backup - its checkpoint captures nextMulti on page 1
    system( qq{"$dot0_inst/bin/pg_basebackup" -h "$tdir" -p $primary_port }
          . qq{-U buildfarm -D "$standby_data" --write-recovery-conf }
          . qq{>"$replay_loc/basebackup.log" 2>&1});
    if ($?)
    {
        stop_and_clean($dot0_inst, $primary_data);
        rmtree($standby_data) if -d $standby_data;
        return (1, "pg_basebackup failed\n");
    }

    # generate 2500 more multixacts AFTER the backup - these cross the
    # page 1->2 boundary at multi ~4096 and will be replayed by the standby
    my $mx_post = gen_multixact_sql(2500);
    write_and_run_sql($psql, "$tmpdir/mx_post.sql", $mx_post,
        "$replay_loc/mx-post.log");

    # flush WAL and record the LSN the standby must reach
    system(qq{$psql -c "SELECT pg_switch_wal()" postgres >$devnull 2>&1});
    my $primary_lsn =
      `$psql -A -t -c "SELECT pg_current_wal_flush_lsn()" postgres 2>$devnull`;
    $primary_lsn = '' unless defined $primary_lsn;
    $primary_lsn =~ s/\s+//g;

    # without a target LSN the wait loop below could never succeed, and
    # would report a replay failure that is really a primary problem
    unless ($primary_lsn =~ m{\A[0-9A-F]+/[0-9A-F]+\z}i)
    {
        stop_and_clean($dot0_inst, $primary_data);
        rmtree($standby_data);
        return (1, "could not determine flush LSN of primary\n");
    }

    # --- switch to STABLE environment for the standby ---
    $ENV{LD_LIBRARY_PATH}   = "$stable_inst/lib";
    $ENV{DYLD_LIBRARY_PATH} = "$stable_inst/lib";

    # override the standby port (the rest of postgresql.conf comes from
    # the base backup and primary_conninfo already points to the primary)
    my $sconf;
    unless (open($sconf, '>>', "$standby_data/postgresql.conf"))
    {
        # save $! before the cleanup commands overwrite it
        my $err = "$!";
        stop_and_clean($dot0_inst, $primary_data);
        rmtree($standby_data);
        return (1, "opening standby postgresql.conf: $err\n");
    }
    print $sconf "\n# TestReplayXversion standby overrides\n";
    print $sconf "port = $standby_port\n";

    # lets the standby replay in-place tablespace WAL produced by the .0
    # regression tests
    print $sconf "allow_in_place_tablespaces = on\n";
    close($sconf);

    # start STABLE standby
    system( qq{"$stable_inst/bin/pg_ctl" -D "$standby_data" }
          . qq{-l "$replay_loc/standby.log" -w start }
          . qq{>"$replay_loc/standby-ctl.log" 2>&1});
    if ($?)
    {
        # If replay hangs before the standby accepts connections,
        # "-w start" times out with the postmaster still running, so it
        # has to be stopped (or killed), not just have its directory
        # removed.
        stop_and_clean($stable_inst, $standby_data);
        stop_and_clean($dot0_inst,   $primary_data);
        return (1, "standby start failed\n");
    }

    # --- wait for the standby to catch up ---

    my $standby_psql =
      qq{"$stable_inst/bin/psql" -h "$tdir" -p $standby_port -U buildfarm};
    my $replayed = 0;

    foreach (1 .. 120)
    {
        my $ok =
          `$standby_psql -A -t -c "SELECT '$primary_lsn'::pg_lsn <= pg_last_wal_replay_lsn()" postgres 2>$devnull`;
        if (defined $ok && $ok =~ /^t/)
        {
            $replayed = 1;
            last;
        }

        # check if the standby is still alive
        system(
            qq{"$stable_inst/bin/pg_isready" -h "$tdir" -p $standby_port -d postgres >$devnull 2>&1}
        );
        last if $?;

        sleep 1;
    }

    # --- cleanup ---
    stop_and_clean($stable_inst, $standby_data);
    stop_and_clean($dot0_inst,   $primary_data);

    return (0, "standby replayed WAL from $self->{dot0_tag} successfully\n")
      if $replayed;

    return (1, "standby failed to replay WAL from $self->{dot0_tag}\n");
}

# Write $sql to $file and run it with the given psql command line against
# the postgres database, sending all output to $logfile. This is
# best-effort: psql's exit status is not checked.
sub write_and_run_sql
{
    my ($psql, $file, $sql, $logfile) = @_;
    my $fh;
    unless (open($fh, '>', $file))
    {
        print "could not write $file: $!\n";
        return;
    }
    print $fh $sql;
    close($fh);
    system(qq{$psql -f "$file" postgres >"$logfile" 2>&1});
    return;
}

# Stop the server running on $datadir, using pg_ctl from $instdir, and
# remove the data directory. Immediate mode with a 10 second timeout is
# used so that a deadlocked server cannot hang the test; if the pid file
# is still there afterwards, the postmaster is killed.
sub stop_and_clean
{
    my ($instdir, $datadir) = @_;
    system(
        qq{"$instdir/bin/pg_ctl" -D "$datadir" -m immediate -w -t 10 stop >$devnull 2>&1}
    );

    # if postgres is still running (e.g. deadlocked), kill it
    my $pidfile = "$datadir/postmaster.pid";
    if (-f $pidfile && open(my $fh, '<', $pidfile))
    {
        # the first line of the pid file is the postmaster's pid
        my $line = <$fh>;
        close($fh);
        if (defined $line && $line =~ /^\s*(\d+)/)
        {
            my $pid = $1;
            if ($pid > 0 && kill(0, $pid))
            {
                kill('KILL', $pid);
                sleep(1);
            }
        }
    }
    rmtree($datadir);
    return;
}

# Run $cmd via "sh -c" in a process group of its own, with stdout and
# stderr redirected to $logfile, and wait for at most $seconds. On timeout
# the whole process group gets SIGTERM, then SIGKILL two seconds later.
# The command's exit status is not reported to the caller.
sub run_with_timeout
{
    my ($seconds, $cmd, $logfile) = @_;
    my $pid = fork();
    return unless defined $pid;
    if ($pid == 0)
    {
        # Leave the child with POSIX::_exit, not exit: a plain exit here
        # would run the END blocks of the program that forked us.
        setpgrp(0, 0);
        open(STDOUT, '>', $logfile) or POSIX::_exit(1);
        open(STDERR, '>&', \*STDOUT) or POSIX::_exit(1);
        { exec("sh", "-c", $cmd) };
        POSIX::_exit(1);
    }
    my $deadline = time() + $seconds;
    while (time() < $deadline)
    {
        return if waitpid($pid, WNOHANG) > 0;
        sleep(1);
    }
    kill('TERM', -$pid);
    sleep(2);
    kill('KILL', -$pid);
    waitpid($pid, 0);
    return;
}

1;