Back-patch src/test/recovery and PostgresNode from 9.6 to 9.5.

This omits 007_sync_rep.pl, which tests a feature new in 9.6.  The only
other change is to substitute "hot_standby" for "replica".  A planned
back-patch will use this suite to test its recovery behavior changes.
Identified by Kyotaro Horiguchi, though I did not use his patch.

Discussion: https://postgr.es/m/20200304.162919.898938381201316571.horikyota.ntt@gmail.com
This commit is contained in:
Noah Misch 2020-03-19 09:39:26 -07:00
parent c2c2e531e7
commit 12034da6cc
16 changed files with 2441 additions and 5 deletions

View File

@ -371,12 +371,12 @@ ifeq ($(enable_tap_tests),yes)
define prove_installcheck
rm -rf $(CURDIR)/tmp_check/log
cd $(srcdir) && TESTDIR='$(CURDIR)' PATH="$(bindir):$$PATH" PGPORT='6$(DEF_PGPORT)' top_builddir='$(CURDIR)/$(top_builddir)' PG_REGRESS='$(CURDIR)/$(top_builddir)/src/test/regress/pg_regress' $(PROVE) $(PG_PROVE_FLAGS) $(PROVE_FLAGS) t/*.pl
cd $(srcdir) && TESTDIR='$(CURDIR)' PATH="$(bindir):$$PATH" PGPORT='6$(DEF_PGPORT)' top_builddir='$(CURDIR)/$(top_builddir)' PG_REGRESS='$(CURDIR)/$(top_builddir)/src/test/regress/pg_regress' REGRESS_SHLIB='$(abs_top_builddir)/src/test/regress/regress$(DLSUFFIX)' $(PROVE) $(PG_PROVE_FLAGS) $(PROVE_FLAGS) t/*.pl
endef
define prove_check
rm -rf $(CURDIR)/tmp_check/log
cd $(srcdir) && TESTDIR='$(CURDIR)' $(with_temp_install) PGPORT='6$(DEF_PGPORT)' PG_REGRESS='$(CURDIR)/$(top_builddir)/src/test/regress/pg_regress' $(PROVE) $(PG_PROVE_FLAGS) $(PROVE_FLAGS) t/*.pl
cd $(srcdir) && TESTDIR='$(CURDIR)' $(with_temp_install) PGPORT='6$(DEF_PGPORT)' PG_REGRESS='$(CURDIR)/$(top_builddir)/src/test/regress/pg_regress' REGRESS_SHLIB='$(abs_top_builddir)/src/test/regress/regress$(DLSUFFIX)' $(PROVE) $(PG_PROVE_FLAGS) $(PROVE_FLAGS) t/*.pl
endef
else

View File

@ -12,7 +12,7 @@ subdir = src/test
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
SUBDIRS = perl regress isolation modules
SUBDIRS = perl regress isolation modules recovery
# We don't build or execute examples/, locale/, or thread/ by default,
# but we do want "make clean" etc to recurse into them. Likewise for ssl/,

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,154 @@
=pod

=head1 NAME

RecursiveCopy - simple recursive copy implementation

=head1 SYNOPSIS

use RecursiveCopy;

RecursiveCopy::copypath($from, $to, filterfn => sub { return 1; });
RecursiveCopy::copypath($from, $to);

=cut

package RecursiveCopy;

use strict;
use warnings;

use File::Basename;
use File::Copy;

=pod

=head1 DESCRIPTION

=head2 copypath($from, $to, %params)

Recursively copy all files and directories from $from to $to.
Does not preserve file metadata (e.g., permissions).

Only regular files and subdirectories are copied.  Trying to copy other types
of directory entries raises an exception.

Raises an exception if a file would be overwritten, the source directory can't
be read, or any I/O operation fails.  However, we silently ignore ENOENT on
open, because when copying from a live database it's possible for a file/dir
to be deleted after we see its directory entry but before we can open it.

Always returns true.

If the B<filterfn> parameter is given, it must be a subroutine reference.
This subroutine will be called for each entry in the source directory with its
relative path as only parameter; if the subroutine returns true the entry is
copied, otherwise the file is skipped.

On failure the target directory may be in some incomplete state; no cleanup is
attempted.

=head1 EXAMPLES

RecursiveCopy::copypath('/some/path', '/empty/dir',
    filterfn => sub {
        # omit pg_log and contents
        my $src = shift;
        return $src ne 'pg_log';
    }
);

=cut

# Public entry point: validate arguments, then start the recursion with an
# empty relative path (so filterfn sees paths relative to $base_src_dir).
sub copypath
{
    my ($base_src_dir, $base_dest_dir, %params) = @_;
    my $filterfn;

    if (defined $params{filterfn})
    {
        # ref() always returns a defined value ('' for non-references), so
        # comparing it against 'CODE' is sufficient validation.
        die "if specified, filterfn must be a subroutine reference"
          unless ref $params{filterfn} eq 'CODE';

        $filterfn = $params{filterfn};
    }
    else
    {
        # Default filter: copy everything.
        $filterfn = sub { return 1; };
    }

    # Complain if original path is bogus, because _copypath_recurse won't.
    die "\"$base_src_dir\" does not exist" if !-e $base_src_dir;

    # Start recursive copy from current directory
    return _copypath_recurse($base_src_dir, $base_dest_dir, "", $filterfn);
}

# Recursive private guts of copypath.  $curr_path is the path of the entry
# being processed, relative to both base directories ("" for the top level).
sub _copypath_recurse
{
    my ($base_src_dir, $base_dest_dir, $curr_path, $filterfn) = @_;
    my $srcpath  = "$base_src_dir/$curr_path";
    my $destpath = "$base_dest_dir/$curr_path";

    # invoke the filter and skip all further operation if it returns false
    return 1 unless $filterfn->($curr_path);

    # Check for symlink -- needed only on source dir
    # (note: this will fall through quietly if file is already gone)
    die "Cannot operate on symlink \"$srcpath\"" if -l $srcpath;

    # Abort if destination path already exists.  Should we allow directories
    # to exist already?
    die "Destination path \"$destpath\" already exists" if -e $destpath;

    # If this source path is a file, simply copy it to destination with the
    # same name and we're done.
    if (-f $srcpath)
    {
        my $fh;
        unless (open($fh, '<', $srcpath))
        {
            # Tolerate concurrent removal of the file (see POD above).
            return 1 if ($!{ENOENT});
            die "open($srcpath) failed: $!";
        }
        copy($fh, $destpath)
          or die "copy $srcpath -> $destpath failed: $!";
        close $fh;
        return 1;
    }

    # If it's a directory, create it on dest and recurse into it.
    if (-d $srcpath)
    {
        my $directory;
        unless (opendir($directory, $srcpath))
        {
            # Tolerate concurrent removal of the directory.
            return 1 if ($!{ENOENT});
            die "opendir($srcpath) failed: $!";
        }

        mkdir($destpath) or die "mkdir($destpath) failed: $!";

        # Use defined() here: readdir() can legitimately return the string
        # "0", which is false in a plain boolean test and would end the
        # loop early, silently skipping the remaining entries.
        while (defined(my $entry = readdir($directory)))
        {
            next if ($entry eq '.' or $entry eq '..');
            _copypath_recurse($base_src_dir, $base_dest_dir,
                $curr_path eq '' ? $entry : "$curr_path/$entry", $filterfn)
              or die "copypath $srcpath/$entry -> $destpath/$entry failed";
        }

        closedir($directory);
        return 1;
    }

    # If it disappeared from sight, that's OK.
    return 1 if !-e $srcpath;

    # Else it's some weird file type; complain.
    die "Source path \"$srcpath\" is not a regular file or directory";
}

1;

View File

@ -15,6 +15,7 @@ our @EXPORT = qw(
psql
slurp_dir
slurp_file
append_to_file
system_or_bail
system_log
run_log
@ -129,6 +130,33 @@ sub tempdir_short
return File::Temp::tempdir(CLEANUP => 1);
}
# Translate a Perl file name to a host file name. Currently, this is a no-op
# except for the case of Perl=msys and host=mingw32. The subject need not
# exist, but its parent directory must exist.
sub perl2host
{
    my ($subject) = @_;
    # Only msys Perl needs translation; everywhere else the name is fine.
    return $subject unless $Config{osname} eq 'msys';
    my $here = cwd;
    my $leaf;
    if (chdir $subject)
    {
        # $subject is a directory; 'pwd -W' below will name it directly.
        $leaf = '';
    }
    else
    {
        # $subject is a file (or a not-yet-existing leaf): translate its
        # parent directory instead and tack the leaf name back on.
        $leaf = '/' . basename $subject;
        my $parent = dirname $subject;
        chdir $parent or die "could not chdir \"$parent\": $!";
    }

    # this odd way of calling 'pwd -W' is the only way that seems to work.
    my $dir = qx{sh -c "pwd -W"};
    chomp $dir;
    # NOTE(review): this chdir back is unchecked; if it fails we silently
    # remain in the translated directory -- confirm callers tolerate that.
    chdir $here;
    return $dir . $leaf;
}
# Initialize a new cluster for testing.
#
# The PGHOST environment variable is set to connect to the new cluster.
@ -257,6 +285,15 @@ sub slurp_file
return $contents;
}
# Append $str to the end of $filename, creating the file if it does not
# exist yet.  Dies if the file cannot be opened for appending; the write
# and close themselves are not error-checked, matching historical behavior.
sub append_to_file
{
    my ($filename, $str) = @_;

    open my $handle, '>>', $filename
      or die "could not write \"$filename\": $!";
    print {$handle} $str;
    close $handle;
}
sub system_or_bail
{
if (system_log(@_) != 0)

2
src/test/recovery/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
# Generated by test suite
/tmp_check/

View File

@ -0,0 +1,20 @@
#-------------------------------------------------------------------------
#
# Makefile for src/test/recovery
#
# Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
# Portions Copyright (c) 1994, Regents of the University of California
#
# src/test/recovery/Makefile
#
#-------------------------------------------------------------------------

subdir = src/test/recovery
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global

# Run the TAP suite under a temporary installation; prove_check is defined
# in src/Makefile.global and requires --enable-tap-tests.
check:
	$(prove_check)

# tmp_check/ is the only output this suite generates.
clean distclean maintainer-clean:
	rm -rf tmp_check

17
src/test/recovery/README Normal file
View File

@ -0,0 +1,17 @@
src/test/recovery/README
Regression tests for recovery and replication
=============================================
This directory contains a test suite for recovery and replication.
Running the tests
=================
make check
NOTE: This creates a temporary installation. Some tests may create
one or more nodes (a master and/or standbys) for the purpose of
the tests.
NOTE: This requires the --enable-tap-tests argument to configure.

View File

@ -0,0 +1,68 @@
# Minimal test testing streaming replication: master -> standby 1 -> standby 2
# (cascading), checking that content streams to both standbys and that
# standbys reject writes.
use strict;
use warnings;
use PostgresNode;
use TestLib;
use Test::More tests => 4;

# Initialize master node
my $node_master = get_new_node('master');
$node_master->init(allows_streaming => 1);
$node_master->start;
my $backup_name = 'my_backup';

# Take backup
$node_master->backup($backup_name);

# Create streaming standby linking to master
my $node_standby_1 = get_new_node('standby_1');
$node_standby_1->init_from_backup($node_master, $backup_name,
    has_streaming => 1);
$node_standby_1->start;

# Take backup of standby 1 (not mandatory, but useful to check if
# pg_basebackup works on a standby).
$node_standby_1->backup($backup_name);

# Take a second backup of the standby while the master is offline.
$node_master->stop;
$node_standby_1->backup('my_backup_2');
$node_master->start;

# Create second standby node linking to standby 1
my $node_standby_2 = get_new_node('standby_2');
$node_standby_2->init_from_backup($node_standby_1, $backup_name,
    has_streaming => 1);
$node_standby_2->start;

# Create some content on master and check its presence in standby 1
$node_master->safe_psql('postgres',
    "CREATE TABLE tab_int AS SELECT generate_series(1,1002) AS a");

# Wait for standbys to catch up, by polling pg_stat_replication on the
# node each standby streams from.
my $applname_1 = $node_standby_1->name;
my $applname_2 = $node_standby_2->name;
my $caughtup_query =
  "SELECT pg_current_xlog_location() <= replay_location FROM pg_stat_replication WHERE application_name = '$applname_1';";
$node_master->poll_query_until('postgres', $caughtup_query)
  or die "Timed out while waiting for standby 1 to catch up";
$caughtup_query =
  "SELECT pg_last_xlog_replay_location() <= replay_location FROM pg_stat_replication WHERE application_name = '$applname_2';";
$node_standby_1->poll_query_until('postgres', $caughtup_query)
  or die "Timed out while waiting for standby 2 to catch up";

my $result =
  $node_standby_1->safe_psql('postgres', "SELECT count(*) FROM tab_int");
print "standby 1: $result\n";
is($result, qq(1002), 'check streamed content on standby 1');
$result =
  $node_standby_2->safe_psql('postgres', "SELECT count(*) FROM tab_int");
print "standby 2: $result\n";
is($result, qq(1002), 'check streamed content on standby 2');

# Check that only READ-only queries can run on standbys.
# NOTE(review): assumes PostgresNode::psql returns psql's exit status and
# that a failed statement yields 3 (ON_ERROR_STOP semantics) -- confirm.
is($node_standby_1->psql('postgres', 'INSERT INTO tab_int VALUES (1)'),
    3, 'read-only queries on standby 1');
is($node_standby_2->psql('postgres', 'INSERT INTO tab_int VALUES (1)'),
    3, 'read-only queries on standby 2');

View File

@ -0,0 +1,76 @@
# test for archiving with hot standby: a standby restores WAL from the
# master's archive, and temporary recovery files must be gone after
# promotion.
use strict;
use warnings;
use PostgresNode;
use TestLib;
use Test::More tests => 3;
use File::Copy;

# Initialize master node, doing archives
my $node_master = get_new_node('master');
$node_master->init(
    has_archiving    => 1,
    allows_streaming => 1);
my $backup_name = 'my_backup';

# Start it
$node_master->start;

# Take backup from which the standby will be initialized
$node_master->backup($backup_name);

# Initialize standby node from backup, fetching WAL from archives
my $node_standby = get_new_node('standby');
$node_standby->init_from_backup($node_master, $backup_name,
    has_restoring => 1);
# Shorten the archive retry interval so the test completes quickly.
$node_standby->append_conf(
    'postgresql.conf', qq(
wal_retrieve_retry_interval = '100ms'
));
$node_standby->start;

# Create some content on master
$node_master->safe_psql('postgres',
    "CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a");
my $current_lsn =
  $node_master->safe_psql('postgres', "SELECT pg_current_xlog_location();");

# Force archiving of WAL file to make it present on master
$node_master->safe_psql('postgres', "SELECT pg_switch_xlog()");

# Add some more content, it should not be present on standby
$node_master->safe_psql('postgres',
    "INSERT INTO tab_int VALUES (generate_series(1001,2000))");

# Wait until necessary replay has been done on standby
my $caughtup_query =
  "SELECT '$current_lsn'::pg_lsn <= pg_last_xlog_replay_location()";
$node_standby->poll_query_until('postgres', $caughtup_query)
  or die "Timed out while waiting for standby to catch up";

my $result =
  $node_standby->safe_psql('postgres', "SELECT count(*) FROM tab_int");
is($result, qq(1000), 'check content from archives');

# Check the presence of temporary files specifically generated during
# archive recovery. To ensure the presence of the temporary history
# file, switch to a timeline large enough to allow a standby to recover
# a history file from an archive. As this requires at least two timeline
# switches, promote the existing standby first. Then create a second
# standby based on the promoted one. Finally, the second standby is
# promoted.
$node_standby->promote;

my $node_standby2 = get_new_node('standby2');
$node_standby2->init_from_backup($node_master, $backup_name,
    has_restoring => 1);
$node_standby2->start;

# Now promote standby2, and check that temporary files specifically
# generated during archive recovery are removed by the end of recovery.
$node_standby2->promote;

my $node_standby2_data = $node_standby2->data_dir;
# This suite targets pre-10 servers (note the pg_current_xlog_location()
# calls above), whose WAL directory is "pg_xlog"; it was renamed to
# "pg_wal" only in PostgreSQL 10.  Checking pg_wal here would make the
# !-f assertions pass vacuously against a path that can never exist.
ok( !-f "$node_standby2_data/pg_xlog/RECOVERYHISTORY",
    "RECOVERYHISTORY removed after promotion");
ok( !-f "$node_standby2_data/pg_xlog/RECOVERYXLOG",
    "RECOVERYXLOG removed after promotion");

View File

@ -0,0 +1,127 @@
# Test for recovery targets: name, timestamp, XID
use strict;
use warnings;
use PostgresNode;
use TestLib;
use Test::More tests => 7;
# Create and test a standby from given backup, with a certain recovery target.
# Choose $until_lsn later than the transaction commit that causes the row
# count to reach $num_rows, yet not later than the recovery target.
# Create a standby from the 'my_backup' backup of $node_master, configure it
# with the given recovery.conf lines, wait for replay past $until_lsn, and
# check that exactly $num_rows rows of tab_int are visible.  The standby is
# torn down afterwards so node names/ports can be reused.
sub test_recovery_standby
{
    my $test_name       = shift;
    my $node_name       = shift;
    my $node_master     = shift;
    my $recovery_params = shift;    # arrayref of "name = value" lines
    my $num_rows        = shift;
    my $until_lsn       = shift;

    my $node_standby = get_new_node($node_name);
    $node_standby->init_from_backup($node_master, 'my_backup',
        has_restoring => 1);

    foreach my $param_item (@$recovery_params)
    {
        # Each item is appended as one full recovery.conf line.
        $node_standby->append_conf(
            'recovery.conf',
            qq($param_item
));
    }

    $node_standby->start;

    # Wait until standby has replayed enough data
    my $caughtup_query =
      "SELECT '$until_lsn'::pg_lsn <= pg_last_xlog_replay_location()";
    $node_standby->poll_query_until('postgres', $caughtup_query)
      or die "Timed out while waiting for standby to catch up";

    # Create some content on master and check its presence in standby
    my $result =
      $node_standby->safe_psql('postgres', "SELECT count(*) FROM tab_int");
    is($result, qq($num_rows), "check standby content for $test_name");

    # Stop standby node
    $node_standby->teardown_node;
}
# Initialize master node
my $node_master = get_new_node('master');
$node_master->init(has_archiving => 1, allows_streaming => 1);

# Start it
$node_master->start;

# Create data before taking the backup, aimed at testing
# recovery_target = 'immediate'
$node_master->safe_psql('postgres',
    "CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a");
my $lsn1 =
  $node_master->safe_psql('postgres', "SELECT pg_current_xlog_location();");

# Take backup from which all operations will be run
$node_master->backup('my_backup');

# Insert some data to be used as a replay reference, with a recovery
# target TXID.
$node_master->safe_psql('postgres',
    "INSERT INTO tab_int VALUES (generate_series(1001,2000))");
my $ret = $node_master->safe_psql('postgres',
    "SELECT pg_current_xlog_location(), txid_current();");
my ($lsn2, $recovery_txid) = split /\|/, $ret;

# More data, with recovery target timestamp
$node_master->safe_psql('postgres',
    "INSERT INTO tab_int VALUES (generate_series(2001,3000))");
my $lsn3 =
  $node_master->safe_psql('postgres', "SELECT pg_current_xlog_location();");
my $recovery_time = $node_master->safe_psql('postgres', "SELECT now()");

# Even more data, this time with a recovery target name
$node_master->safe_psql('postgres',
    "INSERT INTO tab_int VALUES (generate_series(3001,4000))");
my $recovery_name = "my_target";
my $lsn4 =
  $node_master->safe_psql('postgres', "SELECT pg_current_xlog_location();");
$node_master->safe_psql('postgres',
    "SELECT pg_create_restore_point('$recovery_name');");

# Force archiving of WAL file
$node_master->safe_psql('postgres', "SELECT pg_switch_xlog()");

# Test each recovery target type in turn; each call builds a throwaway
# standby from 'my_backup' and checks the visible row count.
my @recovery_params = ("recovery_target = 'immediate'");
test_recovery_standby('immediate target',
    'standby_1', $node_master, \@recovery_params, "1000", $lsn1);
@recovery_params = ("recovery_target_xid = '$recovery_txid'");
test_recovery_standby('XID', 'standby_2', $node_master, \@recovery_params,
    "2000", $lsn2);
@recovery_params = ("recovery_target_time = '$recovery_time'");
test_recovery_standby('time', 'standby_3', $node_master, \@recovery_params,
    "3000", $lsn3);
@recovery_params = ("recovery_target_name = '$recovery_name'");
test_recovery_standby('name', 'standby_4', $node_master, \@recovery_params,
    "4000", $lsn4);

# Multiple targets
# Last entry has priority (note that an array respects the order of items
# not hashes).
@recovery_params = (
    "recovery_target_name = '$recovery_name'",
    "recovery_target_xid = '$recovery_txid'",
    "recovery_target_time = '$recovery_time'");
test_recovery_standby('name + XID + time',
    'standby_5', $node_master, \@recovery_params, "3000", $lsn3);
@recovery_params = (
    "recovery_target_time = '$recovery_time'",
    "recovery_target_name = '$recovery_name'",
    "recovery_target_xid = '$recovery_txid'");
test_recovery_standby('time + name + XID',
    'standby_6', $node_master, \@recovery_params, "2000", $lsn2);
@recovery_params = (
    "recovery_target_xid = '$recovery_txid'",
    "recovery_target_time = '$recovery_time'",
    "recovery_target_name = '$recovery_name'");
test_recovery_standby('XID + time + name',
    'standby_7', $node_master, \@recovery_params, "4000", $lsn4);

View File

@ -0,0 +1,75 @@
# Test for timeline switch
# Ensure that a cascading standby is able to follow a newly-promoted standby
# on a new timeline.
use strict;
use warnings;
use File::Path qw(rmtree);
use PostgresNode;
use TestLib;
use Test::More tests => 1;

$ENV{PGDATABASE} = 'postgres';

# Initialize master node
my $node_master = get_new_node('master');
$node_master->init(allows_streaming => 1);
$node_master->start;

# Take backup
my $backup_name = 'my_backup';
$node_master->backup($backup_name);

# Create two standbys linking to it
my $node_standby_1 = get_new_node('standby_1');
$node_standby_1->init_from_backup($node_master, $backup_name,
    has_streaming => 1);
$node_standby_1->start;
my $node_standby_2 = get_new_node('standby_2');
$node_standby_2->init_from_backup($node_master, $backup_name,
    has_streaming => 1);
$node_standby_2->start;

# Create some content on master
$node_master->safe_psql('postgres',
    "CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a");
my $until_lsn =
  $node_master->safe_psql('postgres', "SELECT pg_current_xlog_location();");

# Wait until standby has replayed enough data on standby 1
my $caughtup_query =
  "SELECT '$until_lsn'::pg_lsn <= pg_last_xlog_replay_location()";
$node_standby_1->poll_query_until('postgres', $caughtup_query)
  or die "Timed out while waiting for standby to catch up";

# Stop and remove master, and promote standby 1, switching it to a new timeline
$node_master->teardown_node;
$node_standby_1->promote;

# Switch standby 2 to replay from standby 1: drop its old recovery.conf
# and point primary_conninfo at the promoted node, following the latest
# timeline.
rmtree($node_standby_2->data_dir . '/recovery.conf');
my $connstr_1 = $node_standby_1->connstr;
$node_standby_2->append_conf(
    'recovery.conf', qq(
primary_conninfo='$connstr_1'
standby_mode=on
recovery_target_timeline='latest'
));
$node_standby_2->restart;

# Insert some data in standby 1 and check its presence in standby 2
# to ensure that the timeline switch has been done. Standby 1 needs
# to exit recovery first before moving on with the test.
$node_standby_1->poll_query_until('postgres',
    "SELECT pg_is_in_recovery() <> true");
$node_standby_1->safe_psql('postgres',
    "INSERT INTO tab_int VALUES (generate_series(1001,2000))");
$until_lsn = $node_standby_1->safe_psql('postgres',
    "SELECT pg_current_xlog_location();");
$caughtup_query =
  "SELECT '$until_lsn'::pg_lsn <= pg_last_xlog_replay_location()";
$node_standby_2->poll_query_until('postgres', $caughtup_query)
  or die "Timed out while waiting for standby to catch up";

my $result =
  $node_standby_2->safe_psql('postgres', "SELECT count(*) FROM tab_int");
is($result, qq(2000), 'check content of standby 2');

View File

@ -0,0 +1,69 @@
# Checks for recovery_min_apply_delay: a standby configured with an apply
# delay must not replay WAL before that delay has elapsed.
use strict;
use warnings;
use PostgresNode;
use TestLib;
use Test::More tests => 1;

# Initialize master node
my $node_master = get_new_node('master');
$node_master->init(allows_streaming => 1);
$node_master->start;

# And some content
$node_master->safe_psql('postgres',
    "CREATE TABLE tab_int AS SELECT generate_series(1, 10) AS a");

# Take backup
my $backup_name = 'my_backup';
$node_master->backup($backup_name);

# Create streaming standby from backup
my $node_standby = get_new_node('standby');
my $delay = 3;    # apply delay, in seconds
$node_standby->init_from_backup($node_master, $backup_name,
    has_streaming => 1);
$node_standby->append_conf(
    'recovery.conf', qq(
recovery_min_apply_delay = '${delay}s'
));
$node_standby->start;

# Make new content on master and check its presence in standby depending
# on the delay applied above. Before doing the insertion, get the
# current timestamp that will be used as a comparison base. Even on slow
# machines, this allows to have a predictable behavior when comparing the
# delay between data insertion moment on master and replay time on standby.
my $master_insert_time = time();
$node_master->safe_psql('postgres',
    "INSERT INTO tab_int VALUES (generate_series(11, 20))");

# Now wait for replay to complete on standby. We're done waiting when the
# slave has replayed up to the previously saved master LSN.
my $until_lsn =
  $node_master->safe_psql('postgres', "SELECT pg_current_xlog_location()");

my $remaining = 90;
while ($remaining-- > 0)
{
    # Done waiting?
    my $replay_status = $node_standby->safe_psql('postgres',
        "SELECT (pg_last_xlog_replay_location() - '$until_lsn'::pg_lsn) >= 0"
    );
    last if $replay_status eq 't';

    # No, sleep some more.
    my $sleep = $master_insert_time + $delay - time();
    $sleep = 1 if $sleep < 1;
    sleep $sleep;
}

die "Maximum number of attempts reached ($remaining remain)"
  if $remaining < 0;

# This test is successful if and only if the LSN has been applied with at least
# the configured apply delay.
ok(time() - $master_insert_time >= $delay,
    "standby applies WAL only after replication delay");

View File

@ -0,0 +1,93 @@
# Test WAL replay of FSM changes.
#
# FSM changes don't normally need to be WAL-logged, except for truncation.
# The FSM mustn't return a page that doesn't exist (anymore).
use strict;
use warnings;
use PostgresNode;
use TestLib;
use Test::More tests => 1;

my $node_master = get_new_node('master');
$node_master->init(allows_streaming => 1);

$node_master->append_conf('postgresql.conf', qq{
fsync = on
wal_level = hot_standby
wal_log_hints = on
max_prepared_transactions = 5
autovacuum = off
});

# Create a master node and its standby, initializing both with some data
# at the same time.
$node_master->start;
$node_master->backup('master_backup');
my $node_standby = get_new_node('standby');
$node_standby->init_from_backup($node_master, 'master_backup',
    has_streaming => 1);
$node_standby->start;

# Populate the table, then free up trailing pages with a DELETE so that a
# later vacuum can truncate the relation.
$node_master->psql('postgres', qq{
create table testtab (a int, b char(100));
insert into testtab select generate_series(1,1000), 'foo';
insert into testtab select generate_series(1,1000), 'foo';
delete from testtab where ctid > '(8,0)';
});

# Take a lock on the table to prevent following vacuum from truncating it
$node_master->psql('postgres', qq{
begin;
lock table testtab in row share mode;
prepare transaction 'p1';
});

# Vacuum, update FSM without truncation
$node_master->psql('postgres', 'vacuum verbose testtab');

# Force a checkpoint
$node_master->psql('postgres', 'checkpoint');

# Now do some more insert/deletes, another vacuum to ensure full-page writes
# are done
$node_master->psql('postgres', qq{
insert into testtab select generate_series(1,1000), 'foo';
delete from testtab where ctid > '(8,0)';
vacuum verbose testtab;
});

# Ensure all buffers are now clean on the standby
$node_standby->psql('postgres', 'checkpoint');

# Release the lock, vacuum again which should lead to truncation
$node_master->psql('postgres', qq{
rollback prepared 'p1';
vacuum verbose testtab;
});

$node_master->psql('postgres', 'checkpoint');
my $until_lsn =
  $node_master->safe_psql('postgres', "SELECT pg_current_xlog_location();");

# Wait long enough for standby to receive and apply all WAL
my $caughtup_query =
  "SELECT '$until_lsn'::pg_lsn <= pg_last_xlog_replay_location()";
$node_standby->poll_query_until('postgres', $caughtup_query)
  or die "Timed out while waiting for standby to catch up";

# Promote the standby
$node_standby->promote;
$node_standby->poll_query_until('postgres',
    "SELECT NOT pg_is_in_recovery()")
  or die "Timed out while waiting for promotion of standby";
$node_standby->psql('postgres', 'checkpoint');

# Restart to discard in-memory copy of FSM
$node_standby->restart;

# Insert should work on standby; a stale FSM pointing at truncated pages
# would make this fail (psql exit status 0 means success).
is($node_standby->psql('postgres',
    qq{insert into testtab select generate_series(1,1000), 'foo';}),
    0, 'INSERT succeeds with truncated relation FSM');

View File

@ -0,0 +1,204 @@
#
# Tests of pg_shmem.h functions
#
use strict;
use warnings;
use IPC::Run 'run';
use PostgresNode;
use Test::More;
use TestLib;
use Time::HiRes qw(usleep);
# SysV shared memory does not exist on Windows, so skip the whole suite there.
if ($windows_os)
{
    plan skip_all => 'SysV shared memory not supported by this platform';
}
else
{
    plan tests => 5;
}

my $tempdir = TestLib::tempdir;
my $port;

# Log "ipcs" diffs on a best-effort basis, swallowing any error.
my $ipcs_before = "$tempdir/ipcs_before";
eval { run_log [ 'ipcs', '-am' ], '>', $ipcs_before; };

# Emit (to the log) the difference between the current "ipcs -am" output and
# the baseline captured above; purely diagnostic, errors are ignored.
sub log_ipcs
{
    eval { run_log [ 'ipcs', '-am' ], '|', [ 'diff', $ipcs_before, '-' ] };
    return;
}
# These tests need a $port such that nothing creates or removes a segment in
# $port's IpcMemoryKey range while this test script runs. While there's no
# way to ensure that in general, we do ensure that if PostgreSQL tests are the
# only actors. With TCP, the first get_new_node picks a port number. With
# Unix sockets, use a postmaster, $port_holder, to represent a key space
# reservation. $port_holder holds a reservation on the key space of port
# 1+$port_holder->port if it created the first IpcMemoryKey of its own port's
# key space. If multiple copies of this test script run concurrently, they
# will pick different ports. $port_holder postmasters use odd-numbered ports,
# and tests use even-numbered ports. In the absence of collisions from other
# shmget() activity, gnat starts with key 0x7d001 (512001), and flea starts
# with key 0x7d002 (512002).
my $port_holder;
if (!$PostgresNode::use_tcp)
{
    my $lock_port;
    # Probe odd-numbered ports until we find one whose shmem key space we
    # demonstrably own (see the explanatory comment above).
    for ($lock_port = 511; $lock_port < 711; $lock_port += 2)
    {
        $port_holder = PostgresNode->get_new_node(
            "port${lock_port}_holder",
            port     => $lock_port,
            own_host => 1);
        $port_holder->init(hba_permit_replication => 0);
        $port_holder->append_conf('postgresql.conf', 'max_connections = 5');
        $port_holder->start;
        # Match the AddToDataDirLockFile() call in sysv_shmem.c. Assume all
        # systems not using sysv_shmem.c do use TCP.
        my $shmem_key_line_prefix = sprintf("%9lu ", 1 + $lock_port * 1000);
        last
          if slurp_file($port_holder->data_dir . '/postmaster.pid') =~
          /^$shmem_key_line_prefix/m;
        $port_holder->stop;
    }
    # Tests use the even-numbered port just above the holder's.
    $port = $lock_port + 1;
}
# Node setup.
# Create, initialize and start a node named $name, sharing $port (and thus a
# shmem key space) with every other node this script creates.  Returns the
# PostgresNode object.
sub init_start
{
    my $name = shift;
    my $ret = PostgresNode->get_new_node($name, port => $port, own_host => 1);
    defined($port) or $port = $ret->port;    # same port for all nodes
    $ret->init(hba_permit_replication => 0);
    # Limit semaphore consumption, since we run several nodes concurrently.
    $ret->append_conf('postgresql.conf', 'max_connections = 5');
    $ret->start;
    log_ipcs();
    return $ret;
}
my $gnat = init_start 'gnat';
my $flea = init_start 'flea';

# Upon postmaster death, postmaster children exit automatically.
$gnat->kill9;
log_ipcs();
$flea->restart;       # flea ignores the shm key gnat abandoned.
log_ipcs();
poll_start($gnat);    # gnat recycles its former shm key.
log_ipcs();

# After clean shutdown, the nodes swap shm keys.
$gnat->stop;
$flea->restart;
log_ipcs();
$gnat->start;
log_ipcs();

# Scenarios involving no postmaster.pid, dead postmaster, and a live backend.
# Use a regress.c function to emulate the responsiveness of a backend working
# through a CPU-intensive task.
$gnat->safe_psql('postgres', <<EOSQL);
CREATE FUNCTION wait_pid(int)
RETURNS void
AS '$ENV{REGRESS_SHLIB}'
LANGUAGE C STRICT;
EOSQL
my $slow_query = 'SELECT wait_pid(pg_backend_pid())';
my ($stdout, $stderr);
# Launch a psql client that blocks in wait_pid(), so a live backend keeps
# using the shared memory segment after the postmaster dies below.
my $slow_client = IPC::Run::start(
    [
        'psql', '-X', '-qAt', '-d', $gnat->connstr('postgres'),
        '-c', $slow_query
    ],
    '<',
    \undef,
    '>',
    \$stdout,
    '2>',
    \$stderr,
    IPC::Run::timeout(900));    # five times the poll_query_until timeout
ok( $gnat->poll_query_until(
        'postgres',
        "SELECT true FROM pg_stat_activity WHERE query = '$slow_query'"),
    'slow query started');
my $slow_pid = $gnat->safe_psql('postgres',
    "SELECT pid FROM pg_stat_activity WHERE query = '$slow_query'");
# Kill the postmaster and remove its pid file while the slow backend still
# holds the shared memory segment.
$gnat->kill9;
unlink($gnat->data_dir . '/postmaster.pid');
$gnat->rotate_logfile;
log_ipcs();
# Reject ordinary startup. Retry for the same reasons poll_start() does.
my $pre_existing_msg = qr/pre-existing shared memory block/;
{
    my $max_attempts = 180 * 10;    # Retry every 0.1s for at least 180s.
    my $attempts = 0;
    while ($attempts < $max_attempts)
    {
        last
          if $gnat->start(fail_ok => 1)
          || slurp_file($gnat->logfile) =~ $pre_existing_msg;
        usleep(100_000);
        $attempts++;
    }
}
like(slurp_file($gnat->logfile),
    $pre_existing_msg, 'detected live backend via shared memory');
# Reject single-user startup.
my $single_stderr;
ok( !run_log(
        [ 'postgres', '--single', '-D', $gnat->data_dir, 'template1' ],
        '<', \undef, '2>', \$single_stderr),
    'live query blocks --single');
print STDERR $single_stderr;
like($single_stderr, $pre_existing_msg,
    'single-user mode detected live backend via shared memory');
log_ipcs();

# Fail to reject startup if shm key N has become available and we crash while
# using key N+1. This is unwanted, but expected.
$flea->stop;    # release first key
is($gnat->start(fail_ok => 1), 1, 'key turnover fools only sysv_shmem.c');
$gnat->stop;    # release first key
$flea->start;   # grab first key

# cleanup
TestLib::system_log('pg_ctl', 'kill', 'QUIT', $slow_pid);
$slow_client->finish;    # client has detected backend termination
log_ipcs();
poll_start($gnat);       # recycle second key
$gnat->stop;
$flea->stop;
$port_holder->stop if $port_holder;
log_ipcs();
# We may need retries to start a new postmaster. Causes:
# - kernel is slow to deliver SIGKILL
# - postmaster parent is slow to waitpid()
# - postmaster child is slow to exit in response to SIGQUIT
# - postmaster child is slow to exit after postmaster death
# Repeatedly try to start $node until it comes up or ~180 seconds have
# passed; returns 1 on success.  The final attempt runs without fail_ok,
# which BAILs out of the whole suite if it also fails.
sub poll_start
{
    my ($node) = @_;

    my $max_attempts = 180 * 10;
    my $attempts = 0;
    while ($attempts < $max_attempts)
    {
        $node->start(fail_ok => 1) && return 1;

        # Wait 0.1 second before retrying.
        usleep(100_000);
        $attempts++;
    }

    # No success within 180 seconds. Try one last time without fail_ok, which
    # will BAIL_OUT unless it succeeds.
    $node->start && return 1;
    return 0;
}

View File

@ -37,7 +37,7 @@ if (-e "src/tools/msvc/buildenv.pl")
my $what = shift || "";
if ($what =~
/^(check|installcheck|plcheck|contribcheck|modulescheck|ecpgcheck|isolationcheck|upgradecheck|bincheck|taptest)$/i
/^(check|installcheck|plcheck|contribcheck|modulescheck|ecpgcheck|isolationcheck|upgradecheck|bincheck|recoverycheck|taptest)$/i
)
{
$what = uc $what;
@ -85,6 +85,7 @@ my %command = (
MODULESCHECK => \&modulescheck,
ISOLATIONCHECK => \&isolationcheck,
BINCHECK => \&bincheck,
RECOVERYCHECK => \&recoverycheck,
UPGRADECHECK => \&upgradecheck,
TAPTEST => \&taptest,);
@ -203,8 +204,9 @@ sub tap_check
# adjust the environment for just this test
local %ENV = %ENV;
$ENV{PERL5LIB} = "$topdir/src/test/perl;$ENV{PERL5LIB}";
$ENV{PERL5LIB} = "$topdir/src/test/perl;$ENV{PERL5LIB}";
$ENV{PG_REGRESS} = "$topdir/$Config/pg_regress/pg_regress";
$ENV{REGRESS_SHLIB} = "$topdir/src/test/regress/regress.dll";
$ENV{TESTDIR} = "$dir";
@ -439,6 +441,16 @@ sub modulescheck
exit $mstat if $mstat;
}
# Run the recovery TAP test suite (src/test/recovery) against a temporary
# installation; exit with the tap_check status if the suite fails.
sub recoverycheck
{
    InstallTemp();

    # The $mstat accumulator other *check subs use was declared but never
    # used here; tap_check's status is what matters.
    my $dir    = "$topdir/src/test/recovery";
    my $status = tap_check($dir);
    exit $status if $status;
}
# Run "initdb", then reconfigure authentication.
sub standard_initdb
{
@ -683,6 +695,7 @@ sub usage
" isolationcheck run isolation tests\n",
" modulescheck run tests of modules in src/test/modules/\n",
" plcheck run tests of PL languages\n",
" recoverycheck run recovery test suite\n",
" taptest run an arbitrary TAP test set\n",
" upgradecheck run tests of pg_upgrade\n",
"\nOptions for <arg>: (used by check and installcheck)\n",