postgresql pgpool-II autofailover and failback

Hello, try to find a good material about how-to configure pgpool and found nothing, so I’ve decide to write my own article =))

I’m recommend to read previous articles:
How to install postgres
How to configure repmgr to manage standby databases
because I use those config in this scenario

So, lets start =)

First of all lest configure OS to match our needs ( I had to use RH 6 distributive )
1. configure passwordless ssh to all nodes from root and from postgres user
2. configure passwordless sudo to allow this commands:
add to /etc/sudoers

postgres ALL=(root) NOPASSWD: /sbin/ip
postgres ALL=(root) NOPASSWD: /sbin/arping
postgres ALL=(root) NOPASSWD: /sbin/ifconfig
postgres ALL=(root) NOPASSWD: /etc/init.d/postgresql-9.5

To allow passwordless pcp commands create this file in postgres and root users:

cat $HOME/.pcppass
*:*:pgpooladmin:q1
chmod 600 .pcppass

now lets configure pgpool

start with easiest part, configure pcp.conf
to do this run pg_md5 command with password as first argument:

[postgres@pg1 ~]$ pg_md5 q1
ff33f1b12213e021c2c4a888141953ba

result add to pcp.conf with format of username:pg_md5_pass

echo "pgpooladmin:ff33f1b12213e021c2c4a888141953ba" >> pcp.conf

I’m allow connect to pgpool from whole network, to archive this I’ve add this line to pool_hba.conf

host	all		all	10.10.10.0/24	md5

next session is to configure pgpool.conf
I’ve made it from pgpool.conf.sample-stream

diff pgpool.conf.sample-stream pgpool.conf

27c27,28
< listen_addresses = 'localhost' --- > listen_addresses = ‘*’
>
65c66
< backend_hostname0 = 'host1' --- > backend_hostname0 = ‘pg1’
71c72
< backend_data_directory0 = '/data' --- > backend_data_directory0 = ‘/u01/postgres/9.5/main/pgdata’
76,80c77,89
< #backend_hostname1 = 'host2' < #backend_port1 = 5433 < #backend_weight1 = 1 < #backend_data_directory1 = '/data1' < #backend_flag1 = 'ALLOW_TO_FAILOVER' --- > backend_hostname1 = ‘pg2’
> backend_port1 = 5432
> backend_weight1 = 1
> backend_data_directory1 = ‘/u01/postgres/9.5/main/pgdata’
> backend_flag1 = ‘ALLOW_TO_FAILOVER’
>
> backend_hostname2 = ‘pg3’
> backend_port2 = 5432
> backend_weight2 = 1
> backend_data_directory2 = ‘/u01/postgres/9.5/main/pgdata’
> backend_flag2 = ‘ALLOW_TO_FAILOVER’
>
>
84c93
< enable_pool_hba = off --- > enable_pool_hba = on
120c129
< num_init_children = 32 --- > num_init_children = 300
123c132
< max_pool = 4 --- > max_pool = 1
218c227
< pid_file_name = '/var/run/pgpool/pgpool.pid' --- > pid_file_name = ‘/var/run/pgpool-II-95/pgpool.pid’
333c342
< sr_check_user = 'nobody' --- > sr_check_user = ‘repmgr’
337c346
< sr_check_password = '' --- > sr_check_password = ‘q1’
348c357
< follow_master_command = '' --- > follow_master_command = ‘/u01/postgres/9.5/main/conf/follow_master.sh %d %H %m %P %h’
367c376
< health_check_period = 0 --- > health_check_period = 5
373c382
< health_check_user = 'nobody' --- > health_check_user = ‘repmgr’
375c384
< health_check_password = '' --- > health_check_password = ‘q1’
377c386
< health_check_database = '' --- > health_check_database = ‘postgres’
379c388
< health_check_max_retries = 0 --- > health_check_max_retries = 5
381c390
< health_check_retry_delay = 1 --- > health_check_retry_delay = 5
383c392
< connect_timeout = 10000 --- > connect_timeout = 30000
394c403
< failover_command = '' --- > failover_command = ‘/u01/postgres/9.5/main/conf/failover.sh %d %H %m %P %h’
408c417,418
< failback_command = '' --- > failback_command = ‘/u01/postgres/9.5/main/conf/stb_recreate.sh %d %H %m %P %h’
> #failback_command = »
439c449
< recovery_user = 'nobody' --- > recovery_user = ‘repmgr’
441c451
< recovery_password = '' --- > recovery_password = ‘q1’
465c475
< use_watchdog = off --- > use_watchdog = on
471c481
< trusted_servers = '' --- > trusted_servers = ‘app01,app02’
482c492
< wd_hostname = '' --- > wd_hostname = ‘pg1’
505c515
< delegate_IP = '' --- > delegate_IP = ‘10.10.10.20’
509c519
< if_cmd_path = '/sbin' --- > if_cmd_path = ‘/usr/bin’
512c522
< if_up_cmd = 'ip addr add $_IP_$/24 dev eth0 label eth0:0' --- > if_up_cmd = ‘sudo ip addr add $_IP_$/24 dev eth1 label eth1:1’
515c525
< if_down_cmd = 'ip addr del $_IP_$/24 dev eth0' --- > if_down_cmd = ‘sudo ip addr del $_IP_$/24 dev eth1:1’
518c528
< arping_path = '/usr/sbin' --- > arping_path = ‘/usr/bin’
521c531
< arping_cmd = 'arping -U $_IP_$ -w 1' --- > arping_cmd = ‘sudo arping -U $_IP_$ -w 1’
569c579
< heartbeat_destination0 = 'host0_ip1' --- > heartbeat_destination0 = ‘pg2’
586,588c596,598
< #heartbeat_destination1 = 'host0_ip2' < #heartbeat_destination_port1 = 9694 < #heartbeat_device1 = '' --- > heartbeat_destination1 = ‘pg3’
> heartbeat_destination_port1 = 9694
> heartbeat_device1 = »
601c611
< wd_lifecheck_user = 'nobody' --- > wd_lifecheck_user = ‘pgpool’
604c614
< wd_lifecheck_password = '' --- > wd_lifecheck_password = ‘w7DeMth2oid3oYyE’
610c620
< #other_pgpool_hostname0 = 'host0' --- > other_pgpool_hostname0 = ‘pg2’
613c623
< #other_pgpool_port0 = 5432 --- > other_pgpool_port0 = 9999
616c626
< #other_wd_port0 = 9000 --- > other_wd_port0 = 9000
619,621c629,631
< #other_pgpool_hostname1 = 'host1' < #other_pgpool_port1 = 5432 < #other_wd_port1 = 9000 --- > other_pgpool_hostname1 = ‘p01db03’
> other_pgpool_port1 = 9999
> other_wd_port1 = 9000

[свернуть]

in my configuration I use trusted servers ( app servers ) you may leave it blank, also there was 3 node config ( for quorum) but in my test stand I use only two stands for one datacenter.

Key thig here is to keep first part of this file the same on all host ( where backend_hostname defenition is described )

copy config to second node:

scp  /etc/pgpool-II-95/* pg2:/etc/pgpool-II-95/

modify:
wd_hostname to second node
heartbeat_destination0 to first node
other_pgpool_hostname0 to first node

if you have any problems start pgpool in foreground mode ( also can be usefull to enable debug )
pgpool -f /etc/pgpool-II-95/pgpool.conf -n -d

usefull commands:
check node status
I use connect to VIP ip and port 9999

psql -U repmgr -d postgres -p 9999 -h 10.10.10.20 -c "show pool_nodes"
 node_id | hostname | port | status | lb_weight |  role   | select_cnt
---------+----------+------+--------+-----------+---------+------------
 0       | pg1      | 5432 | 2      | 0.500000  | primary | 0
 1       | pg2      | 5432 | 2      | 0.500000  | standby | 0

scripts ( check that they are executable or add chmod +x /u01/postgres/9.5/main/conf/*.sh )

follow_master.sh

[code language=»bash»]
#!/bin/sh
failed_node=$1
new_master=$2
new_master_id=$3
old_primary=$4
cur_hostname=$5 #%h = host name
(
date
echo "Failed node: $failed_node"
echo "New_master: $new_master"
echo "New_master_id: $new_master_id"
echo "Old_primary: $old_primary"
echo "cur_hostname: $cur_hostname"

set -x
if [ $new_master_id -eq -1 ]; then echo "smth wrong" ; exit 1; fi

if [ $UID -eq 0 ]
then su postgres -c "/usr/pgsql-9.5/bin/repmgr -f /u01/postgres/9.5/main/conf/repmgr.conf -D $PGDATA -h $new_master -U repmgr -d postgres standby follow"
else /usr/pgsql-9.5/bin/repmgr -f /u01/postgres/9.5/main/conf/repmgr.conf -D $PGDATA -h $new_master -U repmgr -d postgres standby follow
fi
exit 0;
) 2>&1 | tee -a /tmp/pgpool_failover.log
[/code]

[свернуть]

failover.sh

set -x
if [ $new_master_id -eq -1 ]; then echo "smth wrong" ; exit 1; fi

if [ $UID -eq 0 ]
then su postgres -c "/usr/bin/ssh -T -l postgres $new_master \"/usr/pgsql-9.5/bin/repmgr -f /u01/postgres/9.5/main/conf/repmgr.conf standby promote 2>/dev/null 1>/dev/null <&-\" "
else /usr/bin/ssh $new_master "/usr/pgsql-9.5/bin/repmgr -f /u01/postgres/9.5/main/conf/repmgr.conf standby promote 2>/dev/null 1>/dev/null <&-"
fi
sleep 5
if [ $UID -eq 0 ]
then su postgres -c "/usr/bin/ssh -T -l postgres $new_master \"pcp_attach_node -p 9898 -U pgpooladmin -w -n $new_master_id\""
else /usr/bin/ssh -T -l postgres $new_master "/usr/bin/ssh -T -l postgres $new_master \"pcp_attach_node -p 9898 -U pgpooladmin -w -n $new_master_id\""
fi
exit 0;
) 2>&1 | tee -a /tmp/pgpool_failover.log
[/code]

[свернуть]

stb_recreate.sh

[code language=»bash»]
#!/bin/sh
failed_node=$1
new_master=$2
new_master_id=$3
old_primary=$4
cur_hostname=$5 #%h = host name
repmgr_conf=/u01/postgres/9.5/main/conf/repmgr.conf
(
date
echo "Failed node: $failed_node"
echo "New_master: $new_master"
echo "New_master_id: $new_master_id"
echo "Old_primary: $old_primary"
echo "cur_hostname: $cur_hostname"

set -x
if [ $new_master_id -eq -1 ]; then echo "smth wrong" ; exit 1; fi

if [ $UID -eq 0 ]
then su postgres -c "
/usr/pgsql-9.5/bin/repmgr -c -h $master_node -d postgres -U repmgr -D $PGDATA -f $repmgr_conf standby clone
sudo /etc/init.d/postgresql-9.5 start
/usr/pgsql-9.5/bin/repmgr -f $repmgr_conf standby register -F
sleep 15
pcp_attach_node -h localhost -U pgpooladmin -w -n 1
"
else
/usr/pgsql-9.5/bin/pg_ctl -m fast stop
mv $PGDATA "$(echo $PGDATA)_$(echo `date +%H:%M_%d%m%Y`)"
/usr/pgsql-9.5/bin/repmgr -c -h $new_master -d postgres -U repmgr -D $PGDATA -f $repmgr_conf standby clone
sudo /etc/init.d/postgresql-9.5 start
/usr/pgsql-9.5/bin/repmgr -f $repmgr_conf standby register -F
sleep 15
pcp_attach_node -h localhost -U pgpooladmin -w -n 1

fi
exit 0;
) 2>&1 | tee -a /tmp/pgpool_recreate.log
[/code]

[свернуть]

bushmelev-aa.ru

postgresql pgpool-II autofailover and failback

Добавить комментарий Отменить ответ

Добавить комментарий Отменить ответ

Primary Sidebar