PostgreSQL灾难恢复系列2_Truncate table后数据的恢复

原创
01/13 15:00
阅读数 63

本文为墨天轮数据库管理服务团队原创内容,如需转载请联系小墨(VX:modb666)并注明来源。


作者:陈昱

墨天轮数据库管理服务团队技术顾问

19 年IT软件行业从业经验,超过15年以上数据库开发和维护经历,拥有丰富的软件开发及应用、数据库架构设计开发和维护项目经历。10g 和 11g OCM, 曾为多家金融、政府机构提供Oracle的现场支持和咨询服务。

适用范围

PG >= 14

Platform Linux
XFS 文件系统

问题概述

上回分享了 PG 下数据表被误 drop 场景下的数据恢复,本场景再次分享数据表被 Truncate 后的数据恢复,总体来说,步骤基本一致,只不过唯一不同之处在于得到需要恢复文件的文件名方法不同,这是因为 truncate 会将表的relfilenode 更新,同时如果表存在toast,那么toast的relfilenode 也会变化,对于drop来说全都被删除了。

问题原因

Truntable 类似 drop,先将原来的文件删掉,然后新建空文件,所以需要找到被删除的文件的文件名,然后进行异常恢复,也包含2步
1) 首先恢复操作系统中被 rm 的数据文件

2) 使用 PG 恢复工具抽取数据

解决方案


下面我们通过一个案例来说明 PG 中如何恢复一个被 Truncate 的表中的数据。

1) 造就测试数据表和数据

postgres=# create table arraytab5 as select * from arraytab;

postgres=# insert into arraytab5 select * from arraytab;
b1 | array1 | array2
----+---------+-----------
t | {10001} | {meeting}

postgres=# checkpoint;
CHECKPOINT

postgres=# select * from arraytab5;
b1 | array1 | array2
----+---------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------
t | {10001} | {meeting}
t | {10001,10002} | {meeting,lunch}
t | {10001,10002,10003,10004} | {{meeting,lunch},{training,presentation}}
t | {{{101},{102}},{{103},{104}},{{105},{106}}} | {{{aaa},{bbb}},{{ccc},{ddd}},{{eee},{fff}}}
t | {{{101,102},{103,104}},{{105,106},{107,108}},{{109,110},{111,112}}} | {{{aaa,bbb},{ccc,ddd}},{{eee,fff},{ggg,hhh}},{{iii,jjj},{kkk,lll}}}
t | {10001,10002,10003,10004} | {{{aaa},{bbb}},{{ccc},{ddd}}}
t | {10001,10002,10003,10004} | {{{aaa,bbb,ccc},{ddd,eee,fff},{ggg,hhh,iii}},{{jjj,kkk,lll},{mmm,nnn,ooo},{ppp,qqq,rrr}},{{sss,ttt,uuu},{vvv,www,xxx},{yyy,zzz,aaa}}}
t | {10001,10002,10003,10004} | {{meeting},{training}}
t | {{10001,10002,1003},{10004,10005,1006},{10007,10008,1009}} | {{meeting},{training}}
t | {10001} | {meeting}
t | {10001,10002} | {meeting,lunch}
t | {10001,10002,10003,10004} | {{meeting,lunch},{training,presentation}}
t | {{{101},{102}},{{103},{104}},{{105},{106}}} | {{{aaa},{bbb}},{{ccc},{ddd}},{{eee},{fff}}}
t | {{{101,102},{103,104}},{{105,106},{107,108}},{{109,110},{111,112}}} | {{{aaa,bbb},{ccc,ddd}},{{eee,fff},{ggg,hhh}},{{iii,jjj},{kkk,lll}}}
t | {10001,10002,10003,10004} | {{{aaa},{bbb}},{{ccc},{ddd}}}
t | {10001,10002,10003,10004} | {{{aaa,bbb,ccc},{ddd,eee,fff},{ggg,hhh,iii}},{{jjj,kkk,lll},{mmm,nnn,ooo},{ppp,qqq,rrr}},{{sss,ttt,uuu},{vvv,www,xxx},{yyy,zzz,aaa}}}
t | {10001,10002,10003,10004} | {{meeting},{training}}
t | {{10001,10002,1003},{10004,10005,1006},{10007,10008,1009}} | {{meeting},{training}}
(18 rows)


select oid,relname,relfilenode,reltoastrelid from pg_class where relname = 'arraytab5';
oid | relname | relfilenode | reltoastrelid
--------+-----------+-------------+---------------
126891 | arraytab5 | 126891 | 126894

select oid,relname,relfilenode,reltoastrelid from pg_class where relname like '%pg_toast_126891%';
oid | relname | relfilenode | reltoastrelid
--------+-----------------------+-------------+---------------
126894 | pg_toast_126891 | 126894 | 0
126895 | pg_toast_126891_index | 126895 | 0

[root@rac1 5]# ls -li 126891
69763712 -rw------- 1 postgres dba 8192 Dec 11 14:47 126891 --> inode = 69763712

[root@rac1 5]# ls -li 126894
69763715 -rw------- 1 postgres dba 0 Dec 11 14:46 126894

[root@rac1 5]# ls -li 126895
69763716 -rw------- 1 postgres dba 8192 Dec 11 14:46 126895

[root@rac1 5]# md5sum 126891
97a75feb46de9bcc88fcb067db7165a6 126891

[postgres@rac1 5]$ date
Wed Dec 11 14:49:17 CST 2024
  1. 模拟删除数据

# 删除表
postgres=# truncate table arraytab5;
TRUNCATE TABLE

postgres=# select oid,relname,relfilenode,reltoastrelid from pg_class where relname='arraytab5';
oid | relname | relfilenode | reltoastrelid
--------+-----------+-------------+---------------
126891 | arraytab5 | 126896 | 126894
(1 row)

postgres=# select oid,relname,relfilenode,reltoastrelid from pg_class where relname like '%pg_toast_126891%';
oid | relname | relfilenode | reltoastrelid
--------+-----------------------+-------------+---------------
126894 | pg_toast_126891 | 126897 | 0
126895 | pg_toast_126891_index | 126898 | 0


--truncate 后文件也从操作系统中删除了
[root@rac1 5]# ls -li 126891
ls: cannot access 126891: No such file or directory

[root@rac1 5]# ls -li 126894
ls: cannot access 126894: No such file or directory

[root@rac1 5]# ls -li 126895
ls: cannot access 126895: No such file or directory
  1. 恢复OS已经删除的数据文件

cat /opt/data6666/log/postgresql_log.Tue.csv
2024-12-11 14:47:02.142 CST,,,76839,,674ea544.12c27,148,,2024-12-03 14:29:24 CST,,0,LOG,00000,"checkpoint complete: wrote 4 buffers (0.0%); 0 WAL file(s) added, 0 removed, 0 recycled; write=0.004 s, sync=0.002 s, total=0.008 s; sync files=4, longest=0.001 s, average=0.001 s; distance=10 kB, estimate=960 kB; 【lsn=0/54DB3DC0】, redo lsn=0/54DB3D68",,,,,,,,,"","checkpointer",,0
....
2024-12-11 14:50:51.186 CST,"postgres","postgres",25665,"[local]",67592eb3.6441,45,"TRUNCATE TABLE",2024-12-11 14:18:27 CST,18/0,0,LOG,00000,"duration: 12.244 ms",,,,,,,,,"psql","client backend",,0
2024-12-11 14:51:15.643 CST,"postgres","postgres",25665,"[local]",67592eb3.6441,46,"SELECT",2024-12-11 14:18:27 CST,18/0,0,LOG,00000,"duration: 0.188 ms",,,,,,,,,"psql","client backend",,0
2024-12-11 14:51:43.611 CST,"postgres","postgres",25665,"[local]",67592eb3.6441,47,"SELECT",2024-12-11 14:18:27 CST,18/0,0,LOG,00000,"duration: 0.284 ms",,,,,,,,,"psql","client backend",,0
2024-12-11 14:52:02.244 CST,,,76839,,674ea544.12c27,149,,2024-12-03 14:29:24 CST,,0,LOG,00000,"checkpoint starting: time",,,,,,,,,"","checkpointer",,0
2024-12-11 14:52:02.970 CST,,,76839,,674ea544.12c27,150,,2024-12-03 14:29:24 CST,,0,LOG,00000,"checkpoint complete: wrote 8 buffers (0.0%); 0 WAL file(s) added, 0 removed, 0 recycled; write=0.715 s, sync=0.001 s, total=0.726 s; sync files=10, longest=0.001 s, average=0.001 s; distance=45 kB, estimate=869 kB; 【lsn=0/54DBF1C0】, redo lsn=0/54DBF168",,,,,,,,,"","checkpointer",,0


只能从日志中解析

postgres=# select pg_walfile_name('0/54DB3DC0');
pg_walfile_name
--------------------------
000000050000000000000054
(1 row)

postgres=# select pg_walfile_name('0/54DBF1C0');
pg_walfile_name
--------------------------
000000050000000000000054


[postgres@rac1 pg_wal]$ pg_waldump 000000050000000000000054 -s 0/54DB3DC0 -e 0/54DBF1C0 > /tmp/wal.log
[postgres@rac1 pg_wal]$ cat /tmp/wal.log | grep "rmgr: Transaction"
rmgr: Transaction len (rec/tot): 266/ 266, tx: 2958, lsn: 0/54DBF020, prev 0/54DBEF90, desc: COMMIT 2024-12-11 14:50:51.183676 CST; rels: base/5/126895 base/5/126894 base/5/126891; inval msgs: catcache 55 catcache 54 catcache 55 catcache 54 catcache 55 catcache 54 relcache 126891 relcache 126894 relcache 126895 relcache 126894 relcache 126895

注意这里,可以看见这里的信息正式truncate前的表名
rels: base/5/126895 base/5/126894 base/5/126891;

所以缺少的pg_class 信息我们可以从 wal 日志中得到,我们可以得到如下被删除表的信息
rmtab_info = [126891,126891,'arraytab5',126894] oid,relfilenode,arraytab5,reltoastrelid
rmtab_toast_info = [126894,126894,'pg_toast_126891',0] toast的相关信息
need recover relfilenode:126891
need recover toast filenode:12689

# 为了方便使用命令生成需要恢复的文件
cat /tmp/wal.log | grep "rmgr: Transaction"|awk -F "rels:|inval msgs:" '{print $2}'|awk -F" |/|;" '{print $4,$7,$10}'|awk '{print "rmtab_info = [" $3 "," $3 "," "\47tabname\47," $2 "]" "\n" "rmtab_toast_info = [" $2 "," $2 "," "\47pg_toast_"$3 "\47," "0]\n" "need recover relfilenode:" $3 "\n" "need recover toast filenode:" $2}'

rmtab_info = [126891,126891,'tabname',126894] --|
rmtab_toast_info = [126894,126894,'pg_toast_126891',0] --| 复制到 config.txt中
need recover relfilenode:126891 --|
need recover toast filenode:126894 --| 需要使用os恢复的文件

将上面'tabname'替换成为 'arraytab5'

# 获得表名对应 inode
-- 先根据文件名获得16进制,方便下一步查询
126891 字符串转16进制 = 0x31 0x32 0x36 0x38 0x39 0x31 = 31 3632 3938 31

ls -lid /opt/data6666/base/5
69762042 drwx------ 2 postgres dba 12288 Dec 9 10:24 /opt/data6666/base/5

xfs_db -r /dev/mapper/rhel-root
xfs_db> inode 69762042
xfs_db> dblock 0
xfs_db> addr
current
byte offset 10240110592, length 4096
buffer block 20000216 (fsbno 4360123), 8 bbs
inode 69762042, dir inode 69762042, type dir2

dd if=/dev/mapper/rhel-root bs=1 skip=10240110592 count=4096 | od -v -x | grep -B1 "31.* 3632 3938 .*31"
0003120 3106 3632 3738 e838 0000 0000 3904 4806
0003140 ffff 1800 2804 8082 3106 3632 3938 de31

ffff 1800 2804 8082 --> ffff 是删除标记 2804 8082 是INODE = 0428 8280 = 69763712 --> 找到INODE

# 根据 INODE 获得删除的文件
[root@rac1 5]# xfs_db -r -c "inode 69763712" -c "addr" /dev/mapper/rhel-root
current
byte offset 10240557056, length 256
buffer block 20001088 (fsbno 4360232), 16 bbs
inode 69763712, dir inode -1, type inode

dd if=/dev/mapper/rhel-root bs=1 skip=10240557056 count=256 | od -v -x
4e49 0000 0202 0000 0000 ec03 0000 3601
0000 0000 0000 0000 0000 0000 0000 3200
5967 cd35 6112 64e2 5967 4b36 0f0b 0c1a
5967 9236 c439 f279 0000 0000 0000 0000
0000 0000 0000 0000 0000 0000 0000 0000
0000 0200 0000 0000 0000 0000 0540 0272
ffff ffff
0000 0000 0000 0000 0000 7308 a056 0200 --|
0000 0000 0000 0000 0000 0000 0000 0000 | extent 信息
0000 0000 0000 0000 0000 0000 0000 0000 --|
0000 0000 0000 0000 0000 0000 0000 0000
0000 0000 0000 0000 0000 0000 0000 0000
0000 0000 0000 0000 0000 0000 0000 0000
0000 0000 0000 0000 0000 0000 0000 0000
0000 0000 0000 0000 0000 0000 0000 0000
0000 0000 0000 0000 0000 0000 0000 0000
0000 0000 0000 0000 0000 0000


0000 0000 0000 0000 0000 7308 a056 0200 反过来 = 00 00 00 00 00 00 00 00 00 00 08 73 56 a0 00 02 =
00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00001000 01110011 01010110 10100000 00000000 00000010 =
0 000000000000000000000000000000000000000000000000000000 0000000000000000000000000000010000111001101010110101 000000000000000000010 =
0 0 4430517 2

[startoff,startblock,blockcount,extentflag]
0:[0,4430517,2,0]


1) 首先计算文件在那个AG (根据INODE 69763712 计算)
# 首先要得到INODE在整个文件系统中的偏移量, 先假设 69763712 在第1个AG中(sb 0)
--Low bits 是block里存放的第几个inode
xfs_db -r -c "sb 0" -c "p inopblog" /dev/mapper/rhel-root
inopblog = 4 --> inode num 转成2进制,从后往前取 4 位

--Middle bits 在AG的哪个block
xfs_db -r -c "sb 0" -c "p agblklog" /dev/mapper/rhel-root
agblklog = 22 --> inode num 转成2进制,从后往前取 4 位后,在往前取 22 位

--High bits 在哪个AG
--> inode num 转成2进制,除去低位的 4+22=26 位后,剩下的就是AG

--假设在 sb 0 AG中 High bit(ag_number) Middle bits(block_in_ag) Low Bits(inode_in_blk)
69763712 = 1 0000101000100000101000 0000
= 10进制 = 1 165928 0

可以看到假设错误,因为假设是在第一个AG 即 AG=0中, 但是计算出来的AG=1, 即第2个AG中,那么按照AG=1 重新计算
xfs_db -r -c "sb 1" -c "p inopblog" /dev/mapper/rhel-root
inopblog = 4 --> inode num 转成2进制,从后往前取 4 位

--Middle bits 在AG的哪个block
xfs_db -r -c "sb 1" -c "p agblklog" /dev/mapper/rhel-root
agblklog = 22 --> inode num 转成2进制,从后往前取 4 位后,在往前取 22 位

--High bits 在哪个AG
--> inode num 转成2进制,除去低位的 4+22=26 位后,剩下的就是AG

-文件在sb 1 AG中 High bit(ag_number) Middle bits(block_in_ag) Low Bits(inode_in_blk)
69763712 = 1 0000101000100000101000 0000
= 10进制 = 1 165928 0

2) 计算 dd 命令的 skip 参数值
--sb 0 相关参数
--1个Inode 大小
xfs_db -r -c "sb 1" -c "p inodesize" /dev/mapper/rhel-root
inodesize = 256

xfs_db -r -c "sb 3" -c "p agblocks" /dev/mapper/rhel-root
agblocks = 2334208

xfs_db -r -c "sb 3" -c "p blocksize" /dev/mapper/rhel-root
blocksize = 4096

xfs_db -r -c "sb 3" -c "p agcount" /dev/mapper/rhel-root
agcount = 4

#每个block 可以容纳多少个inode
inode_nums_per_blk = blocksize/inodesize = 4096/256 = 16

--skip 值的计算方法如下:
0:[0,4430517,2,0]
4430517 = 10000111001101010110101 = 1 0000111001101010110101(22 位,取 agblklog 的长度) = 1 236213

skip = (agblocks*blocksize*ag号 + 块号*blocksize)/blocksize = (2334208*4096*1 + 236213*4096)/4096= 2570421
--count 值的计算方法如下:
count=count 即 bmap 中的 count 值
--seek 值的计算方法如下:
seek=offset 即 bmap 中的 offset 值

如上面 bmap 转化为下面命令:
--第1行 skip = (2334208*4096*1 + 236213*4096)/4096= 2570421
dd if=/dev/mapper/rhel-root bs=4096 skip=2570421 count=2 of=/mnt/hgfs/shared/126891

[root@rac1 ~]# ls -l /mnt/hgfs/shared/126891
-rwxrwxrwx 1 root root 8192 Dec 11 15:42 /mnt/hgfs/shared/126891

[root@rac1 ~]# md5sum /mnt/hgfs/shared/126891
97a75feb46de9bcc88fcb067db7165a6 /mnt/hgfs/shared/126891

# 接下来将文件复制到数据库目录中,使用工具进行数据抽取
cp /mnt/hgfs/shared/126891 /opt/data6666/base/5

[root@rac1 ~]# ls -ltr /opt/data6666/base/5/126891
-rwxr-xr-x 1 root root 8192 Dec 11 15:43 /opt/data6666/base/5/126891 <-----
  1. 使用恢复工具抽取数据

# 使用恢复工具进行数据抽取
1) 配置参数文件
[postgres@rac1 dist]$ cat config.txt
page_size = 8192
pg_class_oid = 1259
pg_attribute_oid = 1249
rows_perfile = 10000
output_directory = /mnt/hgfs/shared/zodu_pg/zodu_pg_single/db_recovery
largedata_directory = /mnt/hgfs/shared/zodu_pg/zodu_pg_single/db_recovery/data
rmtab_info = [126891,126891,'arraytab5',126894]
rmtab_toast_info = [126894,126894,'pg_toast_126891',0]

2) 开始抽取数据
[root@rac1 zodu_pg_single]# python zodupg.py -t arraytab5
Toast 126894 may be already removed.
('filepath', '/opt/data6666/base/5/126891')
  1. 查看抽取结果

[root@rac1 dist]# cd ..
[root@rac1 zodu_pg_single]# cd db_recovery/
[root@rac1 db_recovery]# ls -ltr
total 5
drwxrwxrwx 1 root root 0 Dec 4 18:01 data
-rwxrwxrwx 1 root root 72 Dec 11 15:48 create_table.sql
-rwxrwxrwx 1 root root 1427 Dec 11 15:48 arraytab5.csv
-rwxrwxrwx 1 root root 106 Dec 11 15:48 impdp_table.sql

[root@rac1 db_recovery]# cat arraytab5.csv
b1,array1,array2
t,{10001},{meeting}
t,{10001,10002},{meeting,lunch}
t,{10001,10002,10003,10004},{{meeting,lunch},{training,presentation}}
t,{{{101},{102}},{{103},{104}},{{105},{106}}},{{{aaa},{bbb}},{{ccc},{ddd}},{{eee},{fff}}}
t,{{{101,102},{103,104}},{{105,106},{107,108}},{{109,110},{111,112}}},{{{aaa,bbb},{ccc,ddd}},{{eee,fff},{ggg,hhh}},{{iii,jjj},{kkk,lll}}}
t,{10001,10002,10003,10004},{{{aaa},{bbb}},{{ccc},{ddd}}}
t,{10001,10002,10003,10004},{{{aaa,bbb,ccc},{ddd,eee,fff},{ggg,hhh,iii}},{{jjj,kkk,lll},{mmm,nnn,ooo},{ppp,qqq,rrr}},{{sss,ttt,uuu},{vvv,www,xxx},{yyy,zzz,aaa}}}
t,{10001,10002,10003,10004},{{meeting},{training}}
t,{{10001,10002,1003},{10004,10005,1006},{10007,10008,1009}},{{meeting},{training}}
t,{10001},{meeting}
t,{10001,10002},{meeting,lunch}
t,{10001,10002,10003,10004},{{meeting,lunch},{training,presentation}}
t,{{{101},{102}},{{103},{104}},{{105},{106}}},{{{aaa},{bbb}},{{ccc},{ddd}},{{eee},{fff}}}
t,{{{101,102},{103,104}},{{105,106},{107,108}},{{109,110},{111,112}}},{{{aaa,bbb},{ccc,ddd}},{{eee,fff},{ggg,hhh}},{{iii,jjj},{kkk,lll}}}
t,{10001,10002,10003,10004},{{{aaa},{bbb}},{{ccc},{ddd}}}
t,{10001,10002,10003,10004},{{{aaa,bbb,ccc},{ddd,eee,fff},{ggg,hhh,iii}},{{jjj,kkk,lll},{mmm,nnn,ooo},{ppp,qqq,rrr}},{{sss,ttt,uuu},{vvv,www,xxx},{yyy,zzz,aaa}}}
t,{10001,10002,10003,10004},{{meeting},{training}}
t,{{10001,10002,1003},{10004,10005,1006},{10007,10008,1009}},{{meeting},{training}}

[root@rac1 db_recovery]# cat create_table.sql
CREATE TABLE arraytab5(
b1 boolean,
array1 integer[],
array2 text[]
);

[root@rac1 db_recovery]# cat impdp_table.sql
COPY arraytab5 FROM '/mnt/hgfs/shared/zodu_pg/zodu_pg_single/db_recovery/arraytab5.csv' WITH csv header;


总结:

从恢复的过程中,我们可以发现整个恢复的入口在于获取到文件名的inode号,而获取inode号的条件是先找到 drop 表对应的 relfilenode,好在这个信息在wal日志中有记录,否则恢复难度就会大大增加。此外误删除数据库不能再有数据的写入,否则如果数据被覆盖,则无法再进行数据的恢复。

所以:备份仍然是数据安全的重要保障,因为异常恢复无法百分百保证能够恢复成功。


THE END

墨天轮从乐知乐享的数据库技术社区蓄势出发,全面升级,提供多类型数据库管理服务。墨天轮数据库管理服务旨在为用户构建信赖可托付的数据库环境,并为数据库厂商提供中立的生态支持。

服务官网:https://www.modb.pro/service

点击进入作者服务团队主页

本文分享自微信公众号 - 墨天轮(enmocs)。
如有侵权,请联系 support@oschina.cn 删除。
本文参与“OSC源创计划”,欢迎正在阅读的你也加入,一起分享。

展开阅读全文
加载中
点击引领话题📣 发布并加入讨论🔥
0 评论
0 收藏
0
分享
返回顶部
顶部