Accidentally deleting a storage pool destroys its data, so by default Ceph implements two mechanisms to prevent pool deletion. To delete a pool, both mechanisms must first be disabled.
The first mechanism is the pool's nodelete flag, whose value must be false (the default is already false).
Check it:  ceph osd pool get pool-name nodelete
Change it: ceph osd pool set pool-name nodelete false
The second mechanism is the cluster-wide configuration option mon_allow_pool_delete, which defaults to "false", meaning pools cannot be deleted. The flag can be changed at runtime without restarting ceph-mon.target:
ceph tell mon.* injectargs --mon-allow-pool-delete={true|false}
The recommended practice is to set it to true right before deleting a pool, then set it back to false once the deletion is done.
[root@ceph01 ceph]# ceph osd pool create sunday_test 16 16
pool 'sunday_test' created
[root@ceph01 ceph]# ceph osd pool ls
sunday_test
...
[root@ceph01 ceph]# ceph osd pool rm sunday_test sunday_test --yes-i-really-really-mean-it
Error EPERM: pool deletion is disabled; you must first set the mon_allow_pool_delete config option to true before you can destroy a pool
[root@ceph01 ceph]# ceph tell mon.* injectargs --mon-allow-pool-delete=true
mon.ceph01: injectargs:mon_allow_pool_delete = 'true'
mon.ceph02: injectargs:mon_allow_pool_delete = 'true'
mon.ceph03: injectargs:mon_allow_pool_delete = 'true'
[root@ceph01 ceph]# ceph osd pool rm sunday_test sunday_test --yes-i-really-really-mean-it
pool 'sunday_test' removed
[root@ceph01 ceph]# ceph tell mon.* injectargs --mon-allow-pool-delete=false
mon.ceph01: injectargs:mon_allow_pool_delete = 'false'
mon.ceph02: injectargs:mon_allow_pool_delete = 'false'
mon.ceph03: injectargs:mon_allow_pool_delete = 'false'
Deleting a pool (persisting mon_allow_pool_delete in ceph.conf)
[root@ceph01 ceph]# ceph osd pool create sunday_test 16 16
pool 'sunday_test' created
[root@ceph01 ceph]# ceph osd pool get sunday_test nodelete
nodelete: false
vim /etc/ceph/ceph.conf
...
[mon]
mon_allow_pool_delete = true
[root@ceph01 ceph]# ceph-deploy --overwrite-conf config push ceph01 ceph02 ceph03
[root@ceph01 ceph]# export HOSTS="ceph01 ceph02 ceph03"
[root@ceph01 ceph]# for i in $HOSTS; do ssh $i "systemctl restart ceph-mon.target";done
[root@ceph01 ceph]# ceph osd pool get sunday_test nodelete
nodelete: false
[root@ceph01 ceph]# ceph osd pool rm sunday_test sunday_test --yes-i-really-really-mean-it
pool 'sunday_test' removed
Replica count
The replica count (size) can range from 0 to 10; it cannot exceed 10.
Available capacity changes with the replica count:
[root@ceph01 ceph]# ceph osd pool ls detail | grep sunday-test
pool 14 'sunday-test' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 16 pgp_num 16 autoscale_mode warn last_change 66 flags hashpspool stripe_width 0
[root@ceph01 ceph]# ceph osd pool get sunday-test size
size: 3
[root@ceph01 ceph]# ceph osd pool get sunday-test min_size
min_size: 2
[root@ceph01 ceph]# ceph df | grep sunday-test
sunday-test 14 16 180 MiB 3 540 MiB 0.19 94 GiB
[root@ceph01 ceph]# ceph osd pool set sunday-test size 6
set pool 14 size to 6
[root@ceph01 ceph]# ceph df | grep sunday-test
sunday-test 14 16 90 MiB 3 540 MiB 0.19 47 GiB
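A small follow-up sketch that restores the replica count and adjusts min_size on the same pool; the values are only an example, and outputs are omitted:
ceph osd pool set sunday-test size 3        # back to 3 replicas
ceph osd pool set sunday-test min_size 2    # minimum replicas required to serve I/O
ceph osd pool get sunday-test size          # expect: size: 3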
Snapshots
# Create a snapshot
ceph osd pool mksnap <pool-name> <snap-name>
rados -p <pool-name> mksnap <snap-name>
# List snapshots
rados -p <pool-name> lssnap
# Roll an object back to a snapshot
rados -p <pool-name> rollback <object-name> <snap-name>
rados -p <pool-name> rollback ceph.conf <snap-name> # ceph.conf is an object stored in the snapshot
# Delete a snapshot
ceph osd pool rmsnap <pool-name> <snap-name>
rados -p <pool-name> rmsnap <snap-name>
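A minimal end-to-end sketch of these commands, assuming the sunday-test pool from above plus a hypothetical snapshot name snap1 and object test.txt; outputs are omitted:
echo v1 > test.txt
rados -p sunday-test put test.txt ./test.txt        # upload the object (content v1)
ceph osd pool mksnap sunday-test snap1              # snapshot the pool
rados -p sunday-test lssnap                         # snap1 should be listed
echo v2 > test.txt
rados -p sunday-test put test.txt ./test.txt        # overwrite it (content v2)
rados -p sunday-test rollback test.txt snap1        # roll the object back to the v1 version
rados -p sunday-test get test.txt ./restored.txt    # restored.txt should contain v1 again
ceph osd pool rmsnap sunday-test snap1              # clean up the snapshot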
Quotas
First, create two test pools for the quota examples:
[root@ceph01 ~]# ceph osd pool create demo-test1 8 8
pool 'demo-test1' created
[root@ceph01 ~]# ceph osd pool create demo-test2 8 8
pool 'demo-test2' created
[root@ceph01 ~]# ceph osd pool ls | grep demo-test
demo-test1
demo-test2
[root@ceph01 ~]# ceph osd pool ls detail | grep demo-test
pool 26 'demo-test1' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 8 pgp_num 8 autoscale_mode warn last_change 243 flags hashpspool stripe_width 0
pool 27 'demo-test2' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 8 pgp_num 8 autoscale_mode warn last_change 246 flags hashpspool stripe_width 0
The quota command
[root@ceph01 ~]# ceph osd pool --help | grep set-quota
osd pool set-quota <poolname> max_objects|max_bytes <val> set object or byte limit on pool
Object count quota
For example, limit pool demo-test1 to at most 3 objects:
[root@ceph01 ~]# ceph osd pool set-quota demo-test1 max_objects 3
set-quota max_objects = 3 for pool demo-test1
[root@ceph01 ~]# ceph osd pool get-quota demo-test1
quotas for pool 'demo-test1':
max objects: 3 objects
max bytes : N/A
[root@ceph01 ~]# echo 11 > test.txt
[root@ceph01 ~]# rados put 1.txt ./test.txt --pool=demo-test1
[root@ceph01 ~]# rados put 2.txt ./test.txt --pool=demo-test1
[root@ceph01 ~]# rados put 3.txt ./test.txt --pool=demo-test1
[root@ceph01 ~]# rados put 4.txt ./test.txt --pool=demo-test1
[root@ceph01 ~]# rados put 5.txt ./test.txt --pool=demo-test1
2024-06-17 02:03:52.783 7fded95f99c0 0 client.225570.objecter FULL, paused modify 0x559c72216ff0 tid 0
# Enforcement is delayed: 4 objects were actually written, and only the 5th put was blocked
[root@ceph01 ~]# rados ls --pool=demo-test1
3.txt
2.txt
4.txt
1.txt
[root@ceph01 ~]# ceph df
...
POOLS:
POOL ID PGS STORED OBJECTS USED %USED MAX AVAIL
...
demo-test1 26 8 12 B 4 768 KiB 0 94 GiB
[root@ceph01 ~]# ceph -s
cluster:
id: ed040fb0-fa20-456a-a9f0-c9a96cdf089e
health: HEALTH_WARN
1 pool(s) full # the pool-full warning appears here
application not enabled on 1 pool(s)
Maximum capacity quota
For example, limit pool demo-test2 to a maximum of 100 MB:
[root@ceph01 ~]# ceph osd pool set-quota demo-test2 max_bytes 100M
set-quota max_bytes = 104857600 for pool demo-test2
[root@ceph01 ~]# ceph osd pool get-quota demo-test2
quotas for pool 'demo-test2':
max objects: N/A
max bytes : 100 MiB
# Create a 30 MB test file
[root@ceph01 ~]# dd if=/dev/zero of=./test.bin bs=1M count=30
30+0 records in
30+0 records out
31457280 bytes (31 MB) copied, 0.024166 s, 1.3 GB/s
[root@ceph01 ~]# rados put test1.bin ./test.bin --pool=demo-test2
[root@ceph01 ~]# rados put test2.bin ./test.bin --pool=demo-test2
[root@ceph01 ~]# rados put test3.bin ./test.bin --pool=demo-test2
[root@ceph01 ~]# rados put test4.bin ./test.bin --pool=demo-test2
[root@ceph01 ~]# rados put test5.bin ./test.bin --pool=demo-test2
[root@ceph01 ~]# rados put test6.bin ./test.bin --pool=demo-test2
2024-06-17 02:11:43.196 7fb3129019c0 0 client.207577.objecter FULL, paused modify 0x555d78cfcac0 tid 0
# Enforcement is delayed again; only the 6th put was blocked
[root@ceph01 ~]# rados ls --pool=demo-test2
test3.bin
test5.bin
test1.bin
test2.bin
test4.bin
[root@ceph01 ~]# ceph df
...
POOLS:
POOL ID PGS STORED OBJECTS USED %USED MAX AVAIL
...
demo-test2 27 8 150 MiB 5 450 MiB 0.16 93 GiB
[root@ceph01 ~]# ceph -s
cluster:
id: ed040fb0-fa20-456a-a9f0-c9a96cdf089e
health: HEALTH_WARN
2 pool(s) full # a second pool now reports full
application not enabled on 2 pool(s)
Removing quotas (setting a quota to 0 removes it)
[root@ceph01 ~]# ceph osd pool set-quota demo-test1 max_objects 0
set-quota max_objects = 0 for pool demo-test1
[root@ceph01 ~]# ceph osd pool set-quota demo-test2 max_bytes 0
set-quota max_bytes = 0 for pool demo-test2
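A quick verification sketch that the quotas are gone; after setting them to 0, both limits should read N/A again:
ceph osd pool get-quota demo-test1
ceph osd pool get-quota demo-test2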
Compression
ceph osd pool set <pool-name> compression_algorithm snappy
Supported algorithms: none, zlib, lz4, zstd, snappy (the default is snappy)
ceph osd pool set <pool-name> compression_mode aggressive
none        never compress (the default)
passive     compress only if the client hints COMPRESSIBLE
aggressive  compress unless the client hints INCOMPRESSIBLE
force       always compress
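A minimal sketch that enables and then verifies compression on a hypothetical pool named compr-test (the pool name is only an example):
ceph osd pool create compr-test 16 16
ceph osd pool set compr-test compression_algorithm zstd
ceph osd pool set compr-test compression_mode aggressive
ceph osd pool get compr-test compression_algorithm   # expect: compression_algorithm: zstd
ceph osd pool get compr-test compression_mode        # expect: compression_mode: aggressive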
Erasure-coded pools: create an erasure-code profile first, then create a pool that uses it
ceph osd erasure-code-profile set myprofile k=4 m=2 crush-failure-domain=osd
ceph osd pool create mypool 16 16 erasure myprofile
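A short follow-up sketch that inspects the profile and the resulting pool; the names match the two commands above, and outputs are omitted:
ceph osd erasure-code-profile ls                     # myprofile should be listed
ceph osd erasure-code-profile get myprofile          # shows k=4, m=2, crush-failure-domain=osd
ceph osd pool ls detail | grep mypool                # the pool type shows 'erasure'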
[root@ceph01 ceph]# ceph osd pool get sunday-test pg_num
pg_num: 16
[root@ceph01 ceph]# ceph osd pool get sunday-test pgp_num
pgp_num: 16
Dynamic adjustment
Constraint: pg_num >= pgp_num (in practice, keep pgp_num equal to pg_num)
[root@ceph01 ceph]# ceph tell mon.* injectargs --mon-max-pg-per-osd=1000
mon.ceph01: injectargs:mon_max_pg_per_osd = '1000' (not observed, change may require restart)
Error ENXIO: problem getting command descriptions from mon.ceph02
mon.ceph03: injectargs:mon_max_pg_per_osd = '1000' (not observed, change may require restart)
[root@ceph01 ceph]# ceph osd pool set sunday-pool pg_num 256
set pool 13 pg_num to 256
[root@ceph01 ceph]# ceph osd pool ls detail | grep sunday-pool
pool 13 'sunday-pool' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 256 pgp_num 256 autoscale_mode warn last_change 74 lfor 0/0/72 flags hashpspool stripe_width 0
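The transcript above shows pgp_num following pg_num automatically; on releases where it does not, or to control it explicitly, pgp_num can be raised by hand, a minimal sketch:
ceph osd pool set sunday-pool pgp_num 256
ceph osd pool get sunday-pool pgp_num                # expect: pgp_num: 256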
ceph pg stat
ceph pg dump
Rules of thumb for pg_num
Fewer than 5 OSDs:       pg_num 256
Between 5 and 10 OSDs:   pg_num 512
Between 10 and 50 OSDs:  pg_num 2048
More than 50 OSDs:       pg_num 4096
Or compute (number of OSDs x 100) / replica count and round the result up to a power of 2 (2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, ...)
For example, a cluster with 200 OSDs and 3 replicas:
(200 x 100) / 3 = 6666.7, rounded up to the next power of 2: 8192
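A minimal shell sketch of that calculation (the values 200 and 3 are just the example above):
osds=200; replicas=3
raw=$(( osds * 100 / replicas ))                     # 6666
pg=1; while [ "$pg" -lt "$raw" ]; do pg=$(( pg * 2 )); done
echo "$pg"                                           # 8192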
PG states: the key states are active and clean; all other states are special, and some of them are only transient.
Troubleshooting steps for abnormal PGs
- 1. Restart the related OSD services (see the sketch after this list)
- 2. Adjust the relevant attribute parameters
- 3. Reset the PG
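For step 1, a minimal sketch: locate the OSDs serving the problem PG, then restart the corresponding ceph-osd unit on its host (the PG id and OSD id below are just examples taken from the repair output further down):
ceph pg map 7.c4                                     # shows the up/acting OSDs for the PG
systemctl restart ceph-osd@4                         # run on the host that carries osd.4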
# Get PGs stuck in a given state
[root@ceph01 ceph]# ceph pg dump_stuck stale
ok
# Note: stuck states: inactive|unclean|stale|undersized|degraded
# List inconsistent PGs
[root@ceph01 ceph]# rados list-inconsistent-pg sunday-pool
[]
# List inconsistent RADOS objects in a PG
[root@ceph01 ceph]# rados list-inconsistent-obj 7.c4
{"epoch":156, "inconsistents":[]}
# List inconsistent snapsets in the specified placement group:
[root@ceph01 ceph]# rados list-inconsistent-snapset 7.c4
{"epoch":156, "inconsistents":[]}
# Repair a damaged PG
[root@ceph01 ceph]# ceph pg repair 7.c4
instructing pg 7.c4 on osd.4 to repair
[root@ceph01 ceph]# ceph pg dump
...
8.37 0 0 0 0 0 0 0 0 0 0 active+clean 2024-04-22 07:33:41.364000 0'0 75:57 [2,0,1] 2 [2,0,1] 2 0'0 2024-04-22 07:33:41.363932 0'0 2024-04-18 05:55:37.319596 0
...
[root@ceph01 ceph]# ceph pg map 8.37
osdmap e75 pg 8.37 (8.37) -> up [2,0,1] acting [2,0,1]
Viewing the PG map of an object
[root@ceph01 ceph]# rados put /etc/hosts /etc/hosts --pool sunday-pool
[root@ceph01 ceph]# ceph osd map sunday-pool /etc/hosts
osdmap e75 pool 'sunday-pool' (13) object '/etc/hosts' -> pg 13.ec6af904 (13.4) -> up ([1,0,2], p1) acting ([1,0,2], p1)
[root@ceph01 ceph]#
kvm rbd
[root@ceph01 ceph]# ceph osd pool create sunday-rbd 16 16
pool 'sunday-rbd' created
[root@ceph01 ceph]# ceph osd pool application enable sunday-rbd rbd
enabled application 'rbd' on pool 'sunday-rbd'
[root@ceph01 ceph]# rbd pool init sunday-rbd
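A minimal sketch (the image name sunday-image is hypothetical) that creates and inspects an RBD image in the new pool; outputs are omitted:
rbd create sunday-rbd/sunday-image --size 1024       # --size is in MiB by default, so this is 1 GiB
rbd ls sunday-rbd                                    # sunday-image should be listed
rbd info sunday-rbd/sunday-image                     # shows size, object size, features, etc.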
Permissions
[root@ceph01 ceph]# ceph auth get-or-create client.sunday mon 'allow r' osd 'allow class-read object_prefix rbd_children, allow rwx pool=sunday-rbd'
[client.sunday]
key = AQBodiZmpLvoBhAAM5qC1YCiLyhB9oDGwP5t3g==
[root@ceph01 ceph]# ceph auth ls
client.sunday
key: AQBodiZmpLvoBhAAM5qC1YCiLyhB9oDGwP5t3g==
caps: [mon] allow r
caps: [osd] allow class-read object_prefix rbd_children, allow rwx pool=sunday-rbd
[root@ceph01 ceph]# ceph auth get client.sunday -o ceph.client.sunday.keyring
exported keyring for client.sunday
[root@ceph01 ceph]# cat ceph.client.sunday.keyring
[client.sunday]
key = AQBodiZmpLvoBhAAM5qC1YCiLyhB9oDGwP5t3g==
caps mon = "allow r"
caps osd = "allow class-read object_prefix rbd_children, allow rwx pool=sunday-rbd"
[root@ceph01 ceph]# scp ceph.client.sunday.keyring root@192.168.77.42:/etc/ceph/ceph.client.sunday.keyring
[root@ceph01 ceph]# scp ceph.client.sunday.keyring root@192.168.77.43:/etc/ceph/ceph.client.sunday.keyring
[root@ceph01 ceph]# ceph --user sunday -s
Integrating KVM with Ceph
[root@ceph01 ceph]# vim ceph-client-sunday-secret.xml
<secret ephemeral='no' private='no'>
<usage type='ceph'>
<name>client.sunday secret</name>
</usage>
</secret>
[root@ceph01 ceph]# virsh secret-define --file ceph-client-sunday-secret.xml
Secret caf4336b-d2de-4699-8aee-94569531559b created
[root@ceph01 ceph]# virsh secret-set-value --secret caf4336b-d2de-4699-8aee-94569531559b --base64 $(ceph auth get-key client.sunday)
[root@ceph01 ceph]# virsh secret-list
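A quick sanity check that the libvirt secret really holds the Ceph key; both commands should print the same base64 string:
virsh secret-get-value caf4336b-d2de-4699-8aee-94569531559b
ceph auth get-key client.sunday; echo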
Two ways to provide the image
# Create an image directly in RBD
qemu-img create -f rbd rbd:sunday/sunday-image 1G
# Import an existing image file into RBD
qemu-img info /data/images/cirros-0.5.2.x86_64-disk.img # confirm the downloaded image is in qcow2 format
qemu-img convert -f qcow2 -O raw /data/images/cirros-0.5.2.x86_64-disk.img rbd:<pool-name>/<image-name>
rbd --user sunday --pool sunday-pool ls -l