分布式文件系统(Distributed File System)
- 分布式文件系统是指文件系统管理的物理存储资源不一定直接连接在本地节点上,而是通过计算机网络与节点相连
- 分布式文件系统的设计基于C/S模式
常用分布式文件系统
Lustre、Hadoop、FastDFS、Ceph、GlusterFS
什么是Ceph
- Ceph是一个分布式文件系统
- 具有高扩展、高可用、高性能的特点
- Ceph可以提供对象存储、块存储、文件系统存储
- Ceph可以提供PB级别的存储空间
- 软件定义存储作为存储行业的一大发展趋势,已经越来越受到市场认可
Ceph组件
- OSDs:存储设备
- Monitors:集群监控组件
- RBD:操纵块设备镜像的工具
- MDSs:存放文件系统的元数据(对象存储和块存储不需要该组件)
- Client:Ceph客户端
安装ceph
一、安装前准备
1、物理机作为所有节点yum源
[root@room9pc01 soft]# mkdir /var/ftp/ceph
[root@room9pc01 soft]# ls
rhcs2.0-rhosp9-20161113-x86_64.iso
[root@room9pc01 soft]# mount rhcs2.0-rhosp9-20161113-x86_64.iso /var/ftp/ceph/
[root@room9pc01 soft]# ls /var/ftp/ceph/
rhceph-2.0-rhel-7-x86_64 rhel-7-server-openstack-9-rpms rhscon-2.0-rhel-7-x86_64
[root@room9pc01 soft]# cd /var/ftp/ceph/rhceph-2.0-rhel-7-x86_64/
[root@room9pc01 rhceph-2.0-rhel-7-x86_64]# ls
EULA GPL MON OSD README RPM-GPG-KEY-redhat-release Tools TRANS.TBL
############################################################################
[root@client10 ~]# vim /etc/yum.repos.d/dvd.repo //客户端配置yum源
[Centos]
name=Centos7.4
baseurl=ftp://192.168.4.254/rhel7
enabled=1
gpgcheck=0
[MON]
name=MON
baseurl=ftp://192.168.4.254/ceph/rhceph-2.0-rhel-7-x86_64/MON
enabled=1
gpgcheck=0
[OSD]
name=OSD
baseurl=ftp://192.168.4.254/ceph/rhceph-2.0-rhel-7-x86_64/OSD
enabled=1
gpgcheck=0
[Tools]
name=Tools
baseurl=ftp://192.168.4.254/ceph/rhceph-2.0-rhel-7-x86_64/Tools
enabled=1
gpgcheck=0
[root@client10 ~]# yum repolist
已加载插件:fastestmirror
Centos | 4.1 kB 00:00:00
MON | 4.1 kB 00:00:00
OSD | 4.1 kB 00:00:00
Tools | 3.8 kB 00:00:00
(1/8): MON/primary_db | 40 kB 00:00:00
(2/8): OSD/group_gz | 447 B 00:00:00
(3/8): Centos/primary_db | 4.0 MB 00:00:00
(4/8): OSD/primary_db | 31 kB 00:00:00
(5/8): MON/group_gz | 489 B 00:00:00
(6/8): Tools/group_gz | 459 B 00:00:00
(7/8): Centos/group_gz | 137 kB 00:00:00
(8/8): Tools/primary_db | 31 kB 00:00:00
Determining fastest mirrors
源标识 源名称 状态
Centos Centos7.4 4,986
MON MON 41
OSD OSD 28
Tools Tools 33
repolist: 5,088
[root@client10 ~]# for i in 11 12 13
> do
> scp /etc/yum.repos.d/dvd.repo 192.168.4.$i:/etc/yum.repos.d/
> done //拷贝yum文件给其余node主机
2、修改/etc/hosts并同步到其他主机
[root@client10 ~]# vim /etc/hosts
192.168.4.10 client10
192.168.4.11 node1
192.168.4.12 node2
192.168.4.13 node3
[root@client10 ~]# for i in 11 12 13; do scp /etc/hosts 192.168.4.$i:/etc/; done
3、配置ssh无密码连接
[root@node1 ~]# ssh-keygen -N '' -f /root/.ssh/id_rsa //在node1上创建密钥
[root@node1 ~]# for i in 10 11 12 13
> do
> ssh-copy-id 192.168.4.$i
> done //传密钥给其他主机(包括自己)
4、配置NTP时间同步(物理机作为NTP服务器)
[root@room9pc01 ~]# vim /etc/chrony.conf //修改物理机时间同步配置文件
server 0.centos.pool.ntp.org iburst
allow 192.168.4.0/24
local stratum 10
[root@room9pc01 ~]# systemctl restart chronyd
#################################################################
[root@node1 ~]# vim /etc/chrony.conf //修改node节点时间同步配置文件
#server 0.centos.pool.ntp.org iburst
#server 1.centos.pool.ntp.org iburst
#server 2.centos.pool.ntp.org iburst
#server 3.centos.pool.ntp.org iburst
server 192.168.4.254 iburst //以物理机作为时间同步服务器
[root@node1 ~]# systemctl restart chronyd
[root@node1 ~]# for i in 10 12 13
> do
> scp /etc/chrony.conf 192.168.4.$i:/etc/
> done
[root@node1 ~]# for i in client10 node2 node3
> do
> ssh $i "systemctl restart chronyd"
> done
[root@node1 ~]# chronyc sources -v //查看时间同步情况
5、为每台node节点主机添加3块20G虚拟磁盘
二、部署ceph集群
1、安装部署软件
[root@node1 ~]# yum -y install ceph-deploy //以node1作为部署主机,安装ceph部署工具
[root@node1 ~]# ceph-deploy --help
[root@node1 ~]# mkdir ceph-cluster //创建ceph-deploy命令工作目录
[root@node1 ~]# cd ceph-cluster/
2、部署集群配置
[root@node1 ceph-cluster]# ls
[root@node1 ceph-cluster]# ceph-deploy new node1 node2 node3 //这里将3台monitor装在node节点上,实际生产环境应独立出来
[root@node1 ceph-cluster]# ls
ceph.conf ceph-deploy-ceph.log ceph.mon.keyring
[root@node1 ceph-cluster]# cat ceph.conf
[global]
fsid = 8b8cc6ac-7792-4541-82d1-e0e6bcfb640a
mon_initial_members = node1, node2, node3
mon_host = 192.168.4.11,192.168.4.12,192.168.4.13
auth_cluster_required = cephx
auth_service_required = cephx
auth_client_required = cephx
3、给所有节点安装软件包
[root@node1 ceph-cluster]# ceph-deploy install node1 node2 node3
[root@node1 ceph-cluster]# rpm -qa |grep ceph
libcephfs1-10.2.2-38.el7cp.x86_64
ceph-osd-10.2.2-38.el7cp.x86_64
ceph-deploy-1.5.33-1.el7cp.noarch
ceph-base-10.2.2-38.el7cp.x86_64
ceph-mds-10.2.2-38.el7cp.x86_64
ceph-common-10.2.2-38.el7cp.x86_64
ceph-mon-10.2.2-38.el7cp.x86_64
ceph-selinux-10.2.2-38.el7cp.x86_64
python-cephfs-10.2.2-38.el7cp.x86_64
ceph-radosgw-10.2.2-38.el7cp.x86_64
4、初始化所有节点的mon服务(各节点的主机名解析必须正确)
[root@node1 ceph-cluster]# ceph-deploy mon create-initial
[root@node2 ~]# ceph -s //任意主机查看集群状态
cluster e865557c-60fd-4f40-af88-7cfa7457e3e2
health HEALTH_ERR
no osds
monmap e1: 3 mons at {node1=192.168.4.11:6789/0,node2=192.168.4.12:6789/0,node3=192.168.4.13:6789/0}
election epoch 4, quorum 0,1,2 node1,node2,node3
osdmap e1: 0 osds: 0 up, 0 in
flags sortbitwise
pgmap v2: 64 pgs, 1 pools, 0 bytes data, 0 objects
0 kB used, 0 kB / 0 kB avail
64 creating
5、磁盘分区(这里以node1操作为例)
[root@node1 ceph-cluster]# lsblk //分区之前查看磁盘分区大小
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
sr0 11:0 1 1024M 0 rom
vda 252:0 0 20G 0 disk
├─vda1 252:1 0 1G 0 part /boot
└─vda2 252:2 0 19G 0 part
├─rhel-root 253:0 0 17G 0 lvm /
└─rhel-swap 253:1 0 2G 0 lvm [SWAP]
vdb 252:16 0 20G 0 disk
vdc 252:32 0 20G 0 disk
vdd 252:48 0 20G 0 disk
[root@node1 ceph-cluster]# parted /dev/vdb mklabel gpt //定义分区表格式
[root@node1 ceph-cluster]# parted /dev/vdb mkpart primary 1 50% //vdb分区,将前50%作为一个分区
[root@node1 ceph-cluster]# parted /dev/vdb mkpart primary 50% 100% //vdb分区,将后50%作为一个分区
[root@node1 ceph-cluster]# lsblk //查看分区后的磁盘分区大小
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
sr0 11:0 1 1024M 0 rom
vda 252:0 0 20G 0 disk
├─vda1 252:1 0 1G 0 part /boot
└─vda2 252:2 0 19G 0 part
├─rhel-root 253:0 0 17G 0 lvm /
└─rhel-swap 253:1 0 2G 0 lvm [SWAP]
vdb 252:16 0 20G 0 disk
├─vdb1 252:17 0 10G 0 part //vdb1和vdb2分别作为vdc和vdd的缓存盘
└─vdb2 252:18 0 10G 0 part
vdc 252:32 0 20G 0 disk
vdd 252:48 0 20G 0 disk
[root@node1 ceph-cluster]# chown ceph.ceph /dev/vdb1 //修改分区所有者,临时生效
[root@node1 ceph-cluster]# chown ceph.ceph /dev/vdb2
[root@node1 ceph-cluster]# vim /etc/udev/rules.d/70-vdb.rules //修改分区所有者,永久生效
ENV{DEVNAME}=="/dev/vdb1",OWNER="ceph",GROUP="ceph"
ENV{DEVNAME}=="/dev/vdb2",OWNER="ceph",GROUP="ceph"
6、初始化清空磁盘数据(在node1上操作)
[root@node1 ceph-cluster]# ceph-deploy disk zap node1:vdc node1:vdd
[root@node1 ceph-cluster]# ceph-deploy disk zap node2:vdc node2:vdd
[root@node1 ceph-cluster]# ceph-deploy disk zap node3:vdc node3:vdd
7、创建osd存储空间(在node1上操作)
[root@node1 ceph-cluster]# ceph-deploy osd create node1:vdc:/dev/vdb1 node1:vdd:/dev/vdb2
[root@node1 ceph-cluster]# ceph-deploy osd create node2:vdc:/dev/vdb1 node2:vdd:/dev/vdb2
[root@node1 ceph-cluster]# ceph-deploy osd create node3:vdc:/dev/vdb1 node3:vdd:/dev/vdb2
[root@node1 ceph-cluster]# ceph -s //查看集群状态
cluster e865557c-60fd-4f40-af88-7cfa7457e3e2
health HEALTH_OK //健康状态为OK,集群创建成功
monmap e1: 3 mons at {node1=192.168.4.11:6789/0,node2=192.168.4.12:6789/0,node3=192.168.4.13:6789/0}
election epoch 4, quorum 0,1,2 node1,node2,node3
osdmap e35: 6 osds: 6 up, 6 in //6个osd存储设备
flags sortbitwise
pgmap v83: 64 pgs, 1 pools, 0 bytes data, 0 objects
202 MB used, 119 GB / 119 GB avail //集群提供120G存储空间
64 active+clean
三、创建Ceph块存储
1、查看共享池
[root@node1 ceph-cluster]# ceph osd lspools
0 rbd, //rbd共享池空间为120G
2、创建镜像,查看镜像(在任意node节点操作,均可识别)
[root@node2 ~]# rbd create demo-image --image-feature layering --size 10G //默认从rbd中取出10G空间创建镜像demo-image
[root@node2 ~]# rbd create rbd/image --image-feature layering --size 10G //指定从rbd中取出10G空间创建镜像image
[root@node2 ~]# rbd list //列出镜像
demo-image
image
[root@node2 ~]# rbd info image //查看镜像信息
rbd image 'image':
size 10240 MB in 2560 objects
order 22 (4096 kB objects)
block_name_prefix: rbd_data.1037238e1f29
format: 2
features: layering
flags:
3、动态调整容量
[root@node2 ~]# rbd resize --size 5G image --allow-shrink //缩小容量为5G
Resizing image: 100% complete...done.
[root@node2 ~]# rbd info image
rbd image 'image':
size 5120 MB in 1280 objects
order 22 (4096 kB objects)
block_name_prefix: rbd_data.1037238e1f29
format: 2
features: layering
flags:
[root@node2 ~]# rbd resize --size 15G image //扩大容量至15G
Resizing image: 100% complete...done.
[root@node2 ~]# rbd info image
rbd image 'image':
size 15360 MB in 3840 objects
order 22 (4096 kB objects)
block_name_prefix: rbd_data.1037238e1f29
format: 2
features: layering
flags:
4、客户端通过KRBD访问
[root@client10 ~]# yum -y install ceph-common
[root@node1 ceph-cluster]# cd /etc/ceph/
[root@node1 ceph]# ls
ceph.client.admin.keyring ceph.conf rbdmap tmpgIWUMA
[root@node1 ceph]# scp ceph.conf 192.168.4.10:/etc/ceph/ //拷贝ceph集群配置文件(包含集群信息)至客户端
[root@node1 ceph]# scp ceph.client.admin.keyring client10:/etc/ceph/ //拷贝连接密钥至客户端,否则客户端无权限访问ceph集群
[root@client10 ~]# ls /etc/ceph/
ceph.client.admin.keyring ceph.conf rbdmap
[root@client10 ~]# rbd map image //将集群内镜像image映射为客户端本地磁盘
/dev/rbd0
[root@client10 ~]# lsblk
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
sr0 11:0 1 1024M 0 rom
vda 252:0 0 20G 0 disk
├─vda1 252:1 0 1G 0 part /boot
└─vda2 252:2 0 19G 0 part
├─rhel-root 253:0 0 17G 0 lvm /
└─rhel-swap 253:1 0 2G 0 lvm [SWAP]
rbd0 251:0 0 15G 0 disk //客户端增加一个15G分区
[root@client10 ~]# rbd showmapped //查看镜像挂载情况
id pool image snap device
0 rbd image - /dev/rbd0
[root@client10 ~]# rbd unmap /dev/rbd0 //卸载分区
[root@client10 ~]# lsblk
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
sr0 11:0 1 1024M 0 rom
vda 252:0 0 20G 0 disk
├─vda1 252:1 0 1G 0 part /boot
└─vda2 252:2 0 19G 0 part
├─rhel-root 253:0 0 17G 0 lvm /
└─rhel-swap 253:1 0 2G 0 lvm [SWAP]
[root@client10 ~]# rbd map image //重新映射镜像image为客户端本地磁盘
5、客户端格式化、挂载分区
[root@client10 ~]# mkfs.xfs /dev/rbd0 //格式化分区
[root@client10 ~]# mount /dev/rbd0 /mnt/
[root@client10 ~]# echo "test" > /mnt/test.txt //写入测试文件
6、创建镜像快照
[root@node1 ceph]# rbd snap ls image //查看镜像快照
[root@node1 ceph]# rbd snap create image --snap image-snap1 //给image镜像创建名为image-snap1的快照
[root@node1 ceph]# rbd snap ls image
SNAPID NAME SIZE
4 image-snap1 15360 MB
7、还原快照
[root@client10 ~]# rm -rf /mnt/test.txt
[root@client10 ~]# ls /mnt/test.txt
ls: 无法访问/mnt/test.txt: 没有那个文件或目录
[root@client10 ~]# umount /mnt //卸载/mnt
[root@node1 ceph]# rbd snap rollback image --snap image-snap1 //还原快照
Rolling back to snapshot: 100% complete...done.
[root@client10 ~]# mount /dev/rbd0 /mnt //重新挂载/mnt
[root@client10 ~]# ls /mnt //查看还原情况
test.txt
8、创建、删除克隆快照镜像
[root@node1 ceph]# rbd snap protect image --snap image-snap1 //克隆快照之前,需要先保护快照,否则快照被误删后,克隆的快照镜像无法使用
[root@node1 ceph]# rbd snap rm image --snap image-snap1 //删除快照失败
rbd: snapshot 'image-snap1' is protected from removal.
2019-06-03 11:45:06.908919 7fabef8bcd80 -1 librbd::Operations: snapshot is protected
[root@node1 ceph]# rbd clone image --snap image-snap1 image-clone --image-feature layering //使用image的快照image-snap1克隆一个新的image-clone镜像
[root@node1 ceph]# rbd list //查看镜像
demo-image
image
image-clone
[root@node1 ceph]# rbd info image-clone
rbd image 'image-clone':
size 15360 MB in 3840 objects
order 22 (4096 kB objects)
block_name_prefix: rbd_data.104b238e1f29
format: 2
features: layering
flags:
parent: rbd/image@image-snap1 //克隆镜像数据来源于快照image-snap1
overlap: 15360 MB
[root@node1 ceph]# rbd flatten image-clone //如果希望克隆镜像可以独立工作,就需要将父快照中的数据,全部拷贝一份,但比较耗时
[root@node1 ceph]# rbd info image-clone
rbd image 'image-clone':
size 15360 MB in 3840 objects
order 22 (4096 kB objects)
block_name_prefix: rbd_data.104b238e1f29
format: 2
features: layering
flags:
[root@node1 ceph]# rbd snap unprotect image --snap image-snap1 //取消快照保护
[root@node1 ceph]# rbd snap rm image --snap image-snap1 //删除快照
9、块存储应用案例:物理机作为客户端部署虚拟机,虚拟机调用ceph块存储
(1)ceph认证账户
[root@room9pc01 ~]# yum -y install ceph-common
[root@room9pc01 ~]# scp 192.168.4.11:/etc/ceph/ceph.conf /etc/ceph/
[root@room9pc01 ~]# scp 192.168.4.11:/etc/ceph/ceph.client.admin.keyring /etc/ceph/
[root@room9pc01 ~]# ls /etc/ceph/
ceph.client.admin.keyring ceph.conf rbdmap
[root@room9pc01 ~]# vim secret.xml //创建secret文件,使kvm虚拟机也能访问ceph块存储
<secret ephemeral='no' private='no'>
<usage type='ceph'>
<name>client.admin secret</name>
</usage>
</secret>
[root@room9pc01 ~]# virsh secret-define --file secret.xml //生成UUID账户信息
生成 secret 4a126092-b631-43d0-b70f-2a5eb628a4ae
[root@room9pc01 ~]# virsh secret-list
UUID 用量
--------------------------------------------------------------------------------
4a126092-b631-43d0-b70f-2a5eb628a4ae ceph client.admin secret
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
[root@room9pc01 ~]# virsh secret-undefine 4a126092-b631-43d0-b70f-2a5eb628a4ae //删除UUID
已删除 secret 4a126092-b631-43d0-b70f-2a5eb628a4ae
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
[root@room9pc01 ~]# virsh secret-define --file secret.xml //重新生成UUID
生成 secret 328305b0-a53c-4134-b3b4-490fe7af4657
[root@room9pc01 ~]# cat /etc/ceph/ceph.client.admin.keyring
[client.admin]
key = AQAV3vNczPvQCBAA9UHZSC3jiL3NblRULDEcVw==
[root@room9pc01 ~]# virsh secret-set-value \
> --secret 328305b0-a53c-4134-b3b4-490fe7af4657 \
> --base64 AQAV3vNczPvQCBAA9UHZSC3jiL3NblRULDEcVw== //设置secret,添加账户密钥
secret 值设定
(2)新建虚拟机(new),编辑虚拟机配置文件new.xml
[root@node1 ceph]# rbd create vm1-image --image-feature layering --size 20G //创建镜像vm1-image
[root@node1 ceph]# rbd list
demo-image
image
image-clone
vm1-image
[root@room9pc01 ~]# virsh edit new
<disk type='network' device='disk'> //修改disk区域,type改为network
<driver name='qemu' type='raw'/>
<auth username='admin'>
<secret type='ceph' uuid='328305b0-a53c-4134-b3b4-490fe7af4657'/> //修改UUID
</auth>
<source protocol='rbd' name='rbd/vm1-image'> //源文件为ceph镜像
<host name='192.168.4.11' port='6789'/>
</source>
<target dev='vda' bus='virtio'/> //修改bus接口类型
<address type='pci' domain='0x0000' bus='0x22' slot='0x07' function='0x0'/>
</disk>
(3)在kvm中查看虚拟机new,其磁盘即为ceph集群上的块存储镜像(rbd/vm1-image)
四、创建ceph文件系统
(1)部署node3主机作为mds节点(注:一般mds节点应为独立主机,且需要配置NTP、相同yum源以及无密码远程,并安装ceph-mds)
[root@node3 ~]# rpm -qa |grep ceph-mds
ceph-mds-10.2.2-38.el7cp.x86_64
[root@node1 ~]# cd ceph-cluster/
[root@node1 ceph-cluster]# ceph-deploy mds create node3 //启动node3主机的mds服务
[root@node3 ~]# systemctl status ceph-mds@node3.service
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
[root@node1 ceph-cluster]# ceph-deploy admin mds_node //同步配置文件和密钥至mds节点,由于node3之前已做相关配置,故省略此步操作
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
(2)创建存储池
[root@node3 ~]# ceph osd lspools //查看共享池
0 rbd,
[root@node3 ~]# ceph osd pool create cephfs_data 128 //创建存储池的block区域
[root@node3 ~]# ceph osd pool create cephfs_metadata 128 //创建存储池的inode区域
[root@node3 ~]# ceph osd lspools
0 rbd,1 cephfs_data,2 cephfs_metadata,
[root@node3 ~]# ceph mds stat //查看mds状态
e2:, 1 up:standby
(3)创建ceph文件系统
[root@node3 ~]# ceph fs new myfs1 cephfs_metadata cephfs_data //创建文件系统,注意先写metadata池,再写data池
[root@node3 ~]# ceph mds stat
e5: 1/1/1 up {0=node3=up:active}
[root@node3 ~]# ceph fs ls //查看文件系统
name: myfs1, metadata pool: cephfs_metadata, data pools: [cephfs_data ]
(4)客户端挂载
[root@client10 ~]# mount -t ceph 192.168.4.11:6789:/ /mnt/cephfs/ \
> -o name=admin,secret=AQAV3vNczPvQCBAA9UHZSC3jiL3NblRULDEcVw== //挂载ceph文件系统的根目录至客户端本地目录/mnt/cephfs,secret为ceph密钥文件中的key值
[root@client10 ~]# df -h /mnt/cephfs/ //查看挂载的文件系统
文件系统 容量 已用 可用 已用% 挂载点
192.168.4.11:6789:/ 120G 500M 120G 1% /mnt/cephfs
五、创建对象存储
部署node3主机作为rgw节点(注:一般rgw节点应为独立主机,且需要配置NTP、相同yum源以及无密码远程,并安装ceph-radosgw)
[root@node3 ~]# rpm -qa |grep ceph-radosgw
ceph-radosgw-10.2.2-38.el7cp.x86_64
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
[root@node1 ceph-cluster]# ceph-deploy install --rgw rgw_node //独立rgw节点主机需要安装ceph-radosgw
[root@node1 ceph-cluster]# ceph-deploy admin rgw_node //同步配置文件和密钥至rgw节点,由于node3之前已做相关配置,故省略此步操作
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
[root@node1 ceph-cluster]# ceph-deploy rgw create node3 //启动node3的rgw服务
[root@node3 ~]# systemctl status ceph-radosgw@rgw.node3.service //查看node3节点rgw服务启动情况
[root@node3 ~]# vim /etc/ceph/ceph.conf
#添加以下配置,修改rgw服务端口
[client.rgw.node3]
host = node3
rgw_frontends = "civetweb port=8000"
[root@node3 ~]# systemctl restart ceph-radosgw@rgw.node3.service
[root@node3 ~]# netstat -ntulp |grep :8000
部署完以上对象存储服务器之后,需要开发相应的程序运用该对象存储服务