在上一篇文章中介绍了MFS的安装配置与基本维护,但MFS存在master单点故障的问题。这一篇文章就来介绍如何利用corosync+pacemaker+DRBD解决MFS master的单点问题。在上一篇文章中,192.168.5.72这台服务器作为metalogger server;现在把它改作元数据服务器(master)的备机,并把metalogger server迁移到192.168.5.73上,即192.168.5.73既是metalogger server,又是chunkserver。
配置环境:
CentOS 7.5 x 64 metaserver Master: 192.168.5.71 metaserver Slave:192.168.5.72 VIP:192.168.5.77 metalogger server:192.168.5.73 Chunkservers: 192.168.5.73 192.168.5.74 192.168.5.75
#hosts文件配置
cat >> /etc/hosts << EOF 192.168.5.77 mfsmaster 192.168.5.71 mfs71 192.168.5.72 mfs72 192.168.5.73 mfs73 192.168.5.74 mfs74 192.168.5.75 mfs75 EOF
#这里mfsmaster、mfschunkserver、metalogger、mfs client的安装配置就不做介绍了
一、安装DRBD(主备节点安装)
rpm --import https://www.elrepo.org/RPM-GPG-KEY-elrepo.org rpm -Uvh https://www.elrepo.org/elrepo-release-7.0-3.el7.elrepo.noarch.rpm yum install drbd84 kmod-drbd84 -y
#格式化磁盘,两台服务器上的分区/dev/sdb1作为drbd的网络mirror分区(用独立分区做DRBD)
fdisk /dev/sdb mkfs.ext4 /dev/sdb1
#开始配置DRBD
modprobe drbd lsmod | grep drbd
vi /etc/drbd.d/global_common.conf
global { usage-count no; } common { protocol C; disk { on-io-error detach; } syncer { rate 100M; } } resource mfs { on mfs71 { device /dev/drbd1; disk /dev/sdb1; address 192.168.5.71:7899; meta-disk internal; } on mfs72 { device /dev/drbd1; disk /dev/sdb1; address 192.168.5.72:7899; meta-disk internal; } }
#启动DRBD
dd if=/dev/zero bs=1M count=128 of=/dev/sdb1 sync drbdadm create-md mfs service drbd start chkconfig drbd on
[root@mfs71 mfs]# cat /proc/drbd version: 8.4.11-1 (api:1/proto:86-101) GIT-hash: 66145a308421e9c124ec391a7848ac20203bb03c build by mockbuild@, 2018-04-26 12:10:42 1: cs:Connected ro:Secondary/Secondary ds:Inconsistent/Inconsistent C r----- ns:0 nr:0 dw:0 dr:0 al:8 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:f oos:104853340
#初始化一个主机(这几步只在主节点上操作)
[root@mfs71 ~]# drbdsetup /dev/drbd1 primary
[root@mfs71 ~]# drbdadm primary --force mfs
[root@mfs71 ~]# drbdadm -- --overwrite-data-of-peer primary mfs
#主节点查看同步状态
[root@mfs71 mfs]# cat /proc/drbd version: 8.4.11-1 (api:1/proto:86-101) GIT-hash: 66145a308421e9c124ec391a7848ac20203bb03c build by mockbuild@, 2018-04-26 12:10:42 1: cs:SyncSource ro:Primary/Secondary ds:UpToDate/Inconsistent C r----- ns:129444 nr:0 dw:0 dr:131548 al:8 bm:0 lo:0 pe:2 ua:0 ap:0 ep:1 wo:f oos:104724828 [>....................] sync'ed: 0.2% (102268/102392)M finish: 2:42:36 speed: 10,708 (10,708) K/sec
##查看同步进度
[root@mfs71 ~]# watch -n1 'cat /proc/drbd'
说明:
cs:两个节点之间的连接状态(Connection State)
ro:两个节点各自的角色(Roles),格式为“本机/对端”,例如Primary/Secondary
ds:两个节点的磁盘状态(Disk State),当显示为“UpToDate/UpToDate”时,表示两端数据已经同步完成。
##文件系统的挂载只能在master节点进行,备机的DRBD设备无法被挂载,因为它是用来接收主机数据的,由DRBD负责操作
mkdir -p /data/mfs chown -R mfs:mfs /data/mfs mkfs.ext4 /dev/drbd1 mount /dev/drbd1 /data/mfs
#查看挂载情况
[root@mfs71 mfs]# df -h 文件系统 容量 已用 可用 已用% 挂载点 /dev/drbd1 99G 61M 94G 1% /data/mfs
二、Metaserver配置
#将metadata.mfs的存储目录修改到drbd下面
vi mfsmaster.cfg DATA_PATH = /data/mfs
#试着启动mfsmaster(只在主节点上启动)
cp /usr/local/mfs/var/mfs/* /data/mfs/ chown -R mfs:mfs /data/mfs /usr/local/mfs/sbin/mfsmaster start
[root@mfs71 mfs]# ps -ef|grep mfs root 25966 2 0 11月02 ? 00:00:00 [drbd_w_mfs] root 25969 2 0 11月02 ? 00:00:00 [drbd_r_mfs] root 25975 2 0 11月02 ? 00:00:03 [drbd_a_mfs] root 25976 2 0 11月02 ? 00:00:00 [drbd_as_mfs] mfs 31508 1 5 15:31 ? 00:00:00 /usr/local/mfs/sbin/mfsmaster start root 31510 31452 0 15:31 pts/0 00:00:00 grep --color=auto mfs
[root@mfs71 mfs]# lsof -i:9420 COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME mfsmaster 31508 mfs 9u IPv4 160777 0t0 TCP *:9420 (LISTEN)
#关闭mfsmaster开机启动(主从节点都要执行,后面mfsmaster的启动由pacemaker来完成)
systemctl disable moosefs-master.service systemctl stop moosefs-master.service
#关闭DRBD服务并卸载设备
umount /data/mfs #这一步只在主节点上执行
三、pacemaker+corosync安装配置
#安装pacemaker corosync
yum install pcs pacemaker corosync fence-agents-all -y
#启动pcsd服务(开机自启动)
systemctl start pcsd.service
systemctl enable pcsd.service
#为hacluster设置密码:hacluster用户是安装组件时自动生成的,用来在本地启动pcs进程,因此需要为它设定密码,并且每个节点的密码必须相同
passwd hacluster balala369
#集群各节点之间认证(主节点mfs71上操作)
[root@mfs71 ~]# pcs cluster auth 192.168.5.71 192.168.5.72 Username: hacluster Password: 192.168.5.71: Authorized 192.168.5.72: Authorized
##创建mfscluster集群(主节点mfs71上操作)
[root@mfs71 mfs]# pcs cluster setup --name mfscluster 192.168.5.71 192.168.5.72 Destroying cluster on nodes: 192.168.5.71, 192.168.5.72... 192.168.5.72: Stopping Cluster (pacemaker)... 192.168.5.71: Stopping Cluster (pacemaker)... 192.168.5.72: Successfully destroyed cluster 192.168.5.71: Successfully destroyed cluster Sending 'pacemaker_remote authkey' to '192.168.5.71', '192.168.5.72' 192.168.5.71: successful distribution of the file 'pacemaker_remote authkey' 192.168.5.72: successful distribution of the file 'pacemaker_remote authkey' Sending cluster config files to the nodes... 192.168.5.71: Succeeded 192.168.5.72: Succeeded Synchronizing pcsd certificates on nodes 192.168.5.71, 192.168.5.72... 192.168.5.71: Success 192.168.5.72: Success Restarting pcsd on the nodes in order to reload the certificates... 192.168.5.71: Success 192.168.5.72: Success
##查看corosync配置文件
[root@mfs71 mfs]# cat /etc/corosync/corosync.conf totem { version: 2 cluster_name: mfscluster secauth: off transport: udpu } nodelist { node { ring0_addr: 192.168.5.71 nodeid: 1 } node { ring0_addr: 192.168.5.72 nodeid: 2 } } quorum { provider: corosync_votequorum two_node: 1 } logging { to_logfile: yes logfile: /var/log/cluster/corosync.log to_syslog: yes }
#设置集群自启动
[root@mfs71 mfs]# pcs cluster enable --all 192.168.5.71: Cluster Enabled 192.168.5.72: Cluster Enabled
[root@mfs71 ~]# systemctl start corosync.service [root@mfs71 ~]# systemctl start pacemaker.service [root@mfs71 ~]# systemctl enable corosync [root@mfs71 ~]# systemctl enable pacemaker [root@mfs72 ~]# systemctl start corosync.service [root@mfs72 ~]# systemctl start pacemaker.service [root@mfs72 ~]# systemctl enable corosync [root@mfs72 ~]# systemctl enable pacemaker
#查看集群状态
[root@mfs71 mfs]# pcs cluster status Cluster Status: Stack: corosync Current DC: mfs71 (version 1.1.18-11.el7_5.3-2b07d5c5a9) - partition with quorum Last updated: Mon Nov 5 16:09:30 2018 Last change: Mon Nov 5 16:09:09 2018 by hacluster via crmd on mfs71 2 nodes configured 0 resources configured PCSD Status: mfs72 (192.168.5.72): Online mfs71 (192.168.5.71): Online
[root@mfs72 ~]# pcs cluster status Cluster Status: Stack: corosync Current DC: mfs71 (version 1.1.18-11.el7_5.3-2b07d5c5a9) - partition with quorum Last updated: Mon Nov 5 16:09:53 2018 Last change: Mon Nov 5 16:09:15 2018 by hacluster via crmd on mfs71 2 nodes configured 0 resources configured PCSD Status: mfs72 (192.168.5.72): Online mfs71 (192.168.5.71): Online
##查看启动节点状态
[root@mfs71 mfs]# corosync-cfgtool -s Printing ring status. Local node ID 1 RING ID 0 id = 192.168.5.71 status = ring 0 active with no faults
[root@mfs72 ~]# corosync-cfgtool -s Printing ring status. Local node ID 2 RING ID 0 id = 192.168.5.72 status = ring 0 active with no faults
#查看pacemaker进程
[root@mfs71 mfs]# ps axf |grep pacemaker 473 pts/0 S+ 0:00 | \_ grep --color=auto pacemaker 310 ? Ss 0:00 /usr/sbin/pacemakerd -f 311 ? Ss 0:00 \_ /usr/libexec/pacemaker/cib 312 ? Ss 0:00 \_ /usr/libexec/pacemaker/stonithd 313 ? Ss 0:00 \_ /usr/libexec/pacemaker/lrmd 314 ? Ss 0:00 \_ /usr/libexec/pacemaker/attrd 315 ? Ss 0:00 \_ /usr/libexec/pacemaker/pengine 316 ? Ss 0:00 \_ /usr/libexec/pacemaker/crmd
#查看集群信息
[root@mfs71 mfs]# corosync-cmapctl | grep members runtime.totem.pg.mrp.srp.members.1.config_version (u64) = 0 runtime.totem.pg.mrp.srp.members.1.ip (str) = r(0) ip(192.168.5.71) runtime.totem.pg.mrp.srp.members.1.join_count (u32) = 1 runtime.totem.pg.mrp.srp.members.1.status (str) = joined runtime.totem.pg.mrp.srp.members.2.config_version (u64) = 0 runtime.totem.pg.mrp.srp.members.2.ip (str) = r(0) ip(192.168.5.72) runtime.totem.pg.mrp.srp.members.2.join_count (u32) = 1 runtime.totem.pg.mrp.srp.members.2.status (str) = joined
#禁用STONITH
[root@mfs71 mfs]# pcs property set stonith-enabled=false
#无法仲裁时候,选择忽略
[root@mfs71 mfs]# pcs property set no-quorum-policy=ignore
#检查配置是否正确
[root@mfs71 mfs]# crm_verify -L -V
#从pacemaker 1.1.8开始,crm发展成了一个独立项目,叫crmsh。也就是说,我们安装了pacemaker后,并没有crm这个命令,我们要实现对集群资源管理,还需要独立安装crmsh,crmsh依赖于许多包如:pssh
[root@mfs71 mfs]# wget -O /etc/yum.repos.d/network:ha-clustering:Stable.repo http://download.opensuse.org/repositories/network:/ha-clustering:/Stable/CentOS_CentOS-7/network:ha-clustering:Stable.repo [root@mfs71 mfs]# yum -y install crmsh
[root@mfs72 mfs]# wget -O /etc/yum.repos.d/network:ha-clustering:Stable.repo http://download.opensuse.org/repositories/network:/ha-clustering:/Stable/CentOS_CentOS-7/network:ha-clustering:Stable.repo [root@mfs72 mfs]# yum -y install crmsh
#如果yum安装报错,那就下载rpm包进行安装
cd /opt wget http://download.opensuse.org/repositories/network:/ha-clustering:/Stable/CentOS_CentOS-7/noarch/crmsh-3.0.0-6.2.noarch.rpm wget http://download.opensuse.org/repositories/network:/ha-clustering:/Stable/CentOS_CentOS-7/noarch/crmsh-scripts-3.0.0-6.2.noarch.rpm wget http://download.opensuse.org/repositories/network:/ha-clustering:/Stable/CentOS_CentOS-7/noarch/pssh-2.3.1-7.3.noarch.rpm wget http://mirror.yandex.ru/opensuse/repositories/network:/ha-clustering:/Stable/CentOS_CentOS-7/noarch/python-parallax-1.0.1-29.1.noarch.rpm wget http://download.opensuse.org/repositories/network:/ha-clustering:/Stable/CentOS_CentOS-7/noarch/python-pssh-2.3.1-7.3.noarch.rpm yum -y install crmsh-3.0.0-6.2.noarch.rpm crmsh-scripts-3.0.0-6.2.noarch.rpm pssh-2.3.1-7.3.noarch.rpm python-parallax-1.0.1-29.1.noarch.rpm python-pssh-2.3.1-7.3.noarch.rpm
#crm(只在主节点mfs71上配置)
[root@mfs71 mfs]# crm #查看systemd类型可代理的服务,其中有moosefs-master drbd crm(live)# ra crm(live)ra# list systemd #添加DRBD资源 crm(live)# configure #注意:这里的drbd_resource=mfs要与/etc/drbd.d/global_common.conf里定义的resource mfs要一致 crm(live)configure# primitive mfsdrbd ocf:linbit:drbd params drbd_resource=mfs op start timeout=240s op stop timeout=100s op monitor role=Master interval=20s timeout=30s op monitor role=Slave interval=30s timeout=30s crm(live)configure# ms ms_mfsdrbd mfsdrbd meta master-max=1 master-node-max=1 clone-max=2 clone-node-max=1 notify=true target-role=Started
## 添加文件系统资源
crm(live)configure# primitive drbdfs ocf:heartbeat:Filesystem params device=/dev/drbd1 directory=/data/mfs fstype=ext4 op monitor interval=30s timeout=40s op start timeout=60 op stop timeout=60 on-fail=restart
## 添加VIP资源(以192.168.5.77作为浮动IP,名字为mfsvip,并且告诉集群每20秒检查它一次)
crm(live)configure# primitive mfsvip ocf:heartbeat:IPaddr params ip=192.168.5.77 op monitor interval=20 timeout=30 on-fail=restart
#配置监控的服务
crm(live)configure# primitive mfsserver systemd:moosefs-master op monitor interval=20s timeout=15s on-fail=restart crm(live)configure# show
#语法检查
crm(live)configure# verify crm(live)configure# commit
## 定义约束(排列约束、顺序约束)
#挂载资源追随drbd主资源
crm(live)configure# colocation drbd_with_ms_mfsdrbd inf: drbdfs ms_mfsdrbd:Master
#节点上存在drbdMaster才能启动drbdfs服务
crm(live)configure# order drbd_after_ms_mfsdrbd mandatory: ms_mfsdrbd:promote drbdfs:start
#mfs服务追随挂载资源
crm(live)configure# colocation mfsserver_with_drbdfs inf: mfsserver drbdfs
#drbdfs服务启动才能启动mfs服务
crm(live)configure# order mfsserver_after_drbdfs mandatory: drbdfs:start mfsserver:start
#vip追随mfs服务
crm(live)configure# colocation mfsvip_with_mfsserver inf: mfsvip mfsserver
#vip启动才能启动mfs服务
crm(live)configure# order mfsvip_before_mfsserver mandatory: mfsvip mfsserver crm(live)configure# show
#语法检查
crm(live)configure# verify crm(live)configure# commit crm(live)configure# quit
#查看集群信息
crm status
#查看主节点服务是否运行正常?DRBD是否挂载?
#修改chunkserver的mfschunkserver.cfg,将MASTER_HOST修改成VIP
MASTER_HOST = 192.168.5.77
#修改metalogger的mfsmetalogger.cfg,将MASTER_HOST修改成VIP(在192.168.5.73上启用mfsmetalogger)
MASTER_HOST = 192.168.5.77
#客户端通过VIP挂载(也可以通过主机名挂载)
/usr/local/mfs/bin/mfsmount /mnt/mfs -H 192.168.5.77
/usr/local/mfs/bin/mfsmount -m /mnt/mfsmeta/ -H 192.168.5.77
四、用apache来替换mfscgiserv
#安装apache
yum -y install httpd
#创建apache认证用户
htpasswd -cm /etc/httpd/conf/htpasswd.users blufly balala369
#httpd.conf配置
vi /etc/httpd/conf/httpd.conf
# Serve the MooseFS CGI monitor (replacement for mfscgiserv) from Apache.
DocumentRoot "/usr/local/mfs/share/mfscgi"

# Make sure the CGI module is loaded.
# LoadModule is only valid at server-config level, so it must stay outside
# any <Directory> section.
LoadModule cgi_module modules/mod_cgi.so

# Treat *.cgi files as CGI scripts.
AddHandler cgi-script .cgi

# Alias is declared at server level as well; it is not permitted inside
# <Directory> on the httpd release shipped with CentOS 7.
Alias /cgi-bin/ "/usr/local/mfs/share/mfscgi/"

# Single, properly closed <Directory> section: allow CGI execution and
# require HTTP basic authentication against the htpasswd file.
<Directory "/usr/local/mfs/share/mfscgi">
    AllowOverride None
    Options ExecCGI
    Order allow,deny
    Allow from all
    AuthName "Mfs access"
    AuthType Basic
    AuthUserFile /etc/httpd/conf/htpasswd.users
    Require valid-user
</Directory>
#启动apache
systemctl start httpd.service
#设置自启
systemctl enable httpd.service
#通过apache查看mfs运行情况,首先apache需要登录认证
#mfs集群资源信息
#chunkserver信息
#客户端挂载信息
五、MFS故障切换
#模拟主从节点自动切换
[root@mfs71 mfs]# crm crm(live)# node standby crm(live)# status #将原来的主节点上线 crm(live)# node online crm(live)# status
#master由mfs71切换到mfs72上
#模拟moosefs-master服务挂掉,现在的master是mfs72
[root@mfs72 ~]# systemctl stop moosefs-master.service
#在手动将moosefs-master关掉后,资源并没有转移:因为节点本身没有故障,而默认情况下pacemaker不会对任何资源进行监控,所以即便资源被关掉了,只要节点没有故障,资源依然不会转移。要想达到资源转移的目的,必须为资源定义监控(monitor)。
#虽然我们在MFS资源定义中加了“monitor”选项,但发现并没有起到作用,服务不会自动拉起,所以通过加监控脚本的方式暂时解决。
cat /root/monitor_mfs.sh
#!/bin/bash
#
# monitor_mfs.sh - watchdog for the MooseFS master on a DRBD/pacemaker node.
#
# Every CHECK_INTERVAL seconds the script:
#   1. reads the local DRBD role from /proc/drbd;
#   2. if this node is DRBD Primary and moosefs-master is not running,
#      tries to start moosefs-master (up to START_ATTEMPTS times);
#   3. if moosefs-master still is not running, stops pacemaker on this node
#      so that the cluster moves the resources to the standby node.
# When DRBD is not available, or this node is not Primary, nothing is done.
#
# Usage: nohup /root/monitor_mfs.sh &
#
# Note: `set -e` is deliberately NOT used; a failing systemctl call is an
# expected condition that the loop handles itself.
set -u

# Absolute path: the script is started from rc.local, where PATH may be minimal.
readonly SYSTEMCTL='/bin/systemctl'
readonly MFS_UNIT='moosefs-master.service'
readonly PACEMAKER_UNIT='pacemaker.service'
readonly DRBD_PROC='/proc/drbd'
readonly CHECK_INTERVAL=10
readonly START_ATTEMPTS=2

#######################################
# Print the local DRBD role taken from the last "ro:<local>/<peer>" field
# found in /proc/drbd (e.g. "Primary"). Prints nothing when /proc/drbd is
# unreadable or contains no "ro:" field.
# Outputs: role name on stdout, or nothing.
#######################################
drbd_local_role() {
  [[ -r "$DRBD_PROC" ]] || return 0
  # Match the "ro:" field explicitly instead of grepping for "ro", which
  # would also hit the "proto:" text on the version line.
  awk '
    {
      for (i = 1; i <= NF; i++) {
        if ($i ~ /^ro:/) {
          split(substr($i, 4), roles, "/")
          role = roles[1]
        }
      }
    }
    END { if (role != "") print role }
  ' "$DRBD_PROC" 2>/dev/null
}

#######################################
# Report whether the moosefs-master unit is active.
# Returns: 0 when active, non-zero otherwise.
#######################################
mfs_running() {
  "$SYSTEMCTL" is-active --quiet "$MFS_UNIT"
}

#######################################
# Endless monitoring loop (see file header for the algorithm).
#######################################
main() {
  local role attempt

  while true; do
    role=$(drbd_local_role)

    if [[ "$role" == 'Primary' ]] && ! mfs_running; then
      # Output is discarded on purpose: the script runs detached under nohup.
      for (( attempt = 1; attempt <= START_ATTEMPTS; attempt++ )); do
        "$SYSTEMCTL" start "$MFS_UNIT" &>/dev/null
        if mfs_running; then
          break
        fi
      done

      # moosefs-master cannot be started here: stop pacemaker so that the
      # cluster fails over to the standby node.
      if ! mfs_running; then
        "$SYSTEMCTL" stop "$PACEMAKER_UNIT" &>/dev/null
      fi
    fi

    sleep "$CHECK_INTERVAL"
  done
}

main "$@"
#添加执行权限
[root@mfs72 ~]# chmod +x /root/monitor_mfs.sh [root@mfs72 ~]# nohup /root/monitor_mfs.sh &
#设置开机自启动
[root@mfs71 ~]# echo "nohup /root/monitor_mfs.sh &" >> /etc/rc.local [root@mfs72 ~]# echo "nohup /root/monitor_mfs.sh &" >> /etc/rc.local
#运行monitor_mfs.sh脚本后,首先会尝试重启本机的moosefs-master服务
#贴一个crmsh的配置文件,在configure模式下,用edit来编辑
node 1: mfs71 \ attributes standby=off node 2: mfs72 \ attributes standby=off primitive drbdfs Filesystem \ params device="/dev/drbd1" directory="/data/mfs" fstype=ext4 \ op monitor interval=30s timeout=40s \ op start timeout=60 interval=0 \ op stop timeout=60 on-fail=restart interval=0 primitive mfsdrbd ocf:linbit:drbd \ params drbd_resource=mfs \ op start timeout=240s interval=0 \ op stop timeout=100s interval=0 \ op monitor role=Master interval=20s timeout=30s \ op monitor role=Slave interval=30s timeout=30s primitive mfsserver systemd:moosefs-master \ op monitor interval=20s timeout=15s on-fail=restart primitive mfsvip IPaddr \ params ip=192.168.5.77 \ op monitor interval=20 timeout=30 on-fail=restart ms ms_mfsdrbd mfsdrbd \ meta master-max=1 master-node-max=1 clone-max=2 clone-node-max=1 notify=true target-role=Started order drbd_after_ms_mfsdrbd Mandatory: ms_mfsdrbd:promote drbdfs:start colocation drbd_with_ms_mfsdrbd inf: drbdfs ms_mfsdrbd:Master order mfsserver_after_drbdfs Mandatory: drbdfs:start mfsserver:start colocation mfsserver_with_drbdfs inf: mfsserver drbdfs order mfsvip_before_mfsserver Mandatory: mfsvip mfsserver colocation mfsvip_with_mfsserver inf: mfsvip mfsserver property cib-bootstrap-options: \ have-watchdog=false \ dc-version=1.1.18-11.el7_5.3-2b07d5c5a9 \ cluster-infrastructure=corosync \ cluster-name=mfscluster \ stonith-enabled=false \ no-quorum-policy=ignore