Our data center recently added SSD storage, so I applied for it right away; I could no longer stand etcd timing out and the web UI refusing to load. After switching to SSD, everything is buttery smooth. Here is a record of the platform migration.
The prerequisite for the migration is that the old and new machines can reach each other over the network. The Kubernetes version is 1.16.3, which is quite old; I will upgrade it later.
I. Rancher management platform
Old platform

| IP | CPU | Memory (GB) | Disk |
| --- | --- | --- | --- |
| 172.26.179.146 (master) | 4 | 8 | 40G (Ceph disk) |
| 172.26.179.147 | 4 | 8 | 40G (Ceph disk) |
| 172.26.179.148 | 4 | 8 | 40G (Ceph disk) |
New platform

| IP | CPU | Memory (GB) | Disk |
| --- | --- | --- | --- |
| 172.25.149.111 (master) | 4 | 8 | 40G (SSD disk) |
| 172.25.149.112 | 4 | 8 | 40G (SSD disk) |
| 172.25.149.113 | 4 | 8 | 40G (SSD disk) |
II. Prepare the new environment (run on all three new machines)
1. Performance tuning
echo "
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_tw_recycle = 0
net.bridge.bridge-nf-call-ip6tables=1
net.bridge.bridge-nf-call-iptables=1
net.ipv4.ip_forward=1
net.ipv4.conf.all.forwarding=1
net.ipv4.neigh.default.gc_thresh1=4096
net.ipv4.neigh.default.gc_thresh2=6144
net.ipv4.neigh.default.gc_thresh3=8192
net.ipv4.neigh.default.gc_interval=60
net.ipv4.neigh.default.gc_stale_time=120
# See https://github.com/prometheus/node_exporter#disabled-by-default
kernel.perf_event_paranoid=-1
# sysctls for k8s node config
net.ipv4.tcp_slow_start_after_idle=0
net.core.rmem_max=16777216
fs.inotify.max_user_watches=524288
kernel.softlockup_all_cpu_backtrace=1
kernel.softlockup_panic=1
fs.file-max=2097152
fs.inotify.max_user_instances=8192
fs.inotify.max_queued_events=16384
vm.max_map_count=262144
fs.may_detach_mounts=1
net.core.netdev_max_backlog=16384
net.ipv4.tcp_wmem=4096 12582912 16777216
net.core.wmem_max=16777216
net.core.somaxconn=32768
net.ipv4.ip_forward=1
net.ipv4.tcp_max_syn_backlog=8096
net.ipv4.tcp_rmem=4096 12582912 16777216
net.ipv6.conf.all.disable_ipv6=1
net.ipv6.conf.default.disable_ipv6=1
net.ipv6.conf.lo.disable_ipv6=1
kernel.yama.ptrace_scope=0
vm.swappiness=0
# Controls whether the PID is appended as an extension to core file names.
kernel.core_uses_pid=1
# Do not accept source routing
net.ipv4.conf.default.accept_source_route=0
net.ipv4.conf.all.accept_source_route=0
# Promote secondary addresses when the primary address is removed
net.ipv4.conf.default.promote_secondaries=1
net.ipv4.conf.all.promote_secondaries=1
# Enable hard and soft link protection
fs.protected_hardlinks=1
fs.protected_symlinks=1
# Reverse path (source route) validation
# see details in https://help.aliyun.com/knowledge_detail/39428.html
net.ipv4.conf.all.rp_filter=0
net.ipv4.conf.default.rp_filter=0
net.ipv4.conf.default.arp_announce = 2
net.ipv4.conf.lo.arp_announce=2
net.ipv4.conf.all.arp_announce=2
# see details in https://help.aliyun.com/knowledge_detail/41334.html
net.ipv4.tcp_max_tw_buckets=5000
net.ipv4.tcp_syncookies=1
net.ipv4.tcp_fin_timeout=30
net.ipv4.tcp_synack_retries=2
kernel.sysrq=1
" >> /etc/sysctl.conf
sysctl -p
cat >> /etc/security/limits.conf <<EOF
* soft nofile 65535
* hard nofile 65536
EOF
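To confirm the kernel parameters and file-descriptor limits took effect, a quick spot check (note the nofile limits only apply to new login sessions):
# Spot-check a couple of the sysctl values set above
sysctl net.core.somaxconn net.ipv4.tcp_max_tw_buckets
# Soft and hard open-file limits, checked from a fresh shell
ulimit -Sn
ulimit -Hn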
2. Install Docker
yum install -y yum-utils device-mapper-persistent-data lvm2
yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
yum install -y docker-ce-20.10.7-3.el7.x86_64 docker-ce-cli-20.10.7-3.el7.x86_64
Start Docker:
systemctl start docker
systemctl enable docker
systemctl status docker
Tune the Docker configuration:
cat > /etc/docker/daemon.json <<EOF
{
"oom-score-adjust": -1000,
"log-driver": "json-file",
"log-opts": {
"max-size": "500m",
"max-file": "3"
},
"registry-mirrors": ["https://7bezldxe.mirror.aliyuncs.com"]
}
EOF
Restart Docker:
systemctl restart docker
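To confirm the daemon picked up the new settings (docker info supports Go-template output):
# Should print: json-file
docker info --format '{{.LoggingDriver}}'
# Should include the Aliyun mirror configured above
docker info --format '{{.RegistryConfig.Mirrors}}'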
3. Time zone tuning
Time synchronization:
timedatectl status
timedatectl set-timezone Asia/Shanghai
timedatectl set-ntp yes
date
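set-ntp relies on chronyd (or systemd-timesyncd) underneath; a quick check that the clock is actually syncing (a sketch for CentOS 7):
# Expect "NTP synchronized: yes"
timedatectl status | grep -i 'NTP synchronized'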
4. Create the centos account
# Create the centos user
useradd centos
# Set the password (replace NewPassword with your own)
echo "NewPassword" | passwd --stdin centos
# Add the centos user to the docker group
usermod -aG docker centos
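RKE drives Docker over SSH as this user, so it is worth confirming the docker group membership works (group changes only apply to new sessions):
# Should list containers without sudo; an error means the group didn't apply
su - centos -c 'docker ps'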
5. Configure the private image registry
Configure this according to your own environment.
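For reference, a minimal sketch assuming the registry at 10.15.128.38 (used in cluster.yml below) is served over plain HTTP; if it has trusted TLS certificates, the insecure-registries entry is unnecessary:
# Re-create daemon.json with the earlier options plus the private registry
cat > /etc/docker/daemon.json <<EOF
{
"oom-score-adjust": -1000,
"log-driver": "json-file",
"log-opts": {
"max-size": "500m",
"max-file": "3"
},
"registry-mirrors": ["https://7bezldxe.mirror.aliyuncs.com"],
"insecure-registries": ["10.15.128.38"]
}
EOF
systemctl restart docker
# Confirm the credentials that cluster.yml will use actually work
docker login 10.15.128.38 -u admin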
6. Configure hosts and passwordless SSH login
echo "172.25.149.122 k8s.example.com" >> /etc/hosts
# Passwordless login from 172.26.179.146 to the 3 new machines
Run on 172.26.179.146:
ssh-copy-id centos@172.25.149.111
ssh-copy-id centos@172.25.149.112
ssh-copy-id centos@172.25.149.113
Enter the password when prompted to complete the passwordless setup.
# Passwordless login from 172.25.149.111 to all 6 machines (old and new)
Run on 172.25.149.111:
ssh-keygen -t rsa   # press Enter through all prompts to generate the key pair
ssh-copy-id centos@172.26.179.146
ssh-copy-id centos@172.26.179.147
ssh-copy-id centos@172.26.179.148
ssh-copy-id centos@172.25.149.111
ssh-copy-id centos@172.25.149.112
ssh-copy-id centos@172.25.149.113
Enter the password when prompted to complete the passwordless setup.
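RKE fails if any SSH hop still needs a password, so a non-interactive check from 172.25.149.111 can save a debugging round:
# BatchMode makes ssh fail instead of prompting for a password
for host in 172.26.179.146 172.26.179.147 172.26.179.148 172.25.149.111 172.25.149.112 172.25.149.113; do
  ssh -o BatchMode=yes centos@"$host" hostname || echo "FAILED: $host"
done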
III. Live migration (run on 172.26.179.146)
1. Modify cluster.yml
Change the original configuration file to:
nodes:
  - address: 172.26.179.146
    user: centos
    role: [controlplane,worker,etcd]
  - address: 172.26.179.147
    user: centos
    role: [controlplane,worker,etcd]
  - address: 172.26.179.148
    user: centos
    role: [controlplane,worker,etcd]
  - address: 172.25.149.111
    user: centos
    role: [controlplane,worker,etcd]
  - address: 172.25.149.112
    user: centos
    role: [controlplane,worker,etcd]
  - address: 172.25.149.113
    user: centos
    role: [controlplane,worker,etcd]
services:
  etcd:
    snapshot: true
    creation: 6h
    retention: 24h
private_registries:
  - url: 10.15.128.38
    user: admin
    password: Abc123@#!ddd
    is_default: true
#rke up --config ./cluster.yml
# How long this takes depends on the network; on success you will see output like the following:
INFO[0339] [sync] Successfully synced nodes Labels and Taints
INFO[0339] [network] Setting up network plugin: canal
INFO[0339] [addons] Saving ConfigMap for addon rke-network-plugin to Kubernetes
INFO[0339] [addons] Successfully saved ConfigMap for addon rke-network-plugin to Kubernetes
INFO[0339] [addons] Executing deploy job rke-network-plugin
INFO[0339] [addons] Setting up coredns
INFO[0339] [addons] Saving ConfigMap for addon rke-coredns-addon to Kubernetes
INFO[0339] [addons] Successfully saved ConfigMap for addon rke-coredns-addon to Kubernetes
INFO[0339] [addons] Executing deploy job rke-coredns-addon
INFO[0339] [addons] CoreDNS deployed successfully..
INFO[0339] [dns] DNS provider coredns deployed successfully
INFO[0339] [addons] Setting up Metrics Server
INFO[0339] [addons] Saving ConfigMap for addon rke-metrics-addon to Kubernetes
INFO[0339] [addons] Successfully saved ConfigMap for addon rke-metrics-addon to Kubernetes
INFO[0339] [addons] Executing deploy job rke-metrics-addon
INFO[0339] [addons] Metrics Server deployed successfully
INFO[0339] [ingress] Setting up nginx ingress controller
INFO[0339] [addons] Saving ConfigMap for addon rke-ingress-controller to Kubernetes
INFO[0339] [addons] Successfully saved ConfigMap for addon rke-ingress-controller to Kubernetes
INFO[0339] [addons] Executing deploy job rke-ingress-controller
INFO[0339] [ingress] ingress controller nginx deployed successfully
INFO[0339] [addons] Setting up user addons
INFO[0339] [addons] no user addons defined
INFO[0339] Finished building Kubernetes cluster successfully
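At this point all six nodes should have joined; a quick sanity check, using the kubeconfig RKE just generated in the working directory (assuming kubectl is on PATH here, otherwise ./kubectl):
# All six 172.26.179.x / 172.25.149.x nodes should show STATUS Ready
kubectl --kubeconfig kube_config_cluster.yml get nodes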
Two files are generated in the same directory, kube_config_cluster.yml and cluster.rkestate; keep them safe.
Copy them, together with cluster.yml, rke, and kubectl, to the /home/centos directory on 172.25.149.111.
The following commands are all run on 172.25.149.111:
sudo chmod +x rke kubectl
sudo mv {rke,kubectl} /bin/
Modify cluster.yml (run on 172.25.149.111).
The modified content is as follows:
nodes:
  - address: 172.25.149.111
    user: centos
    role: [controlplane,worker,etcd]
  - address: 172.25.149.112
    user: centos
    role: [controlplane,worker,etcd]
  - address: 172.25.149.113
    user: centos
    role: [controlplane,worker,etcd]
services:
  etcd:
    snapshot: true
    creation: 6h
    retention: 24h
private_registries:
  - url: 10.15.128.38
    user: admin
    password: Abc123@#!ddd
    is_default: true
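Before removing the old nodes, it may be worth taking a one-off etcd snapshot as a fallback (a sketch; snapshot-save is a standard rke subcommand and the snapshot name is arbitrary):
# Saves a snapshot under /opt/rke/etcd-snapshots/ on the etcd nodes
rke etcd snapshot-save --config ./cluster.yml --name pre-migration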
Run the upgrade, which removes the old machines from the cluster.
#rke up --config ./cluster.yml
# How long this takes depends on the network; on success you will see output like the following:
The output is essentially the same as the first run, again ending with:
INFO[0339] Finished building Kubernetes cluster successfully
Configure the environment:
# Append the kubeconfig path to the end of the profile file and save it (the location can be customized)
export KUBECONFIG=/home/centos/kube_config_cluster.yml
[root@k8s-master ~]# echo "source <(kubectl completion bash)" >> ~/.bashrc
[root@k8s-master ~]# source ~/.bashrc
[root@k8s-master ~]# su - centos
[centos@k8s-master ~]$ echo "source <(kubectl completion bash)" >> ~/.bashrc
[centos@k8s-master ~]$ source ~/.bashrc
Test the cluster
Use kubectl to test the connection and check that all of your nodes are in the Ready state:
[centos@k8s-master ~]$ kubectl get node
[centos@k8s-master ~]$ kubectl get pods --all-namespaces
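After the second rke up, only the three new SSD nodes should remain; a quick check:
# Only 172.25.149.111-113 should be listed; the 172.26.179.x nodes are gone
kubectl get nodes -o wide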
IV. Nginx switchover
On the nginx server, switch nginx.conf to point at the new machines; save a copy of nginx.conf before modifying it.
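A minimal sketch of the switchover, assuming the config lives at /etc/nginx/nginx.conf and an upstream block lists the node IPs (the exact upstream layout depends on your config):
# Back up the current config first
cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak
# Edit the upstream server entries from 172.26.179.146-148 to 172.25.149.111-113, then validate and reload
nginx -t && nginx -s reload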
After the switchover I observed the cluster for a week. With the SSDs, performance is far better than before: etcd no longer times out, and the problem of the management web UI failing to open is solved. Buttery smooth!