1. Introduction

Under Docker's default networking, containers on the same host can communicate directly through the docker0 bridge, while containers on different hosts can only reach each other through port mappings on their hosts. That port-mapping approach is inconvenient for many clustered applications; letting containers on different hosts talk to each other directly with their own IP addresses would solve a lot of problems.

2. Environment Preparation

Prepare two servers. Install CentOS in VMware and set the network adapter to "Bridged" mode.

Set the network's IP Address, Netmask and Gateway.

You can also change the IP address after the installation finishes.

To configure the addresses manually after installation, edit the following files:

/etc/sysconfig/network-scripts/ifcfg-ens33

IPADDR=192.168.1.17
GATEWAY=192.168.1.1
NETMASK=255.255.255.0

/etc/resolv.conf

nameserver 202.96.128.86
nameserver 202.96.134.33
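
After editing these files, restart networking so the new addresses take effect (a minimal step, assuming the classic network service that CentOS 7 uses for ifcfg files):

# apply the ifcfg-ens33 changes
systemctl restart network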

After installation there are two servers, 192.168.1.17 and 192.168.1.18.

Server          Installed applications
192.168.1.17    etcd, flannel, docker
192.168.1.18    flannel, docker

3. Configuration on 192.168.1.17

3.1 Disable the firewall

systemctl stop firewalld.service
systemctl disable firewalld.service

3.2 Install etcd

yum install etcd -y

# Edit the configuration file
vim /etc/etcd/etcd.conf

Changes to make in the configuration file:

ETCD_LISTEN_CLIENT_URLS="http://localhost:2379"
change to
ETCD_LISTEN_CLIENT_URLS="http://0.0.0.0:2379"

ETCD_ADVERTISE_CLIENT_URLS="http://localhost:2379"
change to
ETCD_ADVERTISE_CLIENT_URLS="http://0.0.0.0:2379"
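
If you prefer to script these two edits, a sed sketch (assuming the stock key names in /etc/etcd/etcd.conf; review the file afterwards):

# rewrite the two client-URL keys in place
sed -i 's#^ETCD_LISTEN_CLIENT_URLS=.*#ETCD_LISTEN_CLIENT_URLS="http://0.0.0.0:2379"#' /etc/etcd/etcd.conf
sed -i 's#^ETCD_ADVERTISE_CLIENT_URLS=.*#ETCD_ADVERTISE_CLIENT_URLS="http://0.0.0.0:2379"#' /etc/etcd/etcd.conf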

Start etcd

# Start etcd
systemctl start etcd.service
# Enable etcd on boot
systemctl enable etcd.service

Check that etcd is running

[root@localhost ~]# ps -ef | grep etcd
etcd       1758      1  1 02:55 ?        00:00:03 /usr/bin/etcd --name=default --data-dir=/var/lib/etcd/default.etcd --listen-client-urls=http://0.0.0.0:2379

[root@localhost ~]# netstat -tunlp | grep etcd
tcp        0      0 127.0.0.1:2380          0.0.0.0:*               LISTEN      1758/etcd           
tcp6       0      0 :::2379                 :::*                    LISTEN      1758/etcd

Configure the flannel network in etcd

[root@localhost ~]# etcdctl mk /atomic.io/network/config '{ "Network": "10.28.0.0/16" }'
{ "Network": "10.28.0.0/16" }

# To change the subnet later, delete the key with rm first, then create the new one with mk
etcdctl rm /atomic.io/network/config
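
To double-check the key, you can read it back (the yum-packaged etcdctl speaks the etcd v2 API, which is what these commands assume):

etcdctl get /atomic.io/network/config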

3.3 Install flannel

yum install flannel -y

# Edit the configuration file
vim /etc/sysconfig/flanneld

Changes to make in the configuration file:

FLANNEL_ETCD_ENDPOINTS="http://127.0.0.1:2379"
change to
FLANNEL_ETCD_ENDPOINTS="http://192.168.1.17:2379"
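
For reference, the relevant part of /etc/sysconfig/flanneld after the change might look like this (a sketch; the surrounding comments vary by flannel version, and FLANNEL_ETCD_PREFIX must match the key path created in etcd above):

# etcd url location. Point this to the server where etcd runs
FLANNEL_ETCD_ENDPOINTS="http://192.168.1.17:2379"

# etcd config key. This is the configuration key that flannel queries
# for address range assignment
FLANNEL_ETCD_PREFIX="/atomic.io/network"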

Start flanneld

# Start flanneld
systemctl start flanneld.service
# Enable flanneld on boot
systemctl enable flanneld.service

Check that flanneld is running

[root@localhost ~]# ps -ef | grep flannel
root       2000      1  0 03:09 ?        00:00:00 /usr/bin/flanneld -etcd-endpoints=http://192.168.1.17:2379 -etcd-prefix=/atomic.io/network

3.4 Install Docker

yum install -y yum-utils 
yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo
yum install -y docker-ce

Change Docker's subnet: method 1

# First look up the subnet and MTU that flannel assigned
[root@localhost ~]# cat /run/flannel/subnet.env
FLANNEL_NETWORK=10.28.0.0/16
FLANNEL_SUBNET=10.28.89.1/24
FLANNEL_MTU=1472
FLANNEL_IPMASQ=false

Edit Docker's startup parameters:
vim /usr/lib/systemd/system/docker.service

ExecStart=/usr/bin/dockerd -H fd:// --containerd=/run/containerd/containerd.sock
change to
ExecStart=/usr/bin/dockerd -H fd:// --containerd=/run/containerd/containerd.sock --bip=10.28.89.1/24 --mtu=1472
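
Note that /usr/lib/systemd/system/docker.service is owned by the docker-ce package and may be overwritten on upgrade. A systemd drop-in override is a safer place for the change (a sketch, not part of the original steps; the same idea works for method 2 below):

# /etc/systemd/system/docker.service.d/override.conf  (e.g. created with: systemctl edit docker)
[Service]
# clear the packaged ExecStart before redefining it
ExecStart=
ExecStart=/usr/bin/dockerd -H fd:// --containerd=/run/containerd/containerd.sock --bip=10.28.89.1/24 --mtu=1472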

Change Docker's subnet: method 2

Edit Docker's startup parameters:
vim /usr/lib/systemd/system/docker.service

ExecStart=/usr/bin/dockerd -H fd:// --containerd=/run/containerd/containerd.sock
change to
ExecStart=/usr/bin/dockerd -H fd:// --containerd=/run/containerd/containerd.sock $DOCKER_NETWORK_OPTIONS

Methods 1 and 2 have the same effect. In method 2, the DOCKER_NETWORK_OPTIONS variable comes from the /run/flannel/docker file, and /run/flannel/docker is referenced by /usr/lib/systemd/system/docker.service.d/flannel.conf.
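
That flannel.conf drop-in is typically installed by the flannel package; its content is essentially a single EnvironmentFile directive (shown here as a sketch, the exact file shipped by your flannel version may differ slightly):

[Service]
EnvironmentFile=-/run/flannel/docker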

[root@localhost ~]# cat /run/flannel/docker
DOCKER_OPT_BIP="--bip=10.28.89.1/24"
DOCKER_OPT_IPMASQ="--ip-masq=true"
DOCKER_OPT_MTU="--mtu=1472"
DOCKER_NETWORK_OPTIONS=" --bip=10.28.89.1/24 --ip-masq=true --mtu=1472"

The /run/flannel/docker file is generated by flannel:

[root@localhost ~]# cat /usr/lib/systemd/system/flanneld.service

...
ExecStartPost=/usr/libexec/flannel/mk-docker-opts.sh -k DOCKER_NETWORK_OPTIONS -d /run/flannel/docker
...

Start Docker

# After modifying a unit file, daemon-reload must be run
systemctl daemon-reload

systemctl start docker
systemctl enable docker
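
As with etcd and flanneld, you can confirm that dockerd started with the flannel-provided options (a quick check, not part of the original steps):

# the --bip and --mtu flags should appear in the command line
ps -ef | grep dockerd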

3.5 Check the network state

[root@localhost ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
       valid_lft forever preferred_lft forever
    inet6 ::1/128 scope host 
       valid_lft forever preferred_lft forever
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
    link/ether 00:0c:29:a4:48:7b brd ff:ff:ff:ff:ff:ff
    inet 192.168.1.17/24 brd 192.168.1.255 scope global noprefixroute ens33
       valid_lft forever preferred_lft forever
    inet6 fe80::5a41:60d3:a93b:8822/64 scope link noprefixroute 
       valid_lft forever preferred_lft forever
3: flannel0: <POINTOPOINT,MULTICAST,NOARP,UP,LOWER_UP> mtu 1472 qdisc pfifo_fast state UNKNOWN group default qlen 500
    link/none 
    inet 10.28.89.0/16 scope global flannel0
       valid_lft forever preferred_lft forever
    inet6 fe80::5ea6:dd66:bdcf:8933/64 scope link flags 800 
       valid_lft forever preferred_lft forever
4: docker0: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc noqueue state DOWN group default 
    link/ether 02:42:a8:fb:bc:cd brd ff:ff:ff:ff:ff:ff
    inet 10.28.89.1/24 brd 10.28.89.255 scope global docker0
       valid_lft forever preferred_lft forever
[root@localhost ~]# ip r
default via 192.168.1.1 dev ens33 proto static metric 100 
10.28.0.0/16 dev flannel0 proto kernel scope link src 10.28.89.0 
10.28.89.0/24 dev docker0 proto kernel scope link src 10.28.89.1 
192.168.1.0/24 dev ens33 proto kernel scope link src 192.168.1.17 metric 100

4. Configuration on 192.168.1.18

4.1 Disable the firewall

systemctl stop firewalld.service
systemctl disable firewalld.service

4.2 Install flannel

Same as 3.3; no differences.

4.3 Install Docker

Same as 3.4, except that flannel assigns this host its own /24 (10.28.60.1/24 here), so with method 1 the --bip and --mtu values must be taken from this host's /run/flannel/subnet.env.

4.4 Check the network state

[root@localhost ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
       valid_lft forever preferred_lft forever
    inet6 ::1/128 scope host 
       valid_lft forever preferred_lft forever
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
    link/ether 00:0c:29:66:4d:ae brd ff:ff:ff:ff:ff:ff
    inet 192.168.1.18/24 brd 192.168.1.255 scope global noprefixroute ens33
       valid_lft forever preferred_lft forever
    inet6 fe80::7d9e:19a5:3dd5:364/64 scope link noprefixroute 
       valid_lft forever preferred_lft forever
3: flannel0: <POINTOPOINT,MULTICAST,NOARP,UP,LOWER_UP> mtu 1472 qdisc pfifo_fast state UNKNOWN group default qlen 500
    link/none 
    inet 10.28.60.0/16 scope global flannel0
       valid_lft forever preferred_lft forever
    inet6 fe80::d399:9dc3:fb6d:9e3c/64 scope link flags 800 
       valid_lft forever preferred_lft forever
4: docker0: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc noqueue state DOWN group default 
    link/ether 02:42:5b:89:2c:18 brd ff:ff:ff:ff:ff:ff
    inet 10.28.60.1/24 brd 10.28.60.255 scope global docker0
       valid_lft forever preferred_lft forever
[root@localhost ~]# ip r
default via 192.168.1.1 dev ens33 proto static metric 100 
10.28.0.0/16 dev flannel0 proto kernel scope link src 10.28.60.0 
10.28.60.0/24 dev docker0 proto kernel scope link src 10.28.60.1 
192.168.1.0/24 dev ens33 proto kernel scope link src 192.168.1.18 metric 100

5. iptables

After the steps above, containers on different hosts still cannot ping each other, and the cause is a firewall problem. But we already ran "systemctl stop firewalld.service" during the setup, so why is there still a firewall problem? Because Linux still has the lower-level iptables, and Docker automatically creates its own iptables chains.

The structure of iptables is: iptables -> Tables -> Chains -> Rules. Simply put, tables are made up of chains, and chains are made up of rules.

iptables has four built-in tables: Filter, NAT, Mangle and Raw:

  • Filter table – packet filtering, i.e. the firewall; the filter table decides whether a packet continues on to its destination or is dropped
  • NAT table – network address translation
  • Mangle table – unpacks packets, modifies them and repacks them; used to alter IP header information
  • Raw table – used to exempt packets from the connection-tracking mechanism that the nat table relies on

Targets (policies) that chains or rules can use:

  • ACCEPT – let the packet through
  • DROP – discard the packet silently
  • REJECT – unlike DROP, REJECT sends a rejection response back after refusing the traffic; DROP simply discards it
  • RETURN – stop processing the remaining rules in the current chain and return to the calling chain

Run the following two commands and the firewall will let the traffic through:

# Flush the iptables rules
iptables -F

# Set the FORWARD chain's default policy to ACCEPT
iptables -P FORWARD ACCEPT
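
If you would rather not flush every rule, a narrower alternative (a sketch, not part of the original steps; DOCKER-USER is the chain Docker reserves for user rules) is to accept just the flannel network in the forwarding path:

# allow forwarded traffic to and from the flannel overlay network
iptables -I DOCKER-USER -s 10.28.0.0/16 -j ACCEPT
iptables -I DOCKER-USER -d 10.28.0.0/16 -j ACCEPT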

Other commonly used iptables commands

### When -t is not given, the filter table is used by default ###

# List rules with line numbers
iptables -nvL --line-numbers    

# Show the filter table: -n (numeric output of addresses and ports), -v (verbose mode), -L (list)
iptables -nvL 

# Show the mangle table
iptables -t mangle -nvL

# Show the nat table
iptables -t nat -nvL

# Show the raw table
iptables -t raw -nvL

# Replace rule 1 of the DOCKER-USER chain with an ACCEPT target
iptables -R DOCKER-USER 1 -j ACCEPT

# Print the selected rules as iptables commands
iptables [-t table] -S [chain-name [rule-num]]

---------------- Chain management ----------------

# Create a new user-defined chain
iptables -N chain-name

# Delete an empty user-defined chain; its rules must be flushed before the chain can be deleted
iptables -X chain-name

# Rename a user-defined chain; the chain must not be referenced anywhere
iptables -E chain-name chain-new-name

# Set the default policy of a built-in chain (only ACCEPT and DROP are valid policies)
iptables -P {PREROUTING | INPUT | FORWARD | OUTPUT | POSTROUTING} {ACCEPT | DROP}

---------------- Rule management ----------------

-A append a rule, -D delete a rule, -R replace a rule, -I insert a rule, -L list rules, -F flush rules

Common parameters: -p protocol, -s source address, -d destination address, --sport source port, --dport destination port, -i inbound interface, -o outbound interface, -j target (action)
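
A concrete rule that combines several of these parameters (purely illustrative, not part of this setup): accept TCP connections to etcd's client port 2379 arriving on ens33 from the 192.168.1.0/24 network:

iptables -A INPUT -i ens33 -p tcp -s 192.168.1.0/24 --dport 2379 -j ACCEPT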

The changes above are lost when the machine reboots; to keep them you need to install the iptables service and run its save command.

Note. You need to distinguish between the iptables service and the iptables command. 
Although firewalld is a replacement for the firewall management provided by iptables service, 
it still uses the iptables command for dynamic communication with the kernel packet filter (netfilter). 
So it is only the iptables service that is replaced, not the iptables command. 
That can be a confusing distinction at first.

By default CentOS replaces the iptables service with firewalld; both of them control netfilter through the iptables command.

To avoid conflicts with iptables, firewalld must first be prevented from starting at boot.

***** Alibaba Cloud and Huawei Cloud ECS instances do not come with the iptables service installed by default; firewalld is installed but disabled by default *****

# Install the iptables service
yum install -y iptables-services

# Start iptables
systemctl start iptables

# Enable iptables on boot
systemctl enable iptables.service

# Flush all rules
iptables -F

# Set the FORWARD chain's default policy to ACCEPT
iptables -P FORWARD ACCEPT

# Confirm that the new rules are in effect
iptables -nL

# Save the rules
[root@localhost ~]# service iptables save
iptables: Saving firewall rules to /etc/sysconfig/iptables:[  OK  ]

Note: even if you flush the iptables rules, they will look like the following as soon as the Docker service restarts, because Docker maintains its own iptables rules automatically.

[root@localhost ~]# iptables -nL --line-numbers
Chain INPUT (policy ACCEPT)
num  target     prot opt source               destination         

Chain FORWARD (policy DROP)
num  target     prot opt source               destination         
1    DOCKER-USER  all  --  0.0.0.0/0            0.0.0.0/0           
2    DOCKER-ISOLATION-STAGE-1  all  --  0.0.0.0/0            0.0.0.0/0           
3    ACCEPT     all  --  0.0.0.0/0            0.0.0.0/0            ctstate RELATED,ESTABLISHED
4    DOCKER     all  --  0.0.0.0/0            0.0.0.0/0           
5    ACCEPT     all  --  0.0.0.0/0            0.0.0.0/0           
6    ACCEPT     all  --  0.0.0.0/0            0.0.0.0/0           

Chain OUTPUT (policy ACCEPT)
num  target     prot opt source               destination         

Chain DOCKER (1 references)
num  target     prot opt source               destination         

Chain DOCKER-ISOLATION-STAGE-1 (1 references)
num  target     prot opt source               destination         
1    DOCKER-ISOLATION-STAGE-2  all  --  0.0.0.0/0            0.0.0.0/0           
2    RETURN     all  --  0.0.0.0/0            0.0.0.0/0           

Chain DOCKER-ISOLATION-STAGE-2 (1 references)
num  target     prot opt source               destination         
1    DROP       all  --  0.0.0.0/0            0.0.0.0/0           
2    RETURN     all  --  0.0.0.0/0            0.0.0.0/0           

Chain DOCKER-USER (1 references)
num  target     prot opt source               destination         
1    RETURN     all  --  0.0.0.0/0            0.0.0.0/0

For example, the screenshot below compares the nat table before and after running the following command, which maps host port 8000 to container port 80:

(Screenshot: nat table, before vs. after the 8000:80 port mapping)

docker run --rm -d -p 8000:80 --name ng2 nginx
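
To see the DNAT rule Docker adds for this mapping, list the DOCKER chain of the nat table; it should translate TCP port 8000 on the host to port 80 of the ng2 container:

iptables -t nat -nvL DOCKER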

Do not add --iptables=false to the ExecStart parameters in /usr/lib/systemd/system/docker.service; let Docker manage iptables itself.

6. Testing

6.1 Results on 192.168.1.17

[root@localhost ~]# docker pull busybox
Using default tag: latest
latest: Pulling from library/busybox
d9cbbca60e5f: Pull complete 
Digest: sha256:836945da1f3afe2cfff376d379852bbb82e0237cb2925d53a13f53d6e8a8c48c
Status: Downloaded newer image for busybox:latest
docker.io/library/busybox:latest

[root@localhost ~]# docker run -itd --name=node1 busybox
598f7f96a71714f27aa0e30c88b4aa682b9781a9c7f18724625aacc4eaf3c7a8

[root@localhost ~]# docker exec -it node1 ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
       valid_lft forever preferred_lft forever
5: eth0@if6: <BROADCAST,MULTICAST,UP,LOWER_UP,M-DOWN> mtu 1472 qdisc noqueue 
    link/ether 02:42:0a:1c:59:02 brd ff:ff:ff:ff:ff:ff
    inet 10.28.89.2/24 brd 10.28.89.255 scope global eth0
       valid_lft forever preferred_lft forever

6.2 Results on 192.168.1.18

[root@localhost ~]# docker pull busybox
Using default tag: latest
latest: Pulling from library/busybox
d9cbbca60e5f: Pull complete 
Digest: sha256:836945da1f3afe2cfff376d379852bbb82e0237cb2925d53a13f53d6e8a8c48c
Status: Downloaded newer image for busybox:latest
docker.io/library/busybox:latest

[root@localhost ~]# docker run -itd --name=node1 busybox
013ec9e4851eb062c98311f0607033aa0a42cd21b5ad82c72f794cb1fc635e38

[root@localhost ~]# docker exec -it node1 ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
       valid_lft forever preferred_lft forever
5: eth0@if6: <BROADCAST,MULTICAST,UP,LOWER_UP,M-DOWN> mtu 1472 qdisc noqueue 
    link/ether 02:42:0a:1c:3c:02 brd ff:ff:ff:ff:ff:ff
    inet 10.28.60.2/24 brd 10.28.60.255 scope global eth0
       valid_lft forever preferred_lft forever

6.3 Ping the container on the other host, from the 192.168.1.17 host and from inside its container

[root@localhost ~]# ping 10.28.60.2 -c4
PING 10.28.60.2 (10.28.60.2) 56(84) bytes of data.
64 bytes from 10.28.60.2: icmp_seq=1 ttl=61 time=2.05 ms
64 bytes from 10.28.60.2: icmp_seq=2 ttl=61 time=1.80 ms
64 bytes from 10.28.60.2: icmp_seq=3 ttl=61 time=2.04 ms
64 bytes from 10.28.60.2: icmp_seq=4 ttl=61 time=2.07 ms
--- 10.28.60.2 ping statistics ---
4 packets transmitted, 4 received, 0% packet loss, time 3005ms
rtt min/avg/max/mdev = 1.805/1.995/2.076/0.114 ms


[root@localhost ~]# docker exec -it node1 ping 10.28.60.2 -c4
PING 10.28.60.2 (10.28.60.2): 56 data bytes
64 bytes from 10.28.60.2: seq=0 ttl=60 time=0.535 ms
64 bytes from 10.28.60.2: seq=1 ttl=60 time=2.181 ms
64 bytes from 10.28.60.2: seq=2 ttl=60 time=2.285 ms
64 bytes from 10.28.60.2: seq=3 ttl=60 time=2.287 ms
--- 10.28.60.2 ping statistics ---
4 packets transmitted, 4 packets received, 0% packet loss
round-trip min/avg/max = 0.535/1.822/2.287 ms
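
Beyond ICMP, a quick TCP check is to run a web server on one host and fetch it from the container on the other (a sketch; 10.28.60.3 stands in for whatever address the nginx container actually receives on 192.168.1.18):

# on 192.168.1.18: start nginx and print its container IP
docker run -d --name web nginx
docker inspect -f '{{.NetworkSettings.IPAddress}}' web

# on 192.168.1.17: fetch the page from inside node1 (replace the IP with the one printed above)
docker exec -it node1 wget -qO- http://10.28.60.3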