最近在看Swarm,梳理了一遍Swarm集群负载均衡的实现方式,记录如下。
在Swarm中,通过IPVS和VXLAN技术实现服务负载均衡,其中IPVS负责根据FWM(firewall mark)取代VIP进行数据包的负载均衡,VXLAN负责组建跨主机之间的ingress网络通讯。
先看以下Swarm集群示例。
上图展示了同一节点内和两个节点之间网络连接的情况,详述如下:

Swarm集群和服务已提前创建完成,具体信息如下:
[root@centos7-10 ~]# docker node ls
ID HOSTNAME STATUS AVAILABILITY MANAGER STATUS ENGINE VERSION
w92t66jii4jflz1vb0kotr0df * centos7-10.localdomain Ready Active Leader 24.0.7
9p6kone8vdwkdne55svrj6k5n centos7-18.localdomain Ready Active 24.0.7
x5y16qjbxuxv322to4hbiymik centos7-22.localdomain Ready Active 24.0.7
[root@centos7-10 ~]# docker network ls
NETWORK ID NAME DRIVER SCOPE
e384cf8f2e42 bridge bridge local
333348eb8ff4 docker_gwbridge bridge local
65a57293726b host host local
9j40lm9s5dte ingress overlay swarm
2db17c1087bf none null local
9u6c1vlfy4kv ol_network overlay swarm
[root@centos7-10 ~]# docker network inspect ingress
[
{
"Name": "ingress",
"Id": "9j40lm9s5dtedg04r6jywqr1k",
"Created": "2024-03-05T14:40:26.202301824+08:00",
"Scope": "swarm",
"Driver": "overlay",
"EnableIPv6": false,
"IPAM": {
"Driver": "default",
"Options": null,
"Config": [
{
"Subnet": "10.0.0.0/24",
"Gateway": "10.0.0.1"
}
]
},
"Internal": false,
"Attachable": false,
"Ingress": true,
"ConfigFrom": {
"Network": ""
},
"ConfigOnly": false,
"Containers": {
"8593fe83b9259ace2cf6c1f0718c48c9206da0a657759676f7599014120d48e0": {
"Name": "my-nginx.3.s754p61l6729e117si904236b",
"EndpointID": "2425b820ba06cbb53bb5f329f87d6013874968c77ebbe0a4f1e38b0756097242",
"MacAddress": "02:42:0a:00:00:10",
"IPv4Address": "10.0.0.16/24",
"IPv6Address": ""
},
"86e2f87808941e12b5912ec5ee922dc48b04fbb4ce035189323eda51d4b74cac": {
"Name": "my-nginx.1.q3gjmacbb0k0ay2ehi1uvz7dw",
"EndpointID": "660df37adc3404a3bbeaf7cd9c979710bd49f100176e5185bfa0d861059c84e1",
"MacAddress": "02:42:0a:00:00:11",
"IPv4Address": "10.0.0.17/24",
"IPv6Address": ""
},
"ingress-sbox": {
"Name": "ingress-endpoint",
"EndpointID": "a4139caa54328f05f0a06ca8e0f7218fa8209fb09553c67f4dd073372eaffd33",
"MacAddress": "02:42:0a:00:00:02",
"IPv4Address": "10.0.0.2/24",
"IPv6Address": ""
}
},
"Options": {
"com.docker.network.driver.overlay.vxlanid_list": "4096"
},
"Labels": {},
"Peers": [
{
"Name": "ec5d0b20deba",
"IP": "10.211.55.10"
},
{
"Name": "202290dfb303",
"IP": "10.211.55.22"
},
{
"Name": "b1b45c422a80",
"IP": "10.211.55.18"
}
]
}
]
[root@centos7-10 ~]# docker network inspect docker_gwbridge
[
{
"Name": "docker_gwbridge",
"Id": "333348eb8ff434a7be478f93741588da9624bbab0b0787e5c68f2696816d3909",
"Created": "2024-02-22T13:42:37.480107344+08:00",
"Scope": "local",
"Driver": "bridge",
"EnableIPv6": false,
"IPAM": {
"Driver": "default",
"Options": null,
"Config": [
{
"Subnet": "172.18.0.0/16",
"Gateway": "172.18.0.1"
}
]
},
"Internal": false,
"Attachable": false,
"Ingress": false,
"ConfigFrom": {
"Network": ""
},
"ConfigOnly": false,
"Containers": {
"86e2f87808941e12b5912ec5ee922dc48b04fbb4ce035189323eda51d4b74cac": {
"Name": "gateway_a567c55696f4",
"EndpointID": "f9af1ab146a6cfe6be4df5b3c848e87c017251f0edd4482474ca1959d9ddb3df",
"MacAddress": "02:42:ac:12:00:03",
"IPv4Address": "172.18.0.3/16",
"IPv6Address": ""
},
"ingress-sbox": {
"Name": "gateway_ingress-sbox",
"EndpointID": "6f7823fd4b2c2fb825213d5aa8b9e05cd3f7e286597b5ecba62dc474aeb748fb",
"MacAddress": "02:42:ac:12:00:02",
"IPv4Address": "172.18.0.2/16",
"IPv6Address": ""
}
},
"Options": {
"com.docker.network.bridge.enable_icc": "false",
"com.docker.network.bridge.enable_ip_masquerade": "true",
"com.docker.network.bridge.name": "docker_gwbridge"
},
"Labels": {}
}
]
[root@centos7-10 ~]# docker service ls
ID NAME MODE REPLICAS IMAGE PORTS
7rfsp2zazavr my-nginx replicated 3/3 nginx-sj:2024022601 *:9090->80/tcp
[root@centos7-10 ~]# docker service inspect my-nginx
[
{
"ID": "7rfsp2zazavrnckuw074dv8dl",
"Version": {
"Index": 1726
},
"CreatedAt": "2024-02-26T09:59:24.571367943Z",
"UpdatedAt": "2024-03-06T05:36:56.386485665Z",
"Spec": {
"Name": "my-nginx",
"Labels": {},
"TaskTemplate": {
"ContainerSpec": {
"Image": "nginx-sj:2024022601",
"Init": false,
"Mounts": [
{
"Type": "volume",
"Source": "my-volume",
"Target": "/usr/share/nginx/html/"
}
],
"StopGracePeriod": 10000000000,
"DNSConfig": {},
"Isolation": "default"
},
"Resources": {
"Limits": {},
"Reservations": {}
},
"RestartPolicy": {
"Condition": "any",
"Delay": 5000000000,
"MaxAttempts": 0
},
"Placement": {},
"Networks": [
{
"Target": "9u6c1vlfy4kvli7ihka8f5mjf"
}
],
"ForceUpdate": 0,
"Runtime": "container"
},
"Mode": {
"Replicated": {
"Replicas": 3
}
},
"UpdateConfig": {
"Parallelism": 1,
"FailureAction": "pause",
"Monitor": 5000000000,
"MaxFailureRatio": 0,
"Order": "stop-first"
},
"RollbackConfig": {
"Parallelism": 1,
"FailureAction": "pause",
"Monitor": 5000000000,
"MaxFailureRatio": 0,
"Order": "stop-first"
},
"EndpointSpec": {
"Mode": "vip",
"Ports": [
{
"Protocol": "tcp",
"TargetPort": 80,
"PublishedPort": 9090,
"PublishMode": "ingress"
}
]
}
},
"PreviousSpec": {
"Name": "my-nginx",
"Labels": {},
"TaskTemplate": {
"ContainerSpec": {
"Image": "nginx-sj:2024022601",
"Init": false,
"Mounts": [
{
"Type": "volume",
"Source": "my-volume",
"Target": "/usr/share/nginx/html/"
}
],
"DNSConfig": {},
"Isolation": "default"
},
"Resources": {
"Limits": {},
"Reservations": {}
},
"Placement": {},
"Networks": [
{
"Target": "9u6c1vlfy4kvli7ihka8f5mjf"
}
],
"ForceUpdate": 0,
"Runtime": "container"
},
"Mode": {
"Replicated": {
"Replicas": 4
}
},
"EndpointSpec": {
"Mode": "vip",
"Ports": [
{
"Protocol": "tcp",
"TargetPort": 80,
"PublishedPort": 9090,
"PublishMode": "ingress"
}
]
}
},
"Endpoint": {
"Spec": {
"Mode": "vip",
"Ports": [
{
"Protocol": "tcp",
"TargetPort": 80,
"PublishedPort": 9090,
"PublishMode": "ingress"
}
]
},
"Ports": [
{
"Protocol": "tcp",
"TargetPort": 80,
"PublishedPort": 9090,
"PublishMode": "ingress"
}
],
"VirtualIPs": [
{
"NetworkID": "9j40lm9s5dtedg04r6jywqr1k",
"Addr": "10.0.0.15/24"
},
{
"NetworkID": "9u6c1vlfy4kvli7ihka8f5mjf",
"Addr": "172.16.0.27/16"
}
]
}
}
]
// 查看节点centos7-10 的nat表。集群内其他节点相同,不一一列举了。
[root@centos7-10 ~]# iptables -L -nv -t nat
Chain PREROUTING (policy ACCEPT 13068 packets, 1352K bytes)
pkts bytes target prot opt in out source destination
9629 585K DOCKER-INGRESS all -- * * 0.0.0.0/0 0.0.0.0/0 ADDRTYPE match dst-type LOCAL
9560 581K DOCKER all -- * * 0.0.0.0/0 0.0.0.0/0 ADDRTYPE match dst-type LOCAL
......
Chain DOCKER-INGRESS (2 references)
pkts bytes target prot opt in out source destination
75 4800 DNAT tcp -- * * 0.0.0.0/0 0.0.0.0/0 tcp dpt:9090 to:172.18.0.2:9090
9565 581K RETURN all -- * * 0.0.0.0/0 0.0.0.0/0
[root@centos7-10 ~]# iptables -L -nv -t filter
Chain INPUT (policy ACCEPT 375K packets, 47M bytes)
pkts bytes target prot opt in out source destination
Chain FORWARD (policy ACCEPT 0 packets, 0 bytes)
pkts bytes target prot opt in out source destination
120 9960 DOCKER-USER all -- * * 0.0.0.0/0 0.0.0.0/0
120 9960 DOCKER-INGRESS all -- * * 0.0.0.0/0 0.0.0.0/0
2208 1526K DOCKER-ISOLATION-STAGE-1 all -- * * 0.0.0.0/0 0.0.0.0/0
0 0 ACCEPT all -- * docker0 0.0.0.0/0 0.0.0.0/0 ctstate RELATED,ESTABLISHED
0 0 DOCKER all -- * docker0 0.0.0.0/0 0.0.0.0/0
0 0 ACCEPT all -- docker0 !docker0 0.0.0.0/0 0.0.0.0/0
0 0 ACCEPT all -- docker0 docker0 0.0.0.0/0 0.0.0.0/0
1408 1492K ACCEPT all -- * docker_gwbridge 0.0.0.0/0 0.0.0.0/0 ctstate RELATED,ESTABLISHED
0 0 DOCKER all -- * docker_gwbridge 0.0.0.0/0 0.0.0.0/0
800 34026 ACCEPT all -- docker_gwbridge !docker_gwbridge 0.0.0.0/0 0.0.0.0/0
0 0 DROP all -- docker_gwbridge docker_gwbridge 0.0.0.0/0 0.0.0.0/0
Chain OUTPUT (policy ACCEPT 344K packets, 45M bytes)
pkts bytes target prot opt in out source destination
Chain DOCKER (2 references)
pkts bytes target prot opt in out source destination
Chain DOCKER-INGRESS (1 references)
pkts bytes target prot opt in out source destination
525 35100 ACCEPT tcp -- * * 0.0.0.0/0 0.0.0.0/0 tcp dpt:9090
375 39600 ACCEPT tcp -- * * 0.0.0.0/0 0.0.0.0/0 state RELATED,ESTABLISHED tcp spt:9090
2208 1526K RETURN all -- * * 0.0.0.0/0 0.0.0.0/0
......
// 将容器命名空间目录连接到/var/run/netns
[root@centos7-10 ~]# ln -s /var/run/docker/netns/ /var/run/netns
// 查看命名空间
[root@centos7-10 ~]# ip netns show
a567c55696f4 (id: 4) // 容器命名空间,每个容器1个
1-9u6c1vlfy4 (id: 2) // 自定义Overlay网络命名空间,本文请忽略
lb_9u6c1vlfy (id: 3) // 自定义Overlay网络命名空间,本文请忽略
1-9j40lm9s5d (id: 0) // ingress网络命名空间,包含Vxlan VTEP(VXLAN Tunnel Endpoints)
ingress_sbox (id: 1) // ingress网络命名空间,网络入口,启用IPVS负载均衡
[root@centos7-10 ~]# ip netns exec ingress_sbox ip a
......
6: eth0@if7: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1450 qdisc noqueue state UP group default
link/ether 02:42:0a:00:00:02 brd ff:ff:ff:ff:ff:ff link-netnsid 0
inet 10.0.0.2/24 brd 10.0.0.255 scope global eth0
valid_lft forever preferred_lft forever
inet 10.0.0.15/32 scope global eth0
valid_lft forever preferred_lft forever
8: eth1@if9: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default
link/ether 02:42:ac:12:00:02 brd ff:ff:ff:ff:ff:ff link-netnsid 1
inet 172.18.0.2/16 brd 172.18.255.255 scope global eth1
valid_lft forever preferred_lft forever
验证流程:curl http://10.211.55.10:9090 --> 10.211.55.10:9090 --> 172.18.0.2:9090
验证结果:
// curl http://10.211.55.10:9090
jinshideMacBook-Pro-2:~ shijin$ curl http://10.211.55.10:9090/index1.html
// 容器nginx服务显示每个集群节点宿主机的IP
Host IP is 10.211.55.10
// 查看enp0s5信息
[root@centos7-10 ~]# ip a show enp0s5
2: enp0s5: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
link/ether 00:1c:42:ae:b6:41 brd ff:ff:ff:ff:ff:ff
inet 10.211.55.10/24 brd 10.211.55.255 scope global noprefixroute dynamic enp0s5
valid_lft 1502sec preferred_lft 1502sec
inet6 fdb2:2c26:f4e4:0:cd1f:12f3:4076:6d89/64 scope global noprefixroute dynamic
valid_lft 2591770sec preferred_lft 604570sec
inet6 fe80::7e0c:1902:e1ca:4324/64 scope link noprefixroute
valid_lft forever preferred_lft forever
inet6 fe80::567a:248b:5e94:5d19/64 scope link noprefixroute
valid_lft forever preferred_lft forever
// 抓宿主机物理网卡enp0s5
[root@centos7-10 ~]# tcpdump -nnnv -i enp0s5 port 9090
tcpdump: listening on enp0s5, link-type EN10MB (Ethernet), capture size 262144 bytes
15:41:33.029847 IP (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto TCP (6), length 64)
10.211.55.2.51883 > 10.211.55.10.9090: Flags [SEW], cksum 0xf5aa (correct), seq 1488146503, win 65535, options [mss 1460,nop,wscale 6,nop,nop,TS val 1319360275 ecr 0,sackOK,eol], length 0
15:41:33.030088 IP (tos 0x0, ttl 62, id 0, offset 0, flags [DF], proto TCP (6), length 60)
10.211.55.10.9090 > 10.211.55.2.51883: Flags [S.E], cksum 0x83e0 (incorrect -> 0x46cb), seq 3176725104, ack 1488146504, win 27960, options [mss 1410,sackOK,TS val 100373895 ecr 1319360275,nop,wscale 7], length 0
// 外部请求数据包已经转发至172.18.0.2.9090
[root@centos7-10 ~]# ip netns exec ingress_sbox tcpdump -nnnv -i eth1
tcpdump: listening on eth1, link-type EN10MB (Ethernet), capture size 262144 bytes
15:41:33.029944 IP (tos 0x0, ttl 63, id 0, offset 0, flags [DF], proto TCP (6), length 64)
10.211.55.2.51883 > 172.18.0.2.9090: Flags [SEW], cksum 0x8b73 (correct), seq 1488146503, win 65535, options [mss 1460,nop,wscale 6,nop,nop,TS val 1319360275 ecr 0,sackOK,eol], length 0
15:41:33.030079 IP (tos 0x0, ttl 63, id 0, offset 0, flags [DF], proto TCP (6), length 60)
172.18.0.2.9090 > 10.211.55.2.51883: Flags [S.E], cksum 0xee17 (incorrect -> 0xdc93), seq 3176725104, ack 1488146504, win 27960, options [mss 1410,sackOK,TS val 100373895 ecr 1319360275,nop,wscale 7], length 0
以节点centos7-10为例
[root@centos7-10 ~]# ip netns exec ingress_sbox iptables -L -nv -t mangle
Chain PREROUTING (policy ACCEPT 949 packets, 78785 bytes)
pkts bytes target prot opt in out source destination
547 36556 MARK tcp -- * * 0.0.0.0/0 0.0.0.0/0 tcp dpt:9090 MARK set 0x102
Chain INPUT (policy ACCEPT 559 packets, 37601 bytes)
pkts bytes target prot opt in out source destination
0 0 MARK all -- * * 0.0.0.0/0 10.0.0.15 MARK set 0x102
......
[root@centos7-10 ~]# ip netns exec ingress_sbox iptables -L -nv -t nat
Chain PREROUTING (policy ACCEPT 79 packets, 5076 bytes)
pkts bytes target prot opt in out source destination
Chain INPUT (policy ACCEPT 1 packets, 84 bytes)
pkts bytes target prot opt in out source destination
Chain OUTPUT (policy ACCEPT 8 packets, 546 bytes)
pkts bytes target prot opt in out source destination
0 0 DOCKER_OUTPUT all -- * * 0.0.0.0/0 127.0.0.11
Chain POSTROUTING (policy ACCEPT 8 packets, 546 bytes)
pkts bytes target prot opt in out source destination
0 0 DOCKER_POSTROUTING all -- * * 0.0.0.0/0 127.0.0.11
78 4992 SNAT all -- * * 0.0.0.0/0 10.0.0.0/24 ipvs to:10.0.0.2
......
[root@centos7-10 ~]# ip netns exec ingress_sbox ipvsadm -L -n
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port Forward Weight ActiveConn InActConn
FWM 258 rr
-> 10.0.0.17:0 Masq 1 0 0
-> 10.0.0.20:0 Masq 1 0 0
-> 10.0.0.21:0 Masq 1 0 0
[root@centos7-10 ~]#
验证流程:curl http://10.211.55.10:9090 --> 10.211.55.10:9090 --> 172.18.0.2:9090 --> IPVS --> 1-9j40lm9s5d命名空间
验证结果:
jinshideMacBook-Pro-2:~ shijin$ curl http://10.211.55.10:9090/index1.html
Host IP is 10.211.55.18
jinshideMacBook-Pro-2:~ shijin$ curl http://10.211.55.10:9090/index1.html
Host IP is 10.211.55.22
jinshideMacBook-Pro-2:~ shijin$ curl http://10.211.55.10:9090/index1.html
Host IP is 10.211.55.10
[root@centos7-10 ~]# ip netns exec ingress_sbox ipvsadm -L -n -c
IPVS connection entries
pro expire state source virtual destination
TCP 01:44 TIME_WAIT 10.211.55.2:57786 172.18.0.2:9090 10.0.0.20:9090
TCP 01:49 TIME_WAIT 10.211.55.2:57800 172.18.0.2:9090 10.0.0.21:9090
TCP 01:52 TIME_WAIT 10.211.55.2:57807 172.18.0.2:9090 10.0.0.17:9090
// 10.0.0.2.57786 > 10.0.0.20.9090
17:52:51.203834 IP (tos 0x0, ttl 63, id 0, offset 0, flags [DF], proto TCP (6), length 64)
10.0.0.2.57786 > 10.0.0.20.9090: Flags [SEW], cksum 0xd7ab (correct), seq 968936781, win 65535, options [mss 1460,nop,wscale 6,nop,nop,TS val 1327886090 ecr 0,sackOK,eol], length 0
17:52:51.204644 IP (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto TCP (6), length 60)
10.0.0.20.9090 > 10.0.0.2.57786: Flags [S.E], cksum 0x7b1a (correct), seq 3086272880, ack 968936782, win 27960, options [mss 1410,sackOK,TS val 108230949 ecr 1327886090,nop,wscale 7], length 0
// 10.0.0.2.57800 > 10.0.0.21.9090
17:52:57.148304 IP (tos 0x0, ttl 63, id 0, offset 0, flags [DF], proto TCP (6), length 64)
10.0.0.2.57800 > 10.0.0.21.9090: Flags [SEW], cksum 0x3f51 (correct), seq 4174221620, win 65535, options [mss 1460,nop,wscale 6,nop,nop,TS val 1327891553 ecr 0,sackOK,eol], length 0
17:52:57.148775 IP (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto TCP (6), length 60)
10.0.0.21.9090 > 10.0.0.2.57800: Flags [S.E], cksum 0xc796 (correct), seq 3791952458, ack 4174221621, win 27960, options [mss 1410,sackOK,TS val 101489099 ecr 1327891553,nop,wscale 7], length 0
// 10.0.0.2.57807 > 10.0.0.17.9090
17:52:59.366754 IP (tos 0x0, ttl 63, id 0, offset 0, flags [DF], proto TCP (6), length 64)
10.0.0.2.57807 > 10.0.0.17.9090: Flags [SEW], cksum 0xab41 (correct), seq 3929549799, win 65535, options [mss 1460,nop,wscale 6,nop,nop,TS val 1327893584 ecr 0,sackOK,eol], length 0
17:52:59.366977 IP (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto TCP (6), length 60)
10.0.0.17.9090 > 10.0.0.2.57807: Flags [S.E], cksum 0x1441 (incorrect -> 0x693f), seq 3691124848, ack 3929549800, win 27960, options [mss 1410,sackOK,TS val 108260232 ecr 1327893584,nop,wscale 7], length 0

从上图中可见,Centos7-10节点1的1-9j40lm9s5d命名空间下,有Linux bridge br0。br0共接入了3个接口(网卡),同时负责接口间的网络通讯,其中:
[root@centos7-10 ~]# ip netns exec 1-9j40lm9s5d brctl show
bridge name bridge id STP enabled interfaces
br0 8000.32f9e45ee4ae no veth0
veth7
vxlan0
[root@centos7-10 ~]# ip netns exec 1-9j40lm9s5d ip a
......
2: br0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1450 qdisc noqueue state UP group default
link/ether 32:f9:e4:5e:e4:ae brd ff:ff:ff:ff:ff:ff
inet 10.0.0.1/24 brd 10.0.0.255 scope global br0
valid_lft forever preferred_lft forever
5: vxlan0@if5: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1450 qdisc noqueue master br0 state UNKNOWN group default
link/ether 32:f9:e4:5e:e4:ae brd ff:ff:ff:ff:ff:ff link-netnsid 0
7: veth0@if6: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1450 qdisc noqueue master br0 state UP group default
link/ether 52:65:8a:62:c2:b0 brd ff:ff:ff:ff:ff:ff link-netnsid 1
62: veth7@if61: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1450 qdisc noqueue master br0 state UP group default
link/ether ee:55:4f:3f:9f:ed brd ff:ff:ff:ff:ff:ff link-netnsid 2
上图中的5: vxlan0@if5是Vxlan的VTEP(VXLAN Tunnel Endpoints),它与Centos7-18和Centos7-22上的VTEP组成跨主机的ingress Overlay网络。每个主机内的VTEP均接入br0,通过br0将ingress外部入口和内部容器接入ingress网络,从而能够将访问IPVS的外部请求负载均衡到多个节点的容器(Real server)上。
[root@centos7-10 ~]# ip netns exec 1-9j40lm9s5d ip -d link show type vxlan
5: vxlan0@if5: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1450 qdisc noqueue master br0 state UNKNOWN mode DEFAULT group default
link/ether 32:f9:e4:5e:e4:ae brd ff:ff:ff:ff:ff:ff link-netnsid 0 promiscuity 1
vxlan id 4096 srcport 0 0 dstport 4789 proxy l2miss l3miss ageing 300 noudpcsum noudp6zerocsumtx noudp6zerocsumrx
bridge_slave state forwarding priority 32 cost 100 hairpin off guard off root_block off fastleave off learning on flood on port_id 0x8001 port_no 0x1 designated_port 32769 designated_cost 0 designated_bridge 8000.32:f9:e4:5e:e4:ae designated_root 8000.32:f9:e4:5e:e4:ae hold_timer 0.00 message_age_timer 0.00 forward_delay_timer 0.00 topology_change_ack 0 config_pending 0 proxy_arp off proxy_arp_wifi off mcast_router 1 mcast_fast_leave off mcast_flood on addrgenmode eui64 numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535
[root@centos7-10 ~]# ip netns exec 1-9j40lm9s5d bridge -d fdb show dev vxlan0
32:f9:e4:5e:e4:ae master br0 permanent
// Centos7-18节点ingress_sbox下eth0(10.0.0.3)的mac
02:42:0a:00:00:03 dst 10.211.55.18 link-netnsid 0 self permanent
// Centos7-18节点容器7fd6d0335ad4的eth0(10.0.0.20)的mac
02:42:0a:00:00:14 dst 10.211.55.18 link-netnsid 0 self permanent
02:42:0a:00:00:04 dst 10.211.55.22 link-netnsid 0 self permanent
02:42:0a:00:00:15 dst 10.211.55.22 link-netnsid 0 self permanent
验证流程:curl http://10.211.55.10:9090 --> 10.211.55.10:9090 --> 172.18.0.2:9090 --> IPVS --> 1-9j40lm9s5d下br0 --> 当前节点的容器(real server)
验证结果:
jinshideMacBook-Pro-2:~ shijin$ curl http://10.211.55.10:9090/index1.html
// 容器内nginx服务显示结果,显示宿主机IP
Host IP is 10.211.55.10
jinshideMacBook-Pro-2:~ shijin$
// 抓1-9j40lm9s5d下br0
[root@centos7-10 ~]# ip netns exec 1-9j40lm9s5d tcpdump -nnnv -i br0
tcpdump: listening on br0, link-type EN10MB (Ethernet), capture size 262144 bytes
15:04:21.151015 IP (tos 0x0, ttl 63, id 0, offset 0, flags [DF], proto TCP (6), length 64)
10.0.0.2.55022 > 10.0.0.17.9090: Flags [SEW], cksum 0xa206 (correct), seq 2680037566, win 65535, options [mss 1460,nop,wscale 6,nop,nop,TS val 1340372305 ecr 0,sackOK,eol], length 0
15:04:21.151137 IP (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto TCP (6), length 60)
10.0.0.17.9090 > 10.0.0.2.55022: Flags [S.E], cksum 0x1441 (incorrect -> 0x4cd5), seq 2459281603, ack 2680037567, win 27960, options [mss 1410,sackOK,TS val 120829713 ecr 1340372305,nop,wscale 7], length 0
// 抓1-9j40lm9s5d下veth7
[root@centos7-10 ~]# ip netns exec 1-9j40lm9s5d tcpdump -nnnv -i veth7
tcpdump: listening on veth7, link-type EN10MB (Ethernet), capture size 262144 bytes
15:04:21.151032 IP (tos 0x0, ttl 63, id 0, offset 0, flags [DF], proto TCP (6), length 64)
10.0.0.2.55022 > 10.0.0.17.9090: Flags [SEW], cksum 0xa206 (correct), seq 2680037566, win 65535, options [mss 1460,nop,wscale 6,nop,nop,TS val 1340372305 ecr 0,sackOK,eol], length 0
15:04:21.151137 IP (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto TCP (6), length 60)
10.0.0.17.9090 > 10.0.0.2.55022: Flags [S.E], cksum 0x1441 (incorrect -> 0x4cd5), seq 2459281603, ack 2680037567, win 27960, options [mss 1410,sackOK,TS val 120829713 ecr 1340372305,nop,wscale 7], length 0
验证流程:curl http://10.211.55.10:9090 --> 10.211.55.10:9090 --> 172.18.0.2:9090 --> IPVS --> 1-9j40lm9s5d下br0 --> vxlan0(Vxlan VTEP) --> 其它节点的vxlan0(Vxlan VTEP) --> 其它节点的容器(real server)
验证结果:
jinshideMacBook-Pro-2:~ shijin$ curl http://10.211.55.10:9090/index1.html
// 容器内nginx服务显示结果,显示宿主机IP
Host IP is 10.211.55.18
[root@centos7-10 ~]# ip netns exec 1-9j40lm9s5d tcpdump -nnnv -i br0
tcpdump: listening on br0, link-type EN10MB (Ethernet), capture size 262144 bytes
15:40:51.450406 IP (tos 0x0, ttl 63, id 0, offset 0, flags [DF], proto TCP (6), length 64)
10.0.0.2.60559 > 10.0.0.20.9090: Flags [SEW], cksum 0x44b4 (correct), seq 3572036980, win 65535, options [mss 1460,nop,wscale 6,nop,nop,TS val 1342550781 ecr 0,sackOK,eol], length 0
15:40:51.450845 IP (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto TCP (6), length 60)
10.0.0.20.9090 > 10.0.0.2.60559: Flags [S.E], cksum 0x0fd9 (correct), seq 499769354, ack 3572036981, win 27960, options [mss 1410,sackOK,TS val 122999839 ecr 1342550781,nop,wscale 7], length 0
[root@centos7-10 ~]# ip netns exec 1-9j40lm9s5d tcpdump -nnnv -i vxlan0
tcpdump: listening on vxlan0, link-type EN10MB (Ethernet), capture size 262144 bytes
15:40:51.450418 IP (tos 0x0, ttl 63, id 0, offset 0, flags [DF], proto TCP (6), length 64)
10.0.0.2.60559 > 10.0.0.20.9090: Flags [SEW], cksum 0x44b4 (correct), seq 3572036980, win 65535, options [mss 1460,nop,wscale 6,nop,nop,TS val 1342550781 ecr 0,sackOK,eol], length 0
15:40:51.450845 IP (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto TCP (6), length 60)
10.0.0.20.9090 > 10.0.0.2.60559: Flags [S.E], cksum 0x0fd9 (correct), seq 499769354, ack 3572036981, win 27960, options [mss 1410,sackOK,TS val 122999839 ecr 1342550781,nop,wscale 7], length 0
// 在centos7-18 上抓enp0s5的UDP包
[root@centos7-18 ~]# tcpdump 'udp[39]=6 and (udp[42:4]=0x0a000002 or udp[42:4]=0x0a000014)' -nnnv -e -i enp0s5
tcpdump: listening on enp0s5, link-type EN10MB (Ethernet), capture size 262144 bytes
15:40:51.444134 00:1c:42:ae:b6:41 > 00:1c:42:60:87:b2, ethertype IPv4 (0x0800), length 128: (tos 0x0, ttl 64, id 44918, offset 0, flags [none], proto UDP (17), length 114)
10.211.55.10.45803 > 10.211.55.18.4789: VXLAN, flags [I] (0x08), vni 4096
02:42:0a:00:00:02 > 02:42:0a:00:00:14, ethertype IPv4 (0x0800), length 78: (tos 0x0, ttl 63, id 0, offset 0, flags [DF], proto TCP (6), length 64)
10.0.0.2.60559 > 10.0.0.20.9090: Flags [SEW], cksum 0x44b4 (correct), seq 3572036980, win 65535, options [mss 1460,nop,wscale 6,nop,nop,TS val 1342550781 ecr 0,sackOK,eol], length 0
15:40:51.444366 00:1c:42:60:87:b2 > 00:1c:42:ae:b6:41, ethertype IPv4 (0x0800), length 124: (tos 0x0, ttl 64, id 31402, offset 0, flags [none], proto UDP (17), length 110)
10.211.55.18.38329 > 10.211.55.10.4789: VXLAN, flags [I] (0x08), vni 4096
02:42:0a:00:00:14 > 02:42:0a:00:00:02, ethertype IPv4 (0x0800), length 74: (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto TCP (6), length 60)
10.0.0.20.9090 > 10.0.0.2.60559: Flags [S.E], cksum 0x0fd9 (correct), seq 499769354, ack 3572036981, win 27960, options [mss 1410,sackOK,TS val 122999839 ecr 1342550781,nop,wscale 7], length 0
[root@centos7-18 ~]# ip netns exec 1-9j40lm9s5d tcpdump -nnnv -i vxlan0
tcpdump: listening on vxlan0, link-type EN10MB (Ethernet), capture size 262144 bytes
15:40:51.444193 IP (tos 0x0, ttl 63, id 0, offset 0, flags [DF], proto TCP (6), length 64)
10.0.0.2.60559 > 10.0.0.20.9090: Flags [SEW], cksum 0x44b4 (correct), seq 3572036980, win 65535, options [mss 1460,nop,wscale 6,nop,nop,TS val 1342550781 ecr 0,sackOK,eol], length 0
15:40:51.444351 IP (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto TCP (6), length 60)
10.0.0.20.9090 > 10.0.0.2.60559: Flags [S.E], cksum 0x1444 (incorrect -> 0x0fd9), seq 499769354, ack 3572036981, win 27960, options [mss 1410,sackOK,TS val 122999839 ecr 1342550781,nop,wscale 7], length 0
[root@centos7-18 ~]# ip netns exec 7fd6d0335ad4 iptables -L -nv -t nat
Chain PREROUTING (policy ACCEPT 0 packets, 0 bytes)
pkts bytes target prot opt in out source destination
21 1344 REDIRECT tcp -- * * 0.0.0.0/0 10.0.0.20 tcp dpt:9090 redir ports 80
......
[root@centos7-18 ~]# ip netns exec 7fd6d0335ad4 iptables -L -nv -t filter
Chain INPUT (policy ACCEPT 0 packets, 0 bytes)
pkts bytes target prot opt in out source destination
147 9828 ACCEPT tcp -- * * 0.0.0.0/0 10.0.0.20 tcp dpt:80 ctstate NEW,ESTABLISHED
0 0 DROP sctp -- * * 0.0.0.0/0 10.0.0.20
0 0 DROP udp -- * * 0.0.0.0/0 10.0.0.20
0 0 DROP tcp -- * * 0.0.0.0/0 10.0.0.20
Chain FORWARD (policy ACCEPT 0 packets, 0 bytes)
pkts bytes target prot opt in out source destination
Chain OUTPUT (policy ACCEPT 0 packets, 0 bytes)
pkts bytes target prot opt in out source destination
105 11088 ACCEPT tcp -- * * 10.0.0.20 0.0.0.0/0 tcp spt:80 ctstate ESTABLISHED
0 0 DROP sctp -- * * 10.0.0.20 0.0.0.0/0
0 0 DROP udp -- * * 10.0.0.20 0.0.0.0/0
0 0 DROP tcp -- * * 10.0.0.20 0.0.0.0/0
验证流程:curl http://10.211.55.10:9090 --> 10.211.55.10:9090 --> 172.18.0.2:9090 --> IPVS --> 1-9j40lm9s5d下br0 --> vxlan0(Vxlan VTEP) --> 节点centos7-18的vxlan0(Vxlan VTEP) --> 节点centos7-18下的容器eth0
验证结果:
jinshideMacBook-Pro-2:~ shijin$ curl http://10.211.55.10:9090/index1.html
Host IP is 10.211.55.18
[root@centos7-18 ~]# ip netns exec 7fd6d0335ad4 tcpdump -nnnv -i eth0
tcpdump: listening on eth0, link-type EN10MB (Ethernet), capture size 262144 bytes
16:49:55.214784 IP (tos 0x0, ttl 63, id 0, offset 0, flags [DF], proto TCP (6), length 64)
10.0.0.2.54718 > 10.0.0.20.9090: Flags [SEW], cksum 0x7148 (correct), seq 2186891382, win 65535, options [mss 1460,nop,wscale 6,nop,nop,TS val 1346671753 ecr 0,sackOK,eol], length 0
16:49:55.214825 IP (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto TCP (6), length 60)
10.0.0.20.9090 > 10.0.0.2.54718: Flags [S.E], cksum 0x1444 (incorrect -> 0x5a11), seq 589922860, ack 2186891383, win 27960, options [mss 1410,sackOK,TS val 127143610 ecr 1346671753,nop,wscale 7], length 0
1. Swarm集群通过IPVS和Overlay网络实现负载均衡。IPVS采用NAT模式对创建服务的所有容器进行负载均衡;VXLAN负责组建Overlay网络,实现跨主机之间的网络通讯。
2. Swarm集群的每个节点,都实现了IPVS的逻辑,都接入了Swarm服务创建的所有容器作为Real Server。