走过路过的运维大佬,docker swarm 熟悉的也来看看

2021-10-14 17:31:29 +08:00
 zxkxhnqwe123

公司的开发测试环境想部署一个 docker 集群,k8s 不会,k3s 更不会。

目前搭配的组合是:以 esxi 作为基础系统,虚拟出多台 centos8 虚拟机,在 centos8 上安装 docker swarm 集群。出现了一个问题!! docker swarm 部署服务完成后,跨主机的容器之间都能正常 ping 通,但是从宿主机访问 docker 开放的端口时,访问三次只有一次成功。具体如下:

公司路由器网关 10.0.0.1

1. server-01 10.0.0.21 (manager)
2. server-02 10.0.0.22
3. server-03 10.0.0.23




### 防火墙全部关闭,只有 iptables
server-01 $ docker swarm init --default-addr-pool 192.0.0.0/24
server-02 $ docker swarm join
server-03 $ docker swarm join







### 
server-01 $ docker node ls
ID                            HOSTNAME    STATUS    AVAILABILITY   MANAGER STATUS   ENGINE VERSION
km7dmxn402qt0s473kpqb47ac *   Server-01   Ready     Active         Leader           20.10.9
k5vq74oh1njscvv4mf9gpyogh     Server-02   Ready     Active                          20.10.9
rxzmo276saehmh1rc118fdxxe     Server-03   Ready     Active                          20.10.9
 





### 网络状态如下
server-01 $ docker network inspect ingress
[
    {
        "Name": "ingress",
        "Id": "m7ia7lmmlu1zm0zchr13ohk4q",
        "Created": "2021-10-14T15:08:48.036907446+08:00",
        "Scope": "swarm",
        "Driver": "overlay",
        "EnableIPv6": false,
        "IPAM": {
            "Driver": "default",
            "Options": null,
            "Config": [
                {
                    "Subnet": "192.0.0.0/24",
                    "Gateway": "192.0.0.1"
                }
            ]
        },
        "Internal": false,
        "Attachable": false,
        "Ingress": true,
        "ConfigFrom": {
            "Network": ""
        },
        "ConfigOnly": false,
        "Containers": {
            "ingress-sbox": {
                "Name": "ingress-endpoint",
                "EndpointID": "4b5146ca8e180dd88a5271b7d29b439f6d5995801a47d8c648379d9b51ab0b77",
                "MacAddress": "02:42:c0:00:00:02",
                "IPv4Address": "192.0.0.2/24",
                "IPv6Address": ""
            }
        },
        "Options": {
            "com.docker.network.driver.overlay.vxlanid_list": "4096"
        },
        "Labels": {},
        "Peers": [
            {
                "Name": "6ebb8868ac00",
                "IP": "10.0.0.21"
            },
            {
                "Name": "7982d5a14bf2",
                "IP": "10.0.0.22"
            },
            {
                "Name": "b25e17d118a4",
                "IP": "10.0.0.23"
            }
        ]
    }
]








server-01 $ docker network inspect docker_gwbridge
[
    {
        "Name": "docker_gwbridge",
        "Id": "6f2d03207e884bfec1918d4e8fc1a1f5f14ec9e5bcd71fd409a26630ab73d413",
        "Created": "2021-10-14T15:08:48.422229208+08:00",
        "Scope": "local",
        "Driver": "bridge",
        "EnableIPv6": false,
        "IPAM": {
            "Driver": "default",
            "Options": null,
            "Config": [
                {
                    "Subnet": "172.18.0.0/16",
                    "Gateway": "172.18.0.1"
                }
            ]
        },
        "Internal": false,
        "Attachable": false,
        "Ingress": false,
        "ConfigFrom": {
            "Network": ""
        },
        "ConfigOnly": false,
        "Containers": {
            "ingress-sbox": {
                "Name": "gateway_ingress-sbox",
                "EndpointID": "1c4c1b5ba462d87832710029171c3911df457c950055a369670f59cef374247b",
                "MacAddress": "02:42:ac:12:00:02",
                "IPv4Address": "172.18.0.2/16",
                "IPv6Address": ""
            }
        },
        "Options": {
            "com.docker.network.bridge.enable_icc": "false",
            "com.docker.network.bridge.enable_ip_masquerade": "true",
            "com.docker.network.bridge.name": "docker_gwbridge"
        },
        "Labels": {}
    }
]




### 创建 nginx 服务
server-01 $ docker service create --replicas 3 -p 80:80 --name nginx nginx

server-01 $ docker service ps nginx
ID             NAME      IMAGE          NODE        DESIRED STATE   CURRENT STATE           ERROR     PORTS
xsomsqqtkr62   nginx.1  nginx:latest   Server-02   Running         Running 2 minutes ago             
selbdoapjek0   nginx.2   nginx:latest   Server-03   Running         Running 2 minutes ago             
w5bigfn8xtz4   nginx.3   nginx:latest   Server-01   Running         Running 2 minutes ago             




server-01 $ docker service ls
ID             NAME      MODE         REPLICAS   IMAGE          PORTS
ro33x7v9ceri   nginx     replicated   3/3        nginx:latest   *:80->80/tcp





server-01 $ docker ps -a
CONTAINER ID   IMAGE          COMMAND                  CREATED         STATUS         PORTS     NAMES
37de9b7759c9   nginx:latest   "/docker-entrypoint.…"   5 minutes ago   Up 5 minutes   80/tcp    nginx.3.w5bigfn8xtz4pi10hoe62gi4b









···························································








## 重点来了!!!
[root@Server-01 ~]# curl 10.0.0.21 --卡住
^C
[root@Server-01 ~]# curl 10.0.0.21 --卡住
^C
[root@Server-01 ~]# curl 10.0.0.21 --三次成功一次
<!DOCTYPE html>
<html>
<head>
<title>Welcome to nginx!</title>
<style>
html { color-scheme: light dark; }
body { width: 35em; margin: 0 auto;
font-family: Tahoma, Verdana, Arial, sans-serif; }
</style>
</head>
<body>
<h1>Welcome to nginx!</h1>
<p>If you see this page, the nginx web server is successfully installed and
working. Further configuration is required.</p>

<p>For online documentation and support please refer to
<a href="http://nginx.org/">nginx.org</a>.<br/>
Commercial support is available at
<a href="http://nginx.com/">nginx.com</a>.</p>

<p><em>Thank you for using nginx.</em></p>
</body>
</html>






[root@Server-01 ~]# netstat -tunlp
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address           Foreign Address         State       PID/Program name    
tcp        0      0 192.168.122.1:53        0.0.0.0:*               LISTEN      1740/dnsmasq        
tcp        0      0 0.0.0.0:22              0.0.0.0:*               LISTEN      1068/sshd           
tcp        0      0 0.0.0.0:111             0.0.0.0:*               LISTEN      1/systemd           
tcp6       0      0 :::22                   :::*                    LISTEN      1068/sshd           
tcp6       0      0 :::2377                 :::*                    LISTEN      1222/dockerd        
tcp6       0      0 :::7946                 :::*                    LISTEN      1222/dockerd        
tcp6       0      0 :::111                  :::*                    LISTEN      1/systemd           
tcp6       0      0 :::80                   :::*                    LISTEN      1222/dockerd        
udp        0      0 192.168.122.1:53        0.0.0.0:*                           1740/dnsmasq        
udp        0      0 0.0.0.0:67              0.0.0.0:*                           1740/dnsmasq        
udp        0      0 0.0.0.0:111             0.0.0.0:*                           1/systemd           
udp        0      0 0.0.0.0:4789            0.0.0.0:*                           -                   
udp6       0      0 :::7946                 :::*                                1222/dockerd        
udp6       0      0 :::111                  :::*                                1/systemd  






[root@Server-01 ~]# iptables -nL --line-number
Chain INPUT (policy ACCEPT)
num  target     prot opt source               destination         
1    LIBVIRT_INP  all  --  0.0.0.0/0            0.0.0.0/0           

Chain FORWARD (policy DROP)
num  target     prot opt source               destination         
1    DOCKER-USER  all  --  0.0.0.0/0            0.0.0.0/0           
2    DOCKER-INGRESS  all  --  0.0.0.0/0            0.0.0.0/0           
3    DOCKER-ISOLATION-STAGE-1  all  --  0.0.0.0/0            0.0.0.0/0           
4    ACCEPT     all  --  0.0.0.0/0            0.0.0.0/0            ctstate RELATED,ESTABLISHED
5    DOCKER     all  --  0.0.0.0/0            0.0.0.0/0           
6    ACCEPT     all  --  0.0.0.0/0            0.0.0.0/0           
7    ACCEPT     all  --  0.0.0.0/0            0.0.0.0/0            ctstate RELATED,ESTABLISHED
8    DOCKER     all  --  0.0.0.0/0            0.0.0.0/0           
9    ACCEPT     all  --  0.0.0.0/0            0.0.0.0/0           
10   ACCEPT     all  --  0.0.0.0/0            0.0.0.0/0           
11   LIBVIRT_FWX  all  --  0.0.0.0/0            0.0.0.0/0           
12   LIBVIRT_FWI  all  --  0.0.0.0/0            0.0.0.0/0           
13   LIBVIRT_FWO  all  --  0.0.0.0/0            0.0.0.0/0           
14   DROP       all  --  0.0.0.0/0            0.0.0.0/0           

Chain OUTPUT (policy ACCEPT)
num  target     prot opt source               destination         
1    LIBVIRT_OUT  all  --  0.0.0.0/0            0.0.0.0/0           

Chain LIBVIRT_INP (1 references)
num  target     prot opt source               destination         
1    ACCEPT     udp  --  0.0.0.0/0            0.0.0.0/0            udp dpt:53
2    ACCEPT     tcp  --  0.0.0.0/0            0.0.0.0/0            tcp dpt:53
3    ACCEPT     udp  --  0.0.0.0/0            0.0.0.0/0            udp dpt:67
4    ACCEPT     tcp  --  0.0.0.0/0            0.0.0.0/0            tcp dpt:67

Chain LIBVIRT_OUT (1 references)
num  target     prot opt source               destination         
1    ACCEPT     udp  --  0.0.0.0/0            0.0.0.0/0            udp dpt:53
2    ACCEPT     tcp  --  0.0.0.0/0            0.0.0.0/0            tcp dpt:53
3    ACCEPT     udp  --  0.0.0.0/0            0.0.0.0/0            udp dpt:68
4    ACCEPT     tcp  --  0.0.0.0/0            0.0.0.0/0            tcp dpt:68

Chain LIBVIRT_FWO (1 references)
num  target     prot opt source               destination         
1    ACCEPT     all  --  192.168.122.0/24     0.0.0.0/0           
2    REJECT     all  --  0.0.0.0/0            0.0.0.0/0            reject-with icmp-port-unreachable

Chain LIBVIRT_FWI (1 references)
num  target     prot opt source               destination         
1    ACCEPT     all  --  0.0.0.0/0            192.168.122.0/24     ctstate RELATED,ESTABLISHED
2    REJECT     all  --  0.0.0.0/0            0.0.0.0/0            reject-with icmp-port-unreachable

Chain LIBVIRT_FWX (1 references)
num  target     prot opt source               destination         
1    ACCEPT     all  --  0.0.0.0/0            0.0.0.0/0           

Chain DOCKER (2 references)
num  target     prot opt source               destination         

Chain DOCKER-ISOLATION-STAGE-1 (1 references)
num  target     prot opt source               destination         
1    DOCKER-ISOLATION-STAGE-2  all  --  0.0.0.0/0            0.0.0.0/0           
2    DOCKER-ISOLATION-STAGE-2  all  --  0.0.0.0/0            0.0.0.0/0           
3    RETURN     all  --  0.0.0.0/0            0.0.0.0/0           

Chain DOCKER-ISOLATION-STAGE-2 (2 references)
num  target     prot opt source               destination         
1    DROP       all  --  0.0.0.0/0            0.0.0.0/0           
2    DROP       all  --  0.0.0.0/0            0.0.0.0/0           
3    RETURN     all  --  0.0.0.0/0            0.0.0.0/0           

Chain DOCKER-USER (1 references)
num  target     prot opt source               destination         
1    RETURN     all  --  0.0.0.0/0            0.0.0.0/0           

Chain DOCKER-INGRESS (1 references)
num  target     prot opt source               destination         
1    ACCEPT     tcp  --  0.0.0.0/0            0.0.0.0/0            tcp dpt:80
2    ACCEPT     tcp  --  0.0.0.0/0            0.0.0.0/0            state RELATED,ESTABLISHED tcp spt:80
3    RETURN     all  --  0.0.0.0/0            0.0.0.0/0        

3914 次点击
所在节点    Linux
24 条回复
mepwang
2021-10-18 11:25:35 +08:00
看不出来什么问题,curl 调用三次成功一次,会不会和你的副本数量有关系?
能给的建议不多,
你把 replica 的数目改成 4 个或 2 个,看看 curl 调用成功的几率是不是变成 4 次或者 2 次成功一次。
感觉是你的 swarm 集群有点问题,直觉上是网络转发这块。
你给你的应用添加一个 overlay network 试试看?
zxkxhnqwe123
2022-01-30 15:42:43 +08:00
终于解决了 !!!! 放假花了两天时间解决了,也当学习了 . 这两天重装了 不下 20 次 ,3 台虚拟机 不停重启,重装.
解决办法就是开启 esxi 网卡的混杂模式,网卡用 E1000e(这个其实不太确定,不想验证了),然后确认一下 swarm 的网关和局域网的网关是否冲突。这些做完就圆满解决了。

感谢以上的朋友帮忙!!!

判断依据 https://stackoverflow.com/questions/59007780/container-running-on-docker-swarm-not-accessible-from-outside
isnullstring
129 天前
@zxkxhnqwe123 #22 回来留个脚印
我的情况跟楼主一样。先确认 swarm 的网段(默认地址池是 10.0.0.0/8),如果跟现有局域网的网段一致,肯定是不行的
环境:esxi 6.7 + ubuntu 22.04 + 10.0.0.0
完整解决办法:
1 、虚拟交换机 开混杂模式
2 、必须把虚拟机的网卡类型改为 E1000
3 、初始化集群时指定 IP 段,注意 stackoverflow 中的回答
-------------------------------------------------swarm 网段---------------------通讯 IP
docker swarm init --default-addr-pool 11.0.0.0/8 --advertise-addr 10.0.1.137
isnullstring
129 天前
@isnullstring #23 还有个奇怪现象:只有 1 个管理节点和 1 个工作节点的时候就没毛病(不过通过管理节点无法访问工作节点的端口),第二个节点一加进来就凉了

这是一个专为移动设备优化的页面(即为了让你能够在 Google 搜索结果里秒开这个页面),如果你希望参与 V2EX 社区的讨论,你可以继续到 V2EX 上打开本讨论主题的完整版本。

https://www.v2ex.com/t/807854

V2EX 是创意工作者们的社区,是一个分享自己正在做的有趣事物、交流想法,可以遇见新朋友甚至新机会的地方。

V2EX is a community of developers, designers and creative people.

© 2021 V2EX