走过路过的运维大佬,docker swarm 熟悉的也来看看

2021-10-14 17:31:29 +08:00
 zxkxhnqwe123

公司的开发测试环境想部署个 docker 集群,k8s 不会,k3s 更不会。

目前搭配的组合是:以 ESXi 作为基础系统,虚拟出多台 CentOS 8 虚拟机,在 CentOS 8 上安装 docker swarm 集群。出现了一个问题!!docker swarm 部署服务完成后,在容器内跨主机都能正常 ping 通,但是从宿主机访问 docker 开放的端口时,访问三次只有一次成功。具体如下:

公司路由器网关 10.0.0.1

1. server-01 10.0.0.21 (manager)
2. server-02 10.0.0.22
3. server-03 10.0.0.23




### 防火墙全部关闭,只有 iptables
server-01 $ docker swarm init --default-addr-pool 192.0.0.0/24
server-02 $ docker swarm join
server-03 $ docker swarm join







### 节点状态如下
server-01 $ docker node ls
ID                            HOSTNAME    STATUS    AVAILABILITY   MANAGER STATUS   ENGINE VERSION
km7dmxn402qt0s473kpqb47ac *   Server-01   Ready     Active         Leader           20.10.9
k5vq74oh1njscvv4mf9gpyogh     Server-02   Ready     Active                          20.10.9
rxzmo276saehmh1rc118fdxxe     Server-03   Ready     Active                          20.10.9
 





### 网络状态如下
server-01 $ docker network inspect ingress
[
    {
        "Name": "ingress",
        "Id": "m7ia7lmmlu1zm0zchr13ohk4q",
        "Created": "2021-10-14T15:08:48.036907446+08:00",
        "Scope": "swarm",
        "Driver": "overlay",
        "EnableIPv6": false,
        "IPAM": {
            "Driver": "default",
            "Options": null,
            "Config": [
                {
                    "Subnet": "192.0.0.0/24",
                    "Gateway": "192.0.0.1"
                }
            ]
        },
        "Internal": false,
        "Attachable": false,
        "Ingress": true,
        "ConfigFrom": {
            "Network": ""
        },
        "ConfigOnly": false,
        "Containers": {
            "ingress-sbox": {
                "Name": "ingress-endpoint",
                "EndpointID": "4b5146ca8e180dd88a5271b7d29b439f6d5995801a47d8c648379d9b51ab0b77",
                "MacAddress": "02:42:c0:00:00:02",
                "IPv4Address": "192.0.0.2/24",
                "IPv6Address": ""
            }
        },
        "Options": {
            "com.docker.network.driver.overlay.vxlanid_list": "4096"
        },
        "Labels": {},
        "Peers": [
            {
                "Name": "6ebb8868ac00",
                "IP": "10.0.0.21"
            },
            {
                "Name": "7982d5a14bf2",
                "IP": "10.0.0.22"
            },
            {
                "Name": "b25e17d118a4",
                "IP": "10.0.0.23"
            }
        ]
    }
]








server-01 $ docker network inspect docker_gwbridge
[
    {
        "Name": "docker_gwbridge",
        "Id": "6f2d03207e884bfec1918d4e8fc1a1f5f14ec9e5bcd71fd409a26630ab73d413",
        "Created": "2021-10-14T15:08:48.422229208+08:00",
        "Scope": "local",
        "Driver": "bridge",
        "EnableIPv6": false,
        "IPAM": {
            "Driver": "default",
            "Options": null,
            "Config": [
                {
                    "Subnet": "172.18.0.0/16",
                    "Gateway": "172.18.0.1"
                }
            ]
        },
        "Internal": false,
        "Attachable": false,
        "Ingress": false,
        "ConfigFrom": {
            "Network": ""
        },
        "ConfigOnly": false,
        "Containers": {
            "ingress-sbox": {
                "Name": "gateway_ingress-sbox",
                "EndpointID": "1c4c1b5ba462d87832710029171c3911df457c950055a369670f59cef374247b",
                "MacAddress": "02:42:ac:12:00:02",
                "IPv4Address": "172.18.0.2/16",
                "IPv6Address": ""
            }
        },
        "Options": {
            "com.docker.network.bridge.enable_icc": "false",
            "com.docker.network.bridge.enable_ip_masquerade": "true",
            "com.docker.network.bridge.name": "docker_gwbridge"
        },
        "Labels": {}
    }
]




### 创建 nginx 服务
server-01 $ docker service create --replicas 3 -p 80:80 --name nginx nginx

server-01 $ docker service ps nginx
ID             NAME      IMAGE          NODE        DESIRED STATE   CURRENT STATE           ERROR     PORTS
xsomsqqtkr62   nginx.1   nginx:latest   Server-02   Running         Running 2 minutes ago             
selbdoapjek0   nginx.2   nginx:latest   Server-03   Running         Running 2 minutes ago             
w5bigfn8xtz4   nginx.3   nginx:latest   Server-01   Running         Running 2 minutes ago             




server-01 $ docker service ls
ID             NAME      MODE         REPLICAS   IMAGE          PORTS
ro33x7v9ceri   nginx     replicated   3/3        nginx:latest   *:80->80/tcp





server-01 $ docker ps -a
CONTAINER ID   IMAGE          COMMAND                  CREATED         STATUS         PORTS     NAMES
37de9b7759c9   nginx:latest   "/docker-entrypoint.…"   5 minutes ago   Up 5 minutes   80/tcp    nginx.3.w5bigfn8xtz4pi10hoe62gi4b









···························································








## 重点来了!!!
[root@Server-01 ~]# curl 10.0.0.21   # 卡住
^C
[root@Server-01 ~]# curl 10.0.0.21   # 卡住
^C
[root@Server-01 ~]# curl 10.0.0.21   # 三次成功一次
<!DOCTYPE html>
<html>
<head>
<title>Welcome to nginx!</title>
<style>
html { color-scheme: light dark; }
body { width: 35em; margin: 0 auto;
font-family: Tahoma, Verdana, Arial, sans-serif; }
</style>
</head>
<body>
<h1>Welcome to nginx!</h1>
<p>If you see this page, the nginx web server is successfully installed and
working. Further configuration is required.</p>

<p>For online documentation and support please refer to
<a href="http://nginx.org/">nginx.org</a>.<br/>
Commercial support is available at
<a href="http://nginx.com/">nginx.com</a>.</p>

<p><em>Thank you for using nginx.</em></p>
</body>
</html>






[root@Server-01 ~]# netstat -tunlp
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address           Foreign Address         State       PID/Program name    
tcp        0      0 192.168.122.1:53        0.0.0.0:*               LISTEN      1740/dnsmasq        
tcp        0      0 0.0.0.0:22              0.0.0.0:*               LISTEN      1068/sshd           
tcp        0      0 0.0.0.0:111             0.0.0.0:*               LISTEN      1/systemd           
tcp6       0      0 :::22                   :::*                    LISTEN      1068/sshd           
tcp6       0      0 :::2377                 :::*                    LISTEN      1222/dockerd        
tcp6       0      0 :::7946                 :::*                    LISTEN      1222/dockerd        
tcp6       0      0 :::111                  :::*                    LISTEN      1/systemd           
tcp6       0      0 :::80                   :::*                    LISTEN      1222/dockerd        
udp        0      0 192.168.122.1:53        0.0.0.0:*                           1740/dnsmasq        
udp        0      0 0.0.0.0:67              0.0.0.0:*                           1740/dnsmasq        
udp        0      0 0.0.0.0:111             0.0.0.0:*                           1/systemd           
udp        0      0 0.0.0.0:4789            0.0.0.0:*                           -                   
udp6       0      0 :::7946                 :::*                                1222/dockerd        
udp6       0      0 :::111                  :::*                                1/systemd  






[root@Server-01 ~]# iptables -nL --line-number
Chain INPUT (policy ACCEPT)
num  target     prot opt source               destination         
1    LIBVIRT_INP  all  --  0.0.0.0/0            0.0.0.0/0           

Chain FORWARD (policy DROP)
num  target     prot opt source               destination         
1    DOCKER-USER  all  --  0.0.0.0/0            0.0.0.0/0           
2    DOCKER-INGRESS  all  --  0.0.0.0/0            0.0.0.0/0           
3    DOCKER-ISOLATION-STAGE-1  all  --  0.0.0.0/0            0.0.0.0/0           
4    ACCEPT     all  --  0.0.0.0/0            0.0.0.0/0            ctstate RELATED,ESTABLISHED
5    DOCKER     all  --  0.0.0.0/0            0.0.0.0/0           
6    ACCEPT     all  --  0.0.0.0/0            0.0.0.0/0           
7    ACCEPT     all  --  0.0.0.0/0            0.0.0.0/0            ctstate RELATED,ESTABLISHED
8    DOCKER     all  --  0.0.0.0/0            0.0.0.0/0           
9    ACCEPT     all  --  0.0.0.0/0            0.0.0.0/0           
10   ACCEPT     all  --  0.0.0.0/0            0.0.0.0/0           
11   LIBVIRT_FWX  all  --  0.0.0.0/0            0.0.0.0/0           
12   LIBVIRT_FWI  all  --  0.0.0.0/0            0.0.0.0/0           
13   LIBVIRT_FWO  all  --  0.0.0.0/0            0.0.0.0/0           
14   DROP       all  --  0.0.0.0/0            0.0.0.0/0           

Chain OUTPUT (policy ACCEPT)
num  target     prot opt source               destination         
1    LIBVIRT_OUT  all  --  0.0.0.0/0            0.0.0.0/0           

Chain LIBVIRT_INP (1 references)
num  target     prot opt source               destination         
1    ACCEPT     udp  --  0.0.0.0/0            0.0.0.0/0            udp dpt:53
2    ACCEPT     tcp  --  0.0.0.0/0            0.0.0.0/0            tcp dpt:53
3    ACCEPT     udp  --  0.0.0.0/0            0.0.0.0/0            udp dpt:67
4    ACCEPT     tcp  --  0.0.0.0/0            0.0.0.0/0            tcp dpt:67

Chain LIBVIRT_OUT (1 references)
num  target     prot opt source               destination         
1    ACCEPT     udp  --  0.0.0.0/0            0.0.0.0/0            udp dpt:53
2    ACCEPT     tcp  --  0.0.0.0/0            0.0.0.0/0            tcp dpt:53
3    ACCEPT     udp  --  0.0.0.0/0            0.0.0.0/0            udp dpt:68
4    ACCEPT     tcp  --  0.0.0.0/0            0.0.0.0/0            tcp dpt:68

Chain LIBVIRT_FWO (1 references)
num  target     prot opt source               destination         
1    ACCEPT     all  --  192.168.122.0/24     0.0.0.0/0           
2    REJECT     all  --  0.0.0.0/0            0.0.0.0/0            reject-with icmp-port-unreachable

Chain LIBVIRT_FWI (1 references)
num  target     prot opt source               destination         
1    ACCEPT     all  --  0.0.0.0/0            192.168.122.0/24     ctstate RELATED,ESTABLISHED
2    REJECT     all  --  0.0.0.0/0            0.0.0.0/0            reject-with icmp-port-unreachable

Chain LIBVIRT_FWX (1 references)
num  target     prot opt source               destination         
1    ACCEPT     all  --  0.0.0.0/0            0.0.0.0/0           

Chain DOCKER (2 references)
num  target     prot opt source               destination         

Chain DOCKER-ISOLATION-STAGE-1 (1 references)
num  target     prot opt source               destination         
1    DOCKER-ISOLATION-STAGE-2  all  --  0.0.0.0/0            0.0.0.0/0           
2    DOCKER-ISOLATION-STAGE-2  all  --  0.0.0.0/0            0.0.0.0/0           
3    RETURN     all  --  0.0.0.0/0            0.0.0.0/0           

Chain DOCKER-ISOLATION-STAGE-2 (2 references)
num  target     prot opt source               destination         
1    DROP       all  --  0.0.0.0/0            0.0.0.0/0           
2    DROP       all  --  0.0.0.0/0            0.0.0.0/0           
3    RETURN     all  --  0.0.0.0/0            0.0.0.0/0           

Chain DOCKER-USER (1 references)
num  target     prot opt source               destination         
1    RETURN     all  --  0.0.0.0/0            0.0.0.0/0           

Chain DOCKER-INGRESS (1 references)
num  target     prot opt source               destination         
1    ACCEPT     tcp  --  0.0.0.0/0            0.0.0.0/0            tcp dpt:80
2    ACCEPT     tcp  --  0.0.0.0/0            0.0.0.0/0            state RELATED,ESTABLISHED tcp spt:80
3    RETURN     all  --  0.0.0.0/0            0.0.0.0/0        

3854 次点击
所在节点    Linux
24 条回复
saytesnake
2021-10-14 17:36:45 +08:00
在 esxi 网卡打开允许混合。
defunct9
2021-10-14 19:59:36 +08:00
哦。推倒重来。你用的 swarm 过时了。直接用 docker-compose
vinle
2021-10-14 22:36:52 +08:00
首先对楼主的测试方法有点好奇的是:你已经在 01 节点测试服务可用性的话,为什么不 curl localhost/127.0.0.1 ?你 curl 了 10 段的话,其中的测试结果应该是包含两个东西:节点服务可用性+节点本机 10 段网络的配置。

然后,作为两年前实践过用 swarm 来尝试搭过小集群的过来人(3 台机子,每台约 10 个 service,每个 service 从 5~20 个 replicas 不等),只想说,这货就是个完全的社区项目,这并不是说 swarm 不能用,只是想要达到企业级的稳定性/安全性/灵活性是不可能的。为什么?其实了解下 swarm 这项目出来的目的,便会发现这东西是很难达到“好用”级别的(但是不可否认还是挺好玩)。而要想做到前面说的这些,唯有 Kubernetes,国内也有一些服务商有提供 out-of-the-box 的云原生基建平台,都非常不错。但是如果你要手把手地用 swarm 来搞,那只能祝君好运,并且玩得愉快🌹
wellsc
2021-10-14 22:46:44 +08:00
@defunct9 哥,两个不是一种东西
liuxu
2021-10-14 23:14:01 +08:00
你要是 debian/ubuntu 的话我可以帮你详细分析下,其他的系统我就只能大致说下怎么查

首先你的 server-01 的 ip 似乎有一个 192.168.122.0/24,先确认下 server-0{1,2,3}和你本地机器的 ip 是不是在一个网段,互相 ping 一下
然后 server-0{1,2,3}的 iptables、netstat 和 ifconfig 都看看
最后互相 curl,在双方机器上用 tcpdump 抓包看看
ik
2021-10-14 23:18:54 +08:00
iptables 规则问题? 三个 docker 服务都重启一下呢?
ziwen1943
2021-10-15 08:57:32 +08:00
看看防火墙和 iptables 是不是有奇奇怪怪的规则
zxkxhnqwe123
2021-10-15 09:05:15 +08:00
@vinle 三台服务器上都是一样的,执行 curl 127.0.0.1 也是一样的效果。并且所有系统都是全新重装的干净系统。
zxkxhnqwe123
2021-10-15 09:05:44 +08:00
@saytesnake 试过了,好像也不行!那个选项叫混杂模式。
zxkxhnqwe123
2021-10-15 09:07:59 +08:00
@saytesnake 主要是我是开发人员,公司也没有专业运维,现在想搭建 DevOps 自动化运维的测试环境,所以只能从简单的方案开始折腾。
juzisang
2021-10-15 09:30:24 +08:00
byzf
2021-10-15 10:53:59 +08:00
以前碰到过几次请求三次只成功一次的情况,有 dns 配置的问题,有负载均衡的问题。
defunct9
2021-10-15 10:55:29 +08:00
开 ssh,让我上去看看
zxxufo008
2021-10-15 11:42:52 +08:00
@defunct9 好家伙,层主换了个头像,我还是通过这句话知道你还是你的 [滑稽]
defunct9
2021-10-15 12:00:15 +08:00
@zxxufo008 小孩长大了,头像也跟着长大了。是我,是我,还是我。
liuxu
2021-10-15 12:14:12 +08:00
@zxxufo008 我还以为是有人开机器人了,原来是换头像了
mepwang
2021-10-15 16:16:22 +08:00
curl -v 看看卡到哪一步了
jackleeforce3615
2021-10-15 16:53:29 +08:00
一直以为没多少人用 docker swarm 了
mkdir
2021-10-15 17:14:21 +08:00
@jackleeforce3615 一直用一直爽
zxkxhnqwe123
2021-10-15 17:54:25 +08:00
@mepwang

[root@Server-01 ~]# curl 127.0.0.1 -v
* Rebuilt URL to: 127.0.0.1/
* Trying 127.0.0.1...
* TCP_NODELAY set
^C
[root@Server-01 ~]# curl 127.0.0.1 -v
* Rebuilt URL to: 127.0.0.1/
* Trying 127.0.0.1...
* TCP_NODELAY set
* Connected to 127.0.0.1 (127.0.0.1) port 80 (#0)
> GET / HTTP/1.1
> Host: 127.0.0.1
> User-Agent: curl/7.61.1
> Accept: */*
>
< HTTP/1.1 200 OK
< Server: nginx/1.21.3
< Date: Fri, 15 Oct 2021 09:56:24 GMT
< Content-Type: text/html
< Content-Length: 615
< Last-Modified: Tue, 07 Sep 2021 15:21:03 GMT
< Connection: keep-alive
< ETag: "6137835f-267"
< Accept-Ranges: bytes
<
<!DOCTYPE html>
<html>
<head>
<title>Welcome to nginx!</title>
<style>
html { color-scheme: light dark; }
body { width: 35em; margin: 0 auto;
font-family: Tahoma, Verdana, Arial, sans-serif; }
</style>
</head>
<body>
<h1>Welcome to nginx!</h1>
<p>If you see this page, the nginx web server is successfully installed and
working. Further configuration is required.</p>

<p>For online documentation and support please refer to
<a href="http://nginx.org/">nginx.org</a>.<br/>
Commercial support is available at
<a href="http://nginx.com/">nginx.com</a>.</p>

<p><em>Thank you for using nginx.</em></p>
</body>
</html>
* Connection #0 to host 127.0.0.1 left intact
[root@Server-01 ~]# ^C
[root@Server-01 ~]# curl 127.0.0.1 -v
* Rebuilt URL to: 127.0.0.1/
* Trying 127.0.0.1...
* TCP_NODELAY set

这是一个专为移动设备优化的页面(即为了让你能够在 Google 搜索结果里秒开这个页面),如果你希望参与 V2EX 社区的讨论,你可以继续到 V2EX 上打开本讨论主题的完整版本。

https://www.v2ex.com/t/807854

V2EX 是创意工作者们的社区,是一个分享自己正在做的有趣事物、交流想法,可以遇见新朋友甚至新机会的地方。

V2EX is a community of developers, designers and creative people.

© 2021 V2EX