svclb pod 的 lb-tcp-443 容器启动找不到 iptables 命令

71 天前
 dxdydz

scclb event 概括:Back-off restarting failed container lb-tcp-443 in pod svclb-traefik-e91c51d8-5vt7w_kube-system(2a842b27-2c1f-42d7-b24a-2504dfdde6a7)

使用kubectl logs svclb-traefik-e91c51d8-5vt7w -n kube-system -c lb-tcp-443,输出如下:

+ trap exit TERM INT
+ BIN_DIR=/sbin
+ check_iptables_mode
+ set +e
+ lsmod
+ grep nf_tables
+ '[' 1 '=' 0 ]
+ mode=legacy
+ set -e
+ info 'legacy mode detected'
+ echo '[INFO] ' 'legacy mode detected'
[INFO]  legacy mode detected
+ set_legacy
+ ln -sf /sbin/xtables-legacy-multi /sbin/iptables
+ ln -sf /sbin/xtables-legacy-multi /sbin/iptables-save
+ ln -sf /sbin/xtables-legacy-multi /sbin/iptables-restore
+ ln -sf /sbin/xtables-legacy-multi /sbin/ip6tables
+ start_proxy
+ echo 0.0.0.0/0
+ grep -Eq :
+ iptables -t filter -I FORWARD -s 0.0.0.0/0 -p TCP --dport 443 -j ACCEPT
/usr/bin/entry: line 46: iptables: not found

系统日志如下:

Jul 07 09:45:10 ArchVF2 k3s[505]: I0707 09:45:10.681870     505 scope.go:115] "RemoveContainer" containerID="3571d6d9336cdea67e4d6b4da173b5f519e5c1e48aa141d6cf40489eb8a6fc0e"
Jul 07 09:45:10 ArchVF2 k3s[505]: I0707 09:45:10.682112     505 scope.go:115] "RemoveContainer" containerID="df5231ece32ced969d38c5687d3559384c6791c65a2f484e1e84863e6d15efa9"
Jul 07 09:45:10 ArchVF2 k3s[505]: E0707 09:45:10.687671     505 pod_workers.go:1294] "Error syncing pod, skipping" err="[failed to \"StartContainer\" for \"lb-tcp-80\" with CrashLoopBackOff: \"back-off 5m0s restarting failed container=lb-tcp-80 pod=svclb-traefik-e91c51d8-5vt7w_kube-system(2a842b27-2c1f-42d7-b24a-2504dfdde6a7)\", failed to \"StartContainer\" for \"lb-tcp-443\" with CrashLoopBackOff: \"back-off 5m0s restarting failed container=lb-tcp-443 pod=svclb-traefik-e91c51d8-5vt7w_kube-system(2a842b27-2c1f-42d7-b24a-2504dfdde6a7)\"]" pod="kube-system/svclb-traefik-e91c51d8-5vt7w" podUID=2a842b27-2c1f-42d7-b24a-2504dfdde6a7
Jul 07 09:45:14 ArchVF2 k3s[505]: I0707 09:45:14.448587     505 handler.go:232] Adding GroupVersion traefik.containo.us v1alpha1 to ResourceManager
Jul 07 09:45:14 ArchVF2 k3s[505]: I0707 09:45:14.448982     505 handler.go:232] Adding GroupVersion traefik.containo.us v1alpha1 to ResourceManager
Jul 07 09:45:14 ArchVF2 k3s[505]: I0707 09:45:14.451023     505 handler.go:232] Adding GroupVersion k3s.cattle.io v1 to ResourceManager
Jul 07 09:45:14 ArchVF2 k3s[505]: I0707 09:45:14.452731     505 handler.go:232] Adding GroupVersion traefik.containo.us v1alpha1 to ResourceManager
Jul 07 09:45:14 ArchVF2 k3s[505]: I0707 09:45:14.454400     505 handler.go:232] Adding GroupVersion traefik.containo.us v1alpha1 to ResourceManager
Jul 07 09:45:14 ArchVF2 k3s[505]: I0707 09:45:14.454759     505 handler.go:232] Adding GroupVersion traefik.containo.us v1alpha1 to ResourceManager
Jul 07 09:45:14 ArchVF2 k3s[505]: I0707 09:45:14.454992     505 handler.go:232] Adding GroupVersion helm.cattle.io v1 to ResourceManager
Jul 07 09:45:14 ArchVF2 k3s[505]: I0707 09:45:14.455554     505 handler.go:232] Adding GroupVersion helm.cattle.io v1 to ResourceManager
Jul 07 09:45:14 ArchVF2 k3s[505]: I0707 09:45:14.455838     505 handler.go:232] Adding GroupVersion traefik.containo.us v1alpha1 to ResourceManager
Jul 07 09:45:14 ArchVF2 k3s[505]: I0707 09:45:14.456597     505 handler.go:232] Adding GroupVersion traefik.containo.us v1alpha1 to ResourceManager
Jul 07 09:45:22 ArchVF2 k3s[505]: W0707 09:45:22.768545     505 sysinfo.go:203] Nodes topology is not available, providing CPU topology
Jul 07 09:45:22 ArchVF2 k3s[505]: W0707 09:45:22.770967     505 machine.go:65] Cannot read vendor id correctly, set empty.
Jul 07 09:45:23 ArchVF2 k3s[505]: I0707 09:45:23.681533     505 scope.go:115] "RemoveContainer" containerID="3571d6d9336cdea67e4d6b4da173b5f519e5c1e48aa141d6cf40489eb8a6fc0e"
Jul 07 09:45:23 ArchVF2 k3s[505]: I0707 09:45:23.681782     505 scope.go:115] "RemoveContainer" containerID="df5231ece32ced969d38c5687d3559384c6791c65a2f484e1e84863e6d15efa9"
Jul 07 09:45:23 ArchVF2 k3s[505]: E0707 09:45:23.687267     505 pod_workers.go:1294] "Error syncing pod, skipping" err="[failed to \"StartContainer\" for \"lb-tcp-80\" with CrashLoopBackOff: \"back-off 5m0s restarting failed container=lb-tcp-80 pod=svclb-traefik-e91c51d8-5vt7w_kube-system(2a842b27-2c1f-42d7-b24a-2504dfdde6a7)\", failed to \"StartContainer\" for \"lb-tcp-443\" with CrashLoopBackOff: \"back-off 5m0s restarting failed container=lb-tcp-443 pod=svclb-traefik-e91c51d8-5vt7w_kube-system(2a842b27-2c1f-42d7-b24a-2504dfdde6a7)\"]" pod="kube-system/svclb-traefik-e91c51d8-5vt7w" podUID=2a842b27-2c1f-42d7-b24a-2504dfdde6a7

补充,查找资料后提示似乎是宿主机iptablesiptables-nft内核模块问题,lsmod输出如下:

Module                  Size  Used by
veth                   45056  0
nft_log                12288  5
nft_limit              16384  5
xt_limit               12288  0
xt_NFLOG               12288  0
nfnetlink_log          28672  5
xt_physdev             12288  10
xt_comment             12288  172
ip_set                 61440  0
vxlan                 167936  0
nft_chain_nat          12288  6
nf_conntrack_netlink    65536  0
xt_addrtype            12288  13
tun                    86016  4
overlay               217088  9
8188eu               1658880  0
rtl8xxxu              270336  0
mac80211             1032192  1 rtl8xxxu
dwmac_starfive         12288  0
cfg80211              638976  2 mac80211,rtl8xxxu
stmmac_platform        28672  1 dwmac_starfive
stmmac                421888  4 dwmac_starfive,stmmac_platform
rfkill                 40960  3 cfg80211
vfat                   28672  1
pcs_xpcs               24576  1 stmmac
evdev                  32768  1
libarc4                12288  1 mac80211
fat                   122880  1 vfat
phylink                69632  2 stmmac,pcs_xpcs
ptp                    36864  1 stmmac
wave5                 147456  0
pps_core               24576  1 ptp
v4l2_mem2mem           36864  1 wave5
axp20x_pek             16384  0
imx708                 28672  0
goodix_ts              40960  0
designware_i2s         36864  2
sch_fq_codel           24576  5
br_netfilter           40960  0
bridge                442368  1 br_netfilter
stp                    12288  1 bridge
llc                    16384  2 bridge,stp
dm_mod                212992  0
zram                   53248  2
924 次点击
所在节点    Kubernetes
3 条回复
julyclyde
71 天前
不是缺命令吗?你咋会联想到模块去的?
guanzhangzhang
71 天前
1. 手动 docker run 下 image 看下有没有 iptables 命令,还是 PATH env 问题
2. 你自己做的镜像还是官方的,skopeo list-tags 看看新版本
dxdydz
71 天前
@guanzhangzhang 看到了:
# cat /usr/bin/entry
#!/bin/sh
set -ex

trap exit TERM INT

BIN_DIR="/sbin"

info()
{
echo '[INFO] ' "$@"
}
fatal()
{
echo '[ERROR] ' "$@" >&2
exit 1
}

check_iptables_mode() {
set +e
lsmod | grep "nf_tables" 2> /dev/null
if [ $? = 0 ]; then
mode=nft
else
mode=legacy
fi
set -e
}

set_nft() {
for i in iptables iptables-save iptables-restore ip6tables; do
ln -sf /sbin/xtables-nft-multi "$BIN_DIR/$i";
done
}

set_legacy() {
for i in iptables iptables-save iptables-restore ip6tables; do
ln -sf /sbin/xtables-legacy-multi "$BIN_DIR/$i";
done
}

start_proxy() {
for src_range in ${SRC_RANGES}; do
if echo ${src_range} | grep -Eq ":"; then
ip6tables -t filter -I FORWARD -s ${src_range} -p ${DEST_PROTO} --dport ${DEST_PORT} -j ACCEPT
else
iptables -t filter -I FORWARD -s ${src_range} -p ${DEST_PROTO} --dport ${DEST_PORT} -j ACCEPT
fi
done

for dest_ip in ${DEST_IPS}; do
if echo ${dest_ip} | grep -Eq ":"; then
[ $(cat /proc/sys/net/ipv6/conf/all/forwarding) == 1 ] || exit 1
ip6tables -t filter -A FORWARD -d ${dest_ip}/128 -p ${DEST_PROTO} --dport ${DEST_PORT} -j DROP
ip6tables -t nat -I PREROUTING -p ${DEST_PROTO} --dport ${SRC_PORT} -j DNAT --to [${dest_ip}]:${DEST_PORT}
ip6tables -t nat -I POSTROUTING -d ${dest_ip}/128 -p ${DEST_PROTO} -j MASQUERADE
else
[ $(cat /proc/sys/net/ipv4/ip_forward) == 1 ] || exit 1
iptables -t filter -A FORWARD -d ${dest_ip}/32 -p ${DEST_PROTO} --dport ${DEST_PORT} -j DROP
iptables -t nat -I PREROUTING -p ${DEST_PROTO} --dport ${SRC_PORT} -j DNAT --to ${dest_ip}:${DEST_PORT}
iptables -t nat -I POSTROUTING -d ${dest_ip}/32 -p ${DEST_PROTO} -j MASQUERADE
fi
done
}

check_iptables_mode
case $mode in
nft)
info "nft mode detected"
set_nft
;;
legacy)
info "legacy mode detected"
set_legacy
;;
*)
fatal "invalid iptables mode"
;;
esac
start_proxy

if [ ! -e /pause ]; then
mkfifo /pause
fi
</pause
/ # cat /usr/bin/entry
#!/bin/sh
set -ex

trap exit TERM INT

BIN_DIR="/sbin"

info()
{
echo '[INFO] ' "$@"
}
fatal()
{
echo '[ERROR] ' "$@" >&2
exit 1
}

check_iptables_mode() {
set +e
lsmod | grep "nf_tables" 2> /dev/null
if [ $? = 0 ]; then
mode=nft
else
mode=legacy
fi
set -e
}

set_nft() {
for i in iptables iptables-save iptables-restore ip6tables; do
ln -sf /sbin/xtables-nft-multi "$BIN_DIR/$i";
done
}

set_legacy() {
for i in iptables iptables-save iptables-restore ip6tables; do
ln -sf /sbin/xtables-legacy-multi "$BIN_DIR/$i";
done
}

start_proxy() {
for src_range in ${SRC_RANGES}; do
if echo ${src_range} | grep -Eq ":"; then
ip6tables -t filter -I FORWARD -s ${src_range} -p ${DEST_PROTO} --dport ${DEST_PORT} -j ACCEPT
else
iptables -t filter -I FORWARD -s ${src_range} -p ${DEST_PROTO} --dport ${DEST_PORT} -j ACCEPT
fi
done

for dest_ip in ${DEST_IPS}; do
if echo ${dest_ip} | grep -Eq ":"; then
[ $(cat /proc/sys/net/ipv6/conf/all/forwarding) == 1 ] || exit 1
ip6tables -t filter -A FORWARD -d ${dest_ip}/128 -p ${DEST_PROTO} --dport ${DEST_PORT} -j DROP
ip6tables -t nat -I PREROUTING -p ${DEST_PROTO} --dport ${SRC_PORT} -j DNAT --to [${dest_ip}]:${DEST_PORT}
ip6tables -t nat -I POSTROUTING -d ${dest_ip}/128 -p ${DEST_PROTO} -j MASQUERADE
else
[ $(cat /proc/sys/net/ipv4/ip_forward) == 1 ] || exit 1
iptables -t filter -A FORWARD -d ${dest_ip}/32 -p ${DEST_PROTO} --dport ${DEST_PORT} -j DROP
iptables -t nat -I PREROUTING -p ${DEST_PROTO} --dport ${SRC_PORT} -j DNAT --to ${dest_ip}:${DEST_PORT}
iptables -t nat -I POSTROUTING -d ${dest_ip}/32 -p ${DEST_PROTO} -j MASQUERADE
fi
done
}

check_iptables_mode
case $mode in
nft)
info "nft mode detected"
set_nft
;;
legacy)
info "legacy mode detected"
set_legacy
;;
*)
fatal "invalid iptables mode"
;;
esac
start_proxy

if [ ! -e /pause ]; then
mkfifo /pause
fi
</pause

/ # ls /sbin/xtables-nft-multi
/sbin/xtables-nft-multi
/ # ls /sbin/iptables
/sbin/iptables
/ # ls /sbin/xtables-legacy-multi
ls: /sbin/xtables-legacy-multi: No such file or directory
我大概知道怎么改了,感谢你

这是一个专为移动设备优化的页面(即为了让你能够在 Google 搜索结果里秒开这个页面),如果你希望参与 V2EX 社区的讨论,你可以继续到 V2EX 上打开本讨论主题的完整版本。

https://www.v2ex.com/t/1055397

V2EX 是创意工作者们的社区,是一个分享自己正在做的有趣事物、交流想法,可以遇见新朋友甚至新机会的地方。

V2EX is a community of developers, designers and creative people.

© 2021 V2EX