默认情况下,容器内的 /sys/fs/cgroup/pids/pids.max 是 1024,而有些业务要求突破这个值。如果不放宽限制,会出现 "read init-p: connection reset by peer" 这类错误,无法 rsh 进入 pod;客户的 Java 程序也可能出现线程创建失败的问题。
解决思路:不按照文档去开启集群的 PodPidsLimit 功能,而是通过 MachineConfig(下文的 ContainerRuntimeConfig)放开 crio.conf 里面的 pid 限制。
https://blog.spider.im/post/pid-limit-in-k8s/
这个 pids 限制统计的是线程数加进程数,可以理解成 pstree -pl 看到的数量。
直接覆盖 /etc/crio/crio.conf
# Check the current pids limit of every running container on this node.
# `crictl ps -q` prints container IDs only; the default table output starts
# with a header row whose first column ("CONTAINER") would otherwise be handed
# to `crictl exec` as if it were a container ID.
crictl ps -q | xargs -I DEMO crictl exec DEMO cat /sys/fs/cgroup/pids/pids.max
# Raise the CRI-O pids limit on the worker pool via a ContainerRuntimeConfig.

# Label the worker MachineConfigPool so the selector in the manifest matches it.
oc label mcp worker custom-kubelet-pod-pids-limit=true

# Write the manifest. The nesting matters: name belongs under metadata, and
# machineConfigPoolSelector / containerRuntimeConfig belong under spec.
cat << EOF > crio.yaml
apiVersion: machineconfiguration.openshift.io/v1
kind: ContainerRuntimeConfig
metadata:
  name: set-log-and-pid
spec:
  machineConfigPoolSelector:
    matchLabels:
      custom-kubelet-pod-pids-limit: 'true'
  containerRuntimeConfig:
    pidsLimit: 10240
EOF

# Apply the new limit.
oc apply -f crio.yaml

# Rollback: run this only to remove the ContainerRuntimeConfig again.
oc delete -f crio.yaml
# PodPidsLimit: alternative approach that sets the kubelet's per-pod pids
# limit on the worker pool through a KubeletConfig.

# Label the worker MachineConfigPool so the selector in the manifest matches it.
oc label mcp worker custom-kubelet-pod-pids-limit=true

# Write the manifest. The nesting matters: name belongs under metadata, and
# machineConfigPoolSelector / kubeletConfig belong under spec. The kubelet
# configuration field is spelled podPidsLimit (lower camelCase).
cat << EOF > PodPidsLimit.yaml
apiVersion: machineconfiguration.openshift.io/v1
kind: KubeletConfig
metadata:
  name: pod-pids-limit
spec:
  machineConfigPoolSelector:
    matchLabels:
      custom-kubelet-pod-pids-limit: 'true'
  kubeletConfig:
    podPidsLimit: 4096
EOF

# Apply the new limit.
oc apply -f PodPidsLimit.yaml

# Rollback: run this only to remove the KubeletConfig again.
oc delete -f PodPidsLimit.yaml
# Same KubeletConfig as the 4096 variant, but with a per-pod pids limit of
# 10240. The selector expects the worker MachineConfigPool to carry the label
# custom-kubelet-pod-pids-limit=true.
# The nesting matters: name belongs under metadata, and
# machineConfigPoolSelector / kubeletConfig belong under spec. The kubelet
# configuration field is spelled podPidsLimit (lower camelCase).
cat << EOF > PodPidsLimit.yaml
apiVersion: machineconfiguration.openshift.io/v1
kind: KubeletConfig
metadata:
  name: pod-pids-limit
spec:
  machineConfigPoolSelector:
    matchLabels:
      custom-kubelet-pod-pids-limit: 'true'
  kubeletConfig:
    podPidsLimit: 10240
EOF

# Apply the new limit.
oc apply -f PodPidsLimit.yaml