您现在的位置是:首页 >技术杂谈 >Kubernetes 污点、容忍策略、优先级与抢占、Pod安全网站首页技术杂谈
Kubernetes 污点、容忍策略、优先级与抢占、Pod安全
污点
污点使节点与Pod产生排斥,作用与标签(吸引Pod)正好相反
污点策略是通过嵌合在键值对上的污点标签进行声明
污点标签必须绑定在键值对上,格式为:key=value:[污点标签]
taint翻译就是污点的意思
污点标签必须绑定在键值对上,格式为:key=value:[污点标签]
查看污点标签
kubectl describe nodes [结点名]
设置污点标签
kubectl taint node [结点名字] key=value:污点标签
删除污点标签
kubectl taint node [结点名字] key=value:污点标签-
污点标签
Pod调度到节点之前,需要经过调度器(scheduler)的筛选和打分
PreferNoSchedule尽量不调度,只要不剩它一个就不被调度
NoSchedule不调度,筛选都进不去
NoExecute不调度并驱逐:不仅不会调度新的Pod,还会驱逐节点上已运行且不容忍该污点的Pod
PreferNoSchedule、NoSchedule这两个只对新建的pod有效.
NoExecute对设置污点之前已运行的和之后新建的pod都有效,已运行的pod会被驱逐(删除)。
管理污点标签
# 查看污点策略
[root@master ~]# kubectl describe nodes|grep Taints
Taints: node-role.kubernetes.io/master:NoSchedule
Taints: <none>
Taints: <none>
Taints: <none>
# node-0001 设置污点策略 PreferNoSchedule
[root@master ~]# kubectl taint node node-0001 k1=v1:PreferNoSchedule
node/node-0001 tainted
# node-0002 设置污点策略 NoSchedule
[root@master ~]# kubectl taint node node-0002 k2=v2:NoSchedule
node/node-0002 tainted
[root@master ~]# kubectl describe nodes |grep Taints
Taints: node-role.kubernetes.io/master:NoSchedule
Taints: k1=v1:PreferNoSchedule
Taints: k2=v2:NoSchedule
Taints: <none>
Pod资源文件
# 查看污点策略
[root@master ~]# kubectl describe nodes|grep Taints
Taints: node-role.kubernetes.io/master:NoSchedule
Taints: <none>
Taints: <none>
Taints: <none>
# node-0001 设置污点策略 PreferNoSchedule
[root@master ~]# kubectl taint node node-0001 k1=v1:PreferNoSchedule
node/node-0001 tainted
# node-0002 设置污点策略 NoSchedule
[root@master ~]# kubectl taint node node-0002 k2=v2:NoSchedule
node/node-0002 tainted
[root@master ~]# kubectl describe nodes |grep Taints
Taints: node-role.kubernetes.io/master:NoSchedule
Taints: k1=v1:PreferNoSchedule
Taints: k2=v2:NoSchedule
Taints: <none>
验证污点策略
# 优先使用没有污点的节点
[root@master ~]# sed "s,myphp,php1," myphp.yaml |kubectl apply -f -
pod/php1 created
[root@master ~]# sed "s,myphp,php2," myphp.yaml |kubectl apply -f -
pod/php2 created
[root@master ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE
php1 1/1 Running 0 13s 10.244.3.43 node-0003
php2 1/1 Running 0 5s 10.244.3.44 node-0003
# 最后使用 PreferNoSchedule 节点
[root@master ~]# sed 's,myphp,php3,' myphp.yaml |kubectl apply -f -
pod/php3 created
[root@master ~]# sed 's,myphp,php4,' myphp.yaml |kubectl apply -f -
pod/php4 created
[root@master ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE
php1 1/1 Running 0 3m16s 10.244.3.43 node-0003
php2 1/1 Running 0 3m8s 10.244.3.44 node-0003
php3 1/1 Running 0 113s 10.244.1.8 node-0001
php4 1/1 Running 0 9s 10.244.1.9 node-0001
# 不会使用 NoSchedule 节点
[root@master ~]# sed 's,myphp,php5,' myphp.yaml |kubectl apply -f -
pod/php5 created
[root@master ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE
php1 1/1 Running 0 3m16s 10.244.3.43 node-0003
php2 1/1 Running 0 3m8s 10.244.3.44 node-0003
php3 1/1 Running 0 113s 10.244.1.8 node-0001
php4 1/1 Running 0 9s 10.244.1.9 node-0001
php5 0/1 Pending 0 5s <none> <none>
验证驱逐策略
[root@master ~]# kubectl taint node node-0003 k3=v3:NoExecute
node/node-0003 tainted
[root@master ~]# kubectl describe nodes |grep Taints
Taints: node-role.kubernetes.io/master:NoSchedule
Taints: k1=v1:PreferNoSchedule
Taints: k2=v2:NoSchedule
Taints: k3=v3:NoExecute
[root@master ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE
php3 1/1 Running 0 4m19s 10.244.1.8 node-0001
php4 1/1 Running 0 2m35s 10.244.1.9 node-0001
php5 0/1 Pending 0 2m31s <none> <none>
清理配置
[root@master ~]# kubectl delete pod php{3..5}
pod "php3" deleted
pod "php4" deleted
pod "php5" deleted
[root@master ~]# kubectl taint node node-0001 k1=v1:PreferNoSchedule-
node/node-0001 untainted
[root@master ~]# kubectl taint node node-0002 k2=v2:NoSchedule-
node/node-0002 untainted
[root@master ~]# kubectl taint node node-0003 k3=v3:NoExecute-
node/node-0003 untainted
[root@master ~]# kubectl describe nodes |grep Taints
Taints: node-role.kubernetes.io/master:NoSchedule
Taints: <none>
Taints: <none>
Taints: <none>
容忍策略
一般根据标签去做,所以必须绑定标签
容忍策略是什么,刚好与污点相反,某些时候我们就是要在有污点的节点上运行Pod,这种无视污点标签的调度方式称为容忍
为node设置污点
# 节点 node-0001 设置污点标签 k=v1:NoSchedule
[root@master ~]# kubectl taint node node-0001 k=v1:NoSchedule
node/node-0001 tainted
# 节点 node-0002 设置污点标签 k=v2:NoSchedule
[root@master ~]# kubectl taint node node-0002 k=v2:NoSchedule
node/node-0002 tainted
# 节点 node-0003 设置污点标签 k=v1:NoExecute
[root@master ~]# kubectl taint node node-0003 k=v1:NoExecute
node/node-0003 tainted
[root@master ~]# kubectl describe nodes |grep Taints
Taints: node-role.kubernetes.io/master:NoSchedule
Taints: k=v1:NoSchedule
Taints: k=v2:NoSchedule
Taints: k=v1:NoExecute
精确匹配(Equal)
精确匹配策略
# 容忍 k=v1:NoSchedule 污点
[root@master ~]# vim myphp.yaml
---
kind: Pod
apiVersion: v1
metadata:
name: myphp
spec:
tolerations:
- operator: "Equal" # 完全匹配键值对
key: "k" # 键
value: "v1" # 值
effect: "NoSchedule" # 污点标签
containers:
- name: php
image: myos:phpfpm
resources:
requests:
cpu: 800m
[root@master ~]# for i in php{1..3};do sed "s,myphp,${i}," myphp.yaml ;done|kubectl apply -f -
pod/php1 created
pod/php2 created
pod/php3 created
[root@master ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE
php1 1/1 Running 0 6s 10.244.1.10 node-0001
php2 1/1 Running 0 6s 10.244.1.11 node-0001
php3 0/1 Pending 0 6s <none> <none>
[root@master ~]# kubectl delete pod php{1..3}
pod "php1" deleted
pod "php2" deleted
pod "php3" deleted
模糊匹配(Exists)
模糊匹配策略
# 容忍 k=*:NoSchedule 污点
[root@master ~]# vim myphp.yaml
---
kind: Pod
apiVersion: v1
metadata:
name: myphp
spec:
tolerations:
- operator: "Exists" # 部分匹配,存在即可
key: "k" # 键
effect: "NoSchedule" # 污点标签
containers:
- name: php
image: myos:phpfpm
resources:
requests:
cpu: 800m
[root@master ~]# for i in php{1..3};do sed "s,myphp,${i}," myphp.yaml ;done|kubectl apply -f -
pod/php1 created
pod/php2 created
pod/php3 created
[root@master ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE
php1 1/1 Running 0 6s 10.244.1.12 node-0001
php2 1/1 Running 0 6s 10.244.2.21 node-0002
php3 1/1 Running 0 6s 10.244.2.22 node-0002
[root@master ~]# kubectl delete pod php{1..3}
pod "php1" deleted
pod "php2" deleted
pod "php3" deleted
所有污点标签
# 容忍所有 node 上的污点
[root@master ~]# vim myphp.yaml
---
kind: Pod
apiVersion: v1
metadata:
name: myphp
spec:
tolerations:
- operator: "Exists" # 模糊匹配
key: "k" # 键
effect: # 没有设置污点标签代表所有
containers:
- name: php
image: myos:phpfpm
resources:
requests:
cpu: 800m
[root@master ~]# for i in php{1..3};do sed "s,myphp,${i}," myphp.yaml ;done|kubectl apply -f -
pod/php1 created
pod/php2 created
pod/php3 created
[root@master ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE
php1 1/1 Running 0 36s 10.244.1.15 node-0001
php2 1/1 Running 0 36s 10.244.2.16 node-0002
php3 1/1 Running 0 36s 10.244.3.18 node-0003
[root@master ~]# kubectl delete pod php{1..3}
pod "php1" deleted
pod "php2" deleted
pod "php3" deleted
抢占与优先级
优先级表示一个Pod相对于其他Pod的重要性
优先级可以保证重要的Pod被调度运行
当资源比较紧张才用优先级
如何使用优先级和抢占
配置优先级类PriorityClass
创建Pod时为其设置对应的优先级
优先级概述
PriorityClass是一个全局资源对象,定义了从优先级类名称到优先级整数值的映射。优先级在value字段中指定,可以设置小于等于10亿的整数值(更大的值保留给系统关键Pod使用),值越大,优先级越高。
PriorityClass还有两个可选字段:
-globalDefault用于设置默认优先级状态,如果没有任何优先级设置Pod的优先级为零
-description用来配置描述性信息,告诉用户优先级的用途
优先级概述
优先级策略:
非抢占优先(插队):在调度阶段优先进行调度分配,一旦容器调度完成就不可抢占,资源不足时,只能等待,
抢占优先(杀死):强制调度一个pod,如果资源不足无法被调度,调度程序会抢占(删除)较低优先级的Pod的资源,来保证高优先级Pod的运行
非抢占优先级
preemptionPolicy: Never #非抢占
value: 500 #值
description: non-preemptive #描述
# 定义优先级(队列优先)
[root@master ~]# vim mypriority.yaml
---
kind: PriorityClass
apiVersion: scheduling.k8s.io/v1
metadata:
name: high-non
globalDefault: false
preemptionPolicy: Never
value: 1000
description: non-preemptive
---
kind: PriorityClass
apiVersion: scheduling.k8s.io/v1
metadata:
name: low-non
globalDefault: false
preemptionPolicy: Never
value: 500
description: non-preemptive
[root@master ~]# kubectl apply -f mypriority.yaml
priorityclass.scheduling.k8s.io/high-non created
priorityclass.scheduling.k8s.io/low-non created
[root@master ~]# kubectl get priorityclasses.scheduling.k8s.io
NAME VALUE GLOBAL-DEFAULT AGE
high-non 1000 false 12s
low-non 500 false 12s
system-cluster-critical 2000000000 false 45h
system-node-critical 2000001000 false 45h
pod无,中,高优先级配置
# 无优先级的 Pod
[root@master ~]# cat php1.yaml
---
kind: Pod
apiVersion: v1
metadata:
name: php1
spec:
nodeSelector:
kubernetes.io/hostname: node-0002
containers:
- name: php
image: myos:phpfpm
resources:
requests:
cpu: "1500m"
# 低优先级 Pod
[root@master ~]# cat php2.yaml
---
kind: Pod
apiVersion: v1
metadata:
name: php2
spec:
nodeSelector:
kubernetes.io/hostname: node-0002
priorityClassName: low-non # 优先级名称
containers:
- name: php
image: myos:phpfpm
resources:
requests:
cpu: "1500m"
# 高优先级 Pod
[root@master ~]# cat php3.yaml
---
kind: Pod
apiVersion: v1
metadata:
name: php3
spec:
nodeSelector:
kubernetes.io/hostname: node-0002
priorityClassName: high-non # 优先级名称
containers:
- name: php
image: myos:phpfpm
resources:
requests:
cpu: "1500m"
验证非抢占优先
[root@master ~]# kubectl apply -f php1.yaml
pod/php1 created
[root@master ~]# kubectl apply -f php2.yaml
pod/php2 created
[root@master ~]# kubectl apply -f php3.yaml
pod/php3 created
[root@master ~]# kubectl get pods
NAME READY STATUS RESTARTS AGE
php1 1/1 Running 0 9s
php2 0/1 Pending 0 6s
php3 0/1 Pending 0 4s
[root@master ~]# kubectl delete pod php1
pod "php1" deleted
[root@master ~]# kubectl get pods
NAME READY STATUS RESTARTS AGE
php2 0/1 Pending 0 20s
php3 1/1 Running 0 18s
# 清理实验 Pod
[root@master ~]# kubectl delete pod php2 php3
pod "php2" deleted
pod "php3" deleted
抢占策略
[root@master ~]# vim mypriority.yaml
---
kind: PriorityClass
apiVersion: scheduling.k8s.io/v1
metadata:
name: high
globalDefault: false
preemptionPolicy: PreemptLowerPriority
value: 1000
description: non-preemptive
---
kind: PriorityClass
apiVersion: scheduling.k8s.io/v1
metadata:
name: low
globalDefault: false
preemptionPolicy: PreemptLowerPriority
value: 500
description: non-preemptive
[root@master ~]# kubectl apply -f mypriority.yaml
priorityclass.scheduling.k8s.io/high created
priorityclass.scheduling.k8s.io/low created
[root@master ~]# kubectl get priorityclasses.scheduling.k8s.io
NAME VALUE GLOBAL-DEFAULT AGE
high 1000 false 12s
low 500 false 12s
system-cluster-critical 2000000000 false 45h
system-node-critical 2000001000 false 45h
验证抢占策略
# 默认优先级 Pod
[root@master ~]# kubectl apply -f php1.yaml
pod/php1 created
[root@master ~]# kubectl get pods
NAME READY STATUS RESTARTS AGE
php1 1/1 Running 0 6s
# 高优先级 Pod
[root@master ~]# sed 's,-non,,' php3.yaml |kubectl apply -f -
pod/php3 created
[root@master ~]# kubectl get pods
NAME READY STATUS RESTARTS AGE
php3 1/1 Running 0 9s
# 低优先级 Pod
[root@master ~]# sed 's,-non,,' php2.yaml |kubectl apply -f -
pod/php2 created
[root@master ~]# kubectl get pods
NAME READY STATUS RESTARTS AGE
php2 0/1 Pending 0 3s
php3 1/1 Running 0 9s
# 清理实验 Pod
[root@master ~]# kubectl delete pod php2 php3
pod "php2" deleted
pod "php3" deleted
[root@master ~]# kubectl delete -f mypriority.yaml
priorityclass.scheduling.k8s.io "high-non" deleted
priorityclass.scheduling.k8s.io "low-non" deleted
priorityclass.scheduling.k8s.io "high" deleted
priorityclass.scheduling.k8s.io "low" deleted
总结:非抢占只是插队,按优先级高低排队等待调度,不会杀死正在运行的Pod;抢占则是在资源不足时把当前运行的低优先级Pod杀掉来腾出资源,如果自己的优先级更低,想杀也杀不了
pod安全
特权容器
容器是通过名称空间技术隔离的,但是有时候我们需要突破隔离限制,获取更高的权限,这类容器称为特权容器
运行特权容器有风险
安全性
Pod安全策略是集群级别的资源,它能够控制Pod运行的行为,以及它具有访问什么的能力。
使用Pod安全策略服务器版本必须不低于版本v1.22
确保PodSecurity特性门控(feature gate)已启用
apiServer是系统核心服务,如果出现故障,K8s将无法管理和维护,在修改之前备份资源文件
Pod安全策略
privileged 不受限制
baseline 弱限制性策略,禁止已知的特权提升
restricted 非常严格的限制性策略,遵循当前的保护Pod的最佳实践
Pod准入控制标签(MODE)
enforce 策略违例会导致Pod被拒绝
audit 策略违例会触发审计日志,但是Pod仍可以被接受
warn 策略违例会触发用户可见的警告信息,但是Pod仍然是被接受的
特权容器
更改容器主机名 和 /etc/hosts 文件
[root@master ~]# vim root.yaml
---
kind: Pod
apiVersion: v1
metadata:
name: root
spec:
terminationGracePeriodSeconds: 0
restartPolicy: Always
hostname: myhost # 特权,修改主机名
hostAliases: # 修改 /etc/hosts
- ip: 192.168.1.30 # IP 地址
hostnames: # 名称键值对
- registry # 主机名
containers:
- name: linux
image: myos:v2009
imagePullPolicy: IfNotPresent
command: ["/bin/bash"]
args:
- -c
- |
while true;do
echo "Hello World."
sleep 5
done
[root@master ~]# kubectl apply -f root.yaml
pod/root created
[root@master ~]# kubectl exec -it root -- /bin/bash
[root@myhost html]# hostname
myhost
[root@myhost html]# cat /etc/hosts
... ...
# Entries added by HostAliases.
192.168.1.30 registry
[root@master ~]# kubectl delete pod root
pod "root" deleted
root特权容器
[root@master ~]# vim root.yaml
---
kind: Pod
apiVersion: v1
metadata:
name: root
spec:
terminationGracePeriodSeconds: 0
restartPolicy: Always
hostPID: true # 特权,共享系统进程
hostNetwork: true # 特权,共享主机网络
containers:
- name: linux
image: myos:v2009
imagePullPolicy: IfNotPresent
securityContext: # 安全上下文值
privileged: true # root特权容器
command: ["/bin/bash"]
args:
- -c
- |
while true;do
echo "Hello World."
sleep 5
done
[root@master ~]# kubectl get pods
NAME READY STATUS RESTARTS AGE
root 1/1 Running 0 26s
[root@master ~]# kubectl exec -it root -- /bin/bash
[root@node-0001 /]#
# 系统进程特权
[root@node-0001 /]# pstree -p
systemd(1)-+-NetworkManager(510)-+-dhclient(548)
| |-{NetworkManager}(522)
| `-{NetworkManager}(524)
|-agetty(851)
|-chronyd(502)
|-containerd(531)-+-{containerd}(555)
... ...
# 网络特权
[root@node-0001 /]# ifconfig eth0
eth0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
inet 192.168.1.51 netmask 255.255.255.0 broadcast 192.168.1.255
ether fa:16:3e:70:c8:fa txqueuelen 1000 (Ethernet)
... ...
# root用户特权
[root@node-0001 /]# mkdir /sysroot
[root@node-0001 /]# mount /dev/sda1 /sysroot
[root@node-0001 /]# chroot /sysroot
sh-4.2# mount -t proc proc /proc
sh-4.2# : 此处已经是 node 节点上的 root 用户了
# 删除特权容器
[root@master ~]# kubectl delete pod root
pod "root" deleted
Pod安全策略
[root@master ~]# sed '36i - --feature-gates=PodSecurity=true' -i /etc/kubernetes/manifests/kube-apiserver.yaml
[root@master ~]# systemctl restart kubelet
# 生产环境设置严格的准入控制
[root@master ~]# kubectl create namespace myprod
namespace/myprod created
[root@master ~]# kubectl label namespaces myprod pod-security.kubernetes.io/enforce=restricted
namespace/myprod labeled
# 测试环境测试警告提示
[root@master ~]# kubectl create namespace mytest
namespace/mytest created
[root@master ~]# kubectl label namespaces mytest pod-security.kubernetes.io/warn=baseline
namespace/mytest labeled
# 创建特权容器
[root@master ~]# kubectl -n myprod apply -f root.yaml
Error from server (Failure): error when creating "root.yaml": host namespaces (hostNetwork=true, hostPID=true), privileged (container "linux" must not set securityContext.privileged=true), allowPrivilegeEscalation != false (container "linux" must set securityContext.allowPrivilegeEscalation=false), unrestricted capabilities (container "linux" must set securityContext.capabilities.drop=["ALL"]), runAsNonRoot != true (pod or container "linux" must set securityContext.runAsNonRoot=true), seccompProfile (pod or container "linux" must set securityContext.seccompProfile.type to "RuntimeDefault" or "Localhost")
[root@master ~]#
[root@master ~]# kubectl -n myprod get pods
No resources found in myprod namespace.
[root@master ~]# kubectl -n mytest apply -f root.yaml
Warning: would violate "latest" version of "baseline" PodSecurity profile: host namespaces (hostNetwork=true, hostPID=true), privileged (container "linux" must not set securityContext.privileged=true)
pod/root created
[root@master ~]#
[root@master ~]# kubectl -n mytest get pods
NAME READY STATUS RESTARTS AGE
root 1/1 Running 0 7s
[root@master ~]#
符合安全规则的Pod
[root@master ~]# vim nonroot.yaml
---
kind: Pod
apiVersion: v1
metadata:
  name: nonroot
spec:
  terminationGracePeriodSeconds: 0
  restartPolicy: Always
  containers:
  - name: linux
    image: myos:v2009
    imagePullPolicy: IfNotPresent
    securityContext:
      allowPrivilegeEscalation: false
      runAsNonRoot: true
      runAsUser: 99
      seccompProfile:
        type: "RuntimeDefault"
      capabilities:
        drop: ["ALL"]
    command: ["/bin/bash"]
    args:
    - -c
    - |
      while true;do
        echo "Hello World."
        sleep 30
      done
[root@master ~]# kubectl -n myprod apply -f nonroot.yaml
pod/nonroot created
[root@master ~]# kubectl -n myprod get pods
NAME READY STATUS RESTARTS AGE
nonroot 1/1 Running 0 6s
[root@master ~]# kubectl -n myprod exec -it nonroot -- id
uid=99(nobody) gid=99(nobody) groups=99(nobody)