#!/bin/bash
#
# k8s inspection script: prints summary counts describing a Kubernetes
# cluster by querying it with kubectl.
#
# Requires: kubectl available on PATH.

# Abort early when kubectl is missing; the checks that follow all call it.
if ! command -v kubectl >/dev/null 2>&1; then
  # The diagnostic goes to stderr and the exit status is non-zero so that a
  # caller (cron job, CI step, wrapper script) can detect the failure.
  printf '%s\n' 'kubectl 未安装,请先安装kubectl' >&2
  exit 1
fi
#######################################
# Print the cluster and node status summary.
# Every summary line has the form "<label><count>", where the count is the
# number of kubectl output lines containing the status word.
# Arguments: none
# Outputs:   heading and four summary lines on stdout
# Returns:   0
#######################################
check_cluster_and_nodes() {
  local cluster_info node_table

  # NOTE(review): the trailing "/" may be an escape sequence that was mangled
  # when the script was copied from a web page; kept unchanged until confirmed.
  printf '%s\n' '开始集群状态信息收集/'

  # Query once so both counts are taken from the same snapshot.
  cluster_info=$(kubectl cluster-info)
  printf '%s' '检查集群正常状态:'
  grep -cw running <<<"$cluster_info"
  printf '%s' '检查集群异常状态:'
  grep -cw Pending <<<"$cluster_info"

  node_table=$(kubectl get nodes)
  # -w stops "Ready" from matching inside "NotReady".
  printf '%s' '检查节点正常状态:'
  grep -cw Ready <<<"$node_table"
  printf '%s' '检查节点异常状态:'
  grep -cw NotReady <<<"$node_table"

  # grep -c exits 1 when the count is zero; that is not an error here.
  return 0
}

check_cluster_and_nodes
#######################################
# Print how many control-plane components report Healthy / Unhealthy.
# Arguments: none
# Outputs:   heading and two "<label><count>" lines on stdout
# Returns:   0
#######################################
check_component_status() {
  local component_table

  # NOTE(review): the trailing "/" may be an escape sequence that was mangled
  # when the script was copied from a web page; kept unchanged until confirmed.
  printf '%s\n' '开始组件状态信息收集/'

  # stderr is discarded as in the original script -- presumably to hide the
  # deprecation warning kubectl prints for ComponentStatus (v1.19+).
  # Queried once so both counts describe the same snapshot.
  component_table=$(kubectl get componentstatuses 2>/dev/null)

  # -w stops "Healthy" from matching inside "Unhealthy".
  printf '%s' '检查Kubernetes组件正常状态:'
  grep -cw Healthy <<<"$component_table"
  printf '%s' '检查Kubernetes组件异常状态:'
  grep -cw Unhealthy <<<"$component_table"

  # grep -c exits 1 when the count is zero; that is not an error here.
  return 0
}

check_component_status
#######################################
# Print the number of Active namespaces and of Running / abnormal pods.
# A pod is counted as abnormal when its line contains "Pending" or
# "Terminating" (the two states the original script looked for).
# Arguments: none
# Outputs:   three "<label><count>" lines on stdout
# Returns:   0
#######################################
check_namespaces_and_pods() {
  local pod_table

  printf '%s' '检查命名正常空间:'
  kubectl get namespaces | grep -cw Active

  # Queried once so both pod counts describe the same snapshot.
  pod_table=$(kubectl get pods --all-namespaces)
  printf '%s' '检查Pod正常状态:'
  grep -cw Running <<<"$pod_table"
  # The patterns must be plain words: with typographic quotes around them
  # grep searched for the quote characters too and never matched.
  printf '%s' '检查Pod异常状态:'
  grep -c -e Pending -e Terminating <<<"$pod_table"

  # grep -c exits 1 when the count is zero; that is not an error here.
  return 0
}

check_namespaces_and_pods
#######################################
# Print how many ReplicaSets, Deployments, Services and Ingresses exist
# across all namespaces.
# Arguments: none
# Outputs:   four "<label><count>" lines on stdout
# Returns:   0
#######################################
check_workload_objects() {
  # awk program that counts every line after the header row; "n + 0" makes
  # an empty listing print 0 instead of an empty string.
  local -r count_rows='NR > 1 { n++ } END { print n + 0 }'

  printf '%s' '检查副本ReplicaSet状态:'
  kubectl get replicasets --all-namespaces | awk "$count_rows"

  printf '%s' '检查部署Deployment状态:'
  kubectl get deployments --all-namespaces | awk "$count_rows"

  printf '%s' '检查服务Service状态:'
  kubectl get services --all-namespaces | awk "$count_rows"

  printf '%s' '检查入口Ingress状态:'
  kubectl get ingress --all-namespaces | awk "$count_rows"

  return 0
}

check_workload_objects
#######################################
# Print how many PersistentVolumes are Available, Bound, Released or Failed.
# The count is the number of `kubectl get pv` lines containing the word.
# Arguments: none
# Outputs:   heading and four "<label><count>" lines on stdout
# Returns:   0
#######################################
check_persistent_volumes() {
  local pv_table

  # NOTE(review): the trailing "/" may be an escape sequence that was mangled
  # when the script was copied from a web page; kept unchanged until confirmed.
  printf '%s\n' '=开始存储卷信息收集=/'

  # One query instead of four: fewer API calls and a consistent snapshot.
  pv_table=$(kubectl get pv)

  printf '%s' '检查存储卷正常使用状态:'
  grep -cw Available <<<"$pv_table"
  printf '%s' '检查存储卷绑定状态:'
  grep -cw Bound <<<"$pv_table"
  printf '%s' '检查存储卷解绑状态:'
  grep -cw Released <<<"$pv_table"
  printf '%s' '检查存储卷故障状态:'
  grep -cw Failed <<<"$pv_table"

  # grep -c exits 1 when the count is zero; that is not an error here.
  return 0
}

check_persistent_volumes
#######################################
# Print how many PersistentVolumeClaims (all namespaces) are Bound, Pending,
# Lost or Failed.
# The count is the number of `kubectl get pvc` lines containing the word.
# Arguments: none
# Outputs:   four "<label><count>" lines on stdout
# Returns:   0
#######################################
check_persistent_volume_claims() {
  local pvc_table

  # One query instead of four: fewer API calls and a consistent snapshot.
  pvc_table=$(kubectl get pvc --all-namespaces)

  printf '%s' '检查存储卷声明绑定状态:'
  grep -cw Bound <<<"$pvc_table"
  printf '%s' '检查存储卷声明pv解绑状态:'
  grep -cw Pending <<<"$pvc_table"
  printf '%s' '检查存储卷声明pv断开状态:'
  grep -cw Lost <<<"$pvc_table"
  printf '%s' '检查存储卷声明pv无法绑定状态:'
  grep -cw Failed <<<"$pvc_table"

  # grep -c exits 1 when the count is zero; that is not an error here.
  return 0
}

check_persistent_volume_claims
#######################################
# Print how many ConfigMaps and Events exist across all namespaces.
# Arguments: none
# Outputs:   heading and two "<label><count>" lines on stdout
# Returns:   0
#######################################
check_configmaps_and_events() {
  # awk program that counts every line after the header row; "n + 0" makes
  # an empty listing print 0 instead of an empty string.
  local -r count_rows='NR > 1 { n++ } END { print n + 0 }'

  # NOTE(review): the trailing "/" may be an escape sequence that was mangled
  # when the script was copied from a web page; kept unchanged until confirmed.
  printf '%s\n' '=开始事务信息收集=/'

  printf '%s' '检查配置映射状态:'
  kubectl get configmaps --all-namespaces | awk "$count_rows"

  # NOTE(review): the "1h" in the label presumably refers to the API
  # server's event retention window; no time filter is applied here.
  printf '%s' '检查1h事件状态:'
  kubectl get events --all-namespaces | awk "$count_rows"

  return 0
}

check_configmaps_and_events
#######################################
# Report nodes and pods whose resource usage exceeds fixed thresholds.
#   - nodes with CPU% above 90 (names listed, or "0" when there are none)
#   - nodes with MEMORY% above 90 (names listed, or "0" when there are none)
#   - number of pods using more than 100 CPU units (millicores, "m" suffix)
#   - number of pods using more than 1000 memory units (Mi suffix)
# When metrics are unavailable kubectl prints nothing on stdout and every
# result is reported as 0.
# Arguments: none
# Outputs:   heading and result lines on stdout
# Returns:   0
#######################################
check_resource_usage() {
  local node_usage pod_usage hot_nodes

  # NOTE(review): the trailing "/" may be an escape sequence that was mangled
  # when the script was copied from a web page; kept unchanged until confirmed.
  printf '%s\n' '开始资源使用信息收集/'

  # stderr is discarded as in the original script (e.g. metrics API missing).
  # `kubectl top node` columns: NAME CPU(cores) CPU% MEMORY(bytes) MEMORY%.
  node_usage=$(kubectl top node 2>/dev/null)

  # "$3 + 0" forces a numeric comparison: awk converts the leading digits and
  # ignores the "%" suffix; a non-numeric value such as "<unknown>" becomes 0.
  printf '%s\n' '检查资源CPU超过90%的node:'
  hot_nodes=$(awk 'NR > 1 && $3 + 0 > 90 { print $1 }' <<<"$node_usage")
  printf '%s\n' "${hot_nodes:-0}"

  printf '%s\n' '检查资源内存超过90%的node:'
  hot_nodes=$(awk 'NR > 1 && $5 + 0 > 90 { print $1 }' <<<"$node_usage")
  printf '%s\n' "${hot_nodes:-0}"

  # `kubectl top pods --all-namespaces` columns:
  # NAMESPACE NAME CPU(cores) MEMORY(bytes).
  pod_usage=$(kubectl top pods --all-namespaces 2>/dev/null)

  printf '%s' '检查资源CPU(cores)大于100的pod:'
  awk 'NR > 1 && $3 + 0 > 100 { n++ } END { print n + 0 }' <<<"$pod_usage"

  printf '%s' '检查资源内存(bytes)大于1000的pod:'
  awk 'NR > 1 && $4 + 0 > 1000 { n++ } END { print n + 0 }' <<<"$pod_usage"

  return 0
}

check_resource_usage