Difference between revisions of "OpenShift v4x health check"
Line 1: | Line 1: | ||
+ | =Links= | ||
+ | * [https://docs.openshift.com/container-platform/3.9/day_two_guide/environment_health_checks.html https://docs.openshift.com/container-platform/3.9/day_two_guide/environment_health_checks.html] | ||
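+ | * [https://docs.openshift.com/container-platform/4.4/backup_and_restore/replacing-unhealthy-etcd-member.html https://docs.openshift.com/container-platform/4.4/backup_and_restore/replacing-unhealthy-etcd-member.html] | ||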
+ | =Health Checks= | ||
+ | ==Nodes== | ||
+ | [chris@control(zabbix-dev/system:admin) ~]$ '''oc get nodes -o wide''' | ||
+ | NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME | ||
+ | master01 '''Ready''' master,worker 40d v1.17.1 192.168.100.221 <none> RHEL CoreOS 44.81.202005062110-0 (Ootpa) 4.18.0-147.8.1.el8_1.x86_64 cri-o://1.17.4-8.dev.rhaos4.4.git5f5c5e4.el8 | ||
+ | master02 '''Ready''' master,worker 40d v1.17.1 192.168.100.222 <none> RHEL CoreOS 44.81.202005062110-0 (Ootpa) 4.18.0-147.8.1.el8_1.x86_64 cri-o://1.17.4-8.dev.rhaos4.4.git5f5c5e4.el8 | ||
+ | master03 '''Ready''' master,worker 40d v1.17.1 192.168.100.223 <none> RHEL CoreOS 44.81.202005062110-0 (Ootpa) 4.18.0-147.8.1.el8_1.x86_64 cri-o://1.17.4-8.dev.rhaos4.4.git5f5c5e4.el8 | ||
+ | worker01 '''Ready''' worker 40d v1.17.1 192.168.100.231 <none> RHEL CoreOS 44.81.202005062110-0 (Ootpa) 4.18.0-147.8.1.el8_1.x86_64 cri-o://1.17.4-8.dev.rhaos4.4.git5f5c5e4.el8 | ||
+ | worker02 '''Ready''' worker 40d v1.17.1 192.168.100.232 <none> RHEL CoreOS 44.81.202005062110-0 (Ootpa) 4.18.0-147.8.1.el8_1.x86_64 cri-o://1.17.4-8.dev.rhaos4.4.git5f5c5e4.el8 | ||
+ | ==etcd== | ||
+ | ===v3.9=== | ||
+ | [root@master(zabbix/admin) ~]# '''source /etc/etcd/etcd.conf''' | ||
+ | [root@master(zabbix/admin) ~]# '''etcdctl --cert-file=$ETCD_PEER_CERT_FILE --key-file=$ETCD_PEER_KEY_FILE --ca-file=/etc/etcd/ca.crt --endpoints=$ETCD_LISTEN_CLIENT_URLS cluster-health''' | ||
+ | member da1c9720d5fee664 is healthy: got healthy result from https://192.168.223.74:2379 | ||
+ | '''cluster is healthy''' | ||
+ | ===v4.4=== | ||
+ | [chris@control(zabbix-dev/system:admin) ~]$ '''oc get etcd -o=jsonpath='{range .items[0].status.conditions[?(@.type=="EtcdMembersAvailable")]}{.message}{"\n"}'''' | ||
+ | '''master02,master01,master03 members are available''', have not started, are unhealthy, are unknown | ||
+ | ==router== | ||
+ | ===v3.9=== | ||
+ | [root@master(zabbix/admin) ~]# '''oc -n default get deploymentconfigs/router''' | ||
+ | NAME REVISION '''DESIRED CURRENT''' TRIGGERED BY | ||
+ | router 1 '''1 1''' config | ||
+ | ==registry== | ||
+ | ===v3.9=== | ||
+ | [root@master(zabbix/admin) ~]# '''oc -n default get deploymentconfigs/docker-registry''' | ||
+ | NAME REVISION '''DESIRED CURRENT''' TRIGGERED BY | ||
+ | docker-registry 1 '''1 1''' config | ||
+ | ===v4.4=== | ||
+ | [chris@control(zabbix-dev/system:admin) ~]$ '''oc get all -n openshift-image-registry''' | ||
+ | NAME '''READY''' STATUS '''RESTARTS''' AGE | ||
+ | pod/cluster-image-registry-operator-7bff4c7595-hkbqx '''2/2''' Running '''0''' 2d20h | ||
+ | pod/image-registry-6b6745b4f9-wqwdx '''1/1''' Running '''0''' 2d22h | ||
+ | pod/node-ca-6wgpw '''1/1''' Running '''0''' 2d23h | ||
+ | pod/node-ca-gjmhw '''1/1''' Running '''0''' 2d23h | ||
+ | pod/node-ca-gnp7n '''1/1''' Running '''0''' 2d23h | ||
+ | pod/node-ca-gtvt9 '''1/1''' Running '''0''' 2d23h | ||
+ | pod/node-ca-ps7v9 '''1/1''' Running '''0''' 2d23h | ||
+ | NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE | ||
+ | service/image-registry ClusterIP 172.30.229.236 <none> 5000/TCP 40d | ||
+ | service/image-registry-operator ClusterIP None <none> 60000/TCP 40d | ||
+ | |||
+ | NAME '''DESIRED''' CURRENT '''READY''' UP-TO-DATE AVAILABLE NODE SELECTOR AGE | ||
+ | daemonset.apps/node-ca 5 5 5 5 5 kubernetes.io/os=linux 40d | ||
+ | |||
+ | NAME READY UP-TO-DATE AVAILABLE AGE | ||
+ | deployment.apps/cluster-image-registry-operator 1/1 1 1 40d | ||
+ | deployment.apps/image-registry 1/1 1 1 40d | ||
+ | |||
+ | NAME '''DESIRED''' CURRENT '''READY''' AGE | ||
+ | replicaset.apps/cluster-image-registry-operator-6f78cddbbc 0 0 0 4d5h | ||
+ | replicaset.apps/cluster-image-registry-operator-7bff4c7595 '''1''' 1 '''1''' 2d23h | ||
+ | replicaset.apps/cluster-image-registry-operator-86476f46bc 0 0 0 6d7h | ||
+ | replicaset.apps/cluster-image-registry-operator-f9697f69d 0 0 0 40d | ||
+ | replicaset.apps/cluster-image-registry-operator-fc9dfb566 0 0 0 3d3h | ||
+ | replicaset.apps/image-registry-58cc7948d8 0 0 0 3d3h | ||
+ | replicaset.apps/image-registry-688fb696dc 0 0 0 40d | ||
+ | replicaset.apps/image-registry-6948d8479b 0 0 0 4d5h | ||
+ | replicaset.apps/image-registry-6b6745b4f9 '''1''' 1 '''1''' 2d23h | ||
+ | replicaset.apps/image-registry-7bbdbc5dc7 0 0 0 6d7h | ||
+ | replicaset.apps/image-registry-9dc4885b 0 0 0 6d7h | ||
+ | replicaset.apps/image-registry-d4cf5448b 0 0 0 40d | ||
+ | replicaset.apps/image-registry-f488f9578 0 0 0 6d7h | ||
+ | replicaset.apps/image-registry-f5647c6d8 0 0 0 40d | ||
+ | |||
+ | NAME SCHEDULE SUSPEND ACTIVE LAST SCHEDULE AGE | ||
+ | cronjob.batch/image-pruner 0 0 * * * True 0 <none> 2d23h | ||
+ | |||
+ | |||
+ | |||
+ | |||
+ | ==v4 ClusterOperators== | ||
+ | [chris@control(zabbix-dev/system:admin) ~]$ '''oc -n default get clusteroperators''' | ||
+ | NAME VERSION AVAILABLE PROGRESSING '''DEGRADED''' SINCE | ||
+ | authentication 4.4.4 True False '''False''' 35d | ||
+ | cloud-credential 4.4.4 True False '''False''' 40d | ||
+ | cluster-autoscaler 4.4.4 True False '''False''' 40d | ||
+ | console 4.4.4 True False '''False''' 33h | ||
+ | csi-snapshot-controller 4.4.4 True False '''False''' 33h | ||
+ | dns 4.4.4 True False '''False''' 33h | ||
+ | etcd 4.4.4 True False '''False''' 2d20h | ||
+ | image-registry 4.4.4 True False '''False''' 33h | ||
+ | ingress 4.4.4 True False '''False''' 33h | ||
+ | insights 4.4.4 True False '''False''' 40d | ||
+ | kube-apiserver 4.4.4 True False '''False''' 40d | ||
+ | kube-controller-manager 4.4.4 True False '''False''' 2d23h | ||
+ | kube-scheduler 4.4.4 True False '''False''' 2d23h | ||
+ | kube-storage-version-migrator 4.4.4 True False '''False''' 33h | ||
+ | machine-api 4.4.4 True False '''False''' 40d | ||
+ | machine-config 4.4.4 True False '''False''' 2d19h | ||
+ | marketplace 4.4.4 True False '''False''' 2d19h | ||
+ | monitoring 4.4.4 True False '''False''' 2d10h | ||
+ | network 4.4.4 True False '''False''' 40d | ||
+ | node-tuning 4.4.4 True False '''False''' 33h | ||
+ | openshift-apiserver 4.4.4 True False '''False''' 33h | ||
+ | openshift-controller-manager 4.4.4 True False '''False''' 33h | ||
+ | openshift-samples 4.4.4 True False '''False''' 7m37s | ||
+ | operator-lifecycle-manager 4.4.4 True False '''False''' 40d | ||
+ | operator-lifecycle-manager-catalog 4.4.4 True False '''False''' 40d | ||
+ | operator-lifecycle-manager-packageserver 4.4.4 True False '''False''' 33h | ||
+ | service-ca 4.4.4 True False '''False''' 40d | ||
+ | service-catalog-apiserver 4.4.4 True False '''False''' 40d | ||
+ | service-catalog-controller-manager 4.4.4 True False '''False''' 40d | ||
+ | storage 4.4.4 True False '''False''' 2d23h | ||
Line 16: | Line 123: | ||
[[Category:OpenShift]] | [[Category:OpenShift]] | ||
[[Category:V43]] | [[Category:V43]] | ||
+ | [[Category:V39]] | ||
[[Category:ReferenceCards]] | [[Category:ReferenceCards]] |
Revision as of 12:17, 26 May 2020
Contents
1 Links
2 Health Checks
2.1 Nodes
[chris@control(zabbix-dev/system:admin) ~]$ oc get nodes -o wide NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME master01 Ready master,worker 40d v1.17.1 192.168.100.221 <none> RHEL CoreOS 44.81.202005062110-0 (Ootpa) 4.18.0-147.8.1.el8_1.x86_64 cri-o://1.17.4-8.dev.rhaos4.4.git5f5c5e4.el8 master02 Ready master,worker 40d v1.17.1 192.168.100.222 <none> RHEL CoreOS 44.81.202005062110-0 (Ootpa) 4.18.0-147.8.1.el8_1.x86_64 cri-o://1.17.4-8.dev.rhaos4.4.git5f5c5e4.el8 master03 Ready master,worker 40d v1.17.1 192.168.100.223 <none> RHEL CoreOS 44.81.202005062110-0 (Ootpa) 4.18.0-147.8.1.el8_1.x86_64 cri-o://1.17.4-8.dev.rhaos4.4.git5f5c5e4.el8 worker01 Ready worker 40d v1.17.1 192.168.100.231 <none> RHEL CoreOS 44.81.202005062110-0 (Ootpa) 4.18.0-147.8.1.el8_1.x86_64 cri-o://1.17.4-8.dev.rhaos4.4.git5f5c5e4.el8 worker02 Ready worker 40d v1.17.1 192.168.100.232 <none> RHEL CoreOS 44.81.202005062110-0 (Ootpa) 4.18.0-147.8.1.el8_1.x86_64 cri-o://1.17.4-8.dev.rhaos4.4.git5f5c5e4.el8
2.2 etcd
2.2.1 v3.9
[root@master(zabbix/admin) ~]# source /etc/etcd/etcd.conf [root@master(zabbix/admin) ~]# etcdctl --cert-file=$ETCD_PEER_CERT_FILE --key-file=$ETCD_PEER_KEY_FILE --ca-file=/etc/etcd/ca.crt --endpoints=$ETCD_LISTEN_CLIENT_URLS cluster-health member da1c9720d5fee664 is healthy: got healthy result from https://192.168.223.74:2379 cluster is healthy
2.2.2 v4.4
[chris@control(zabbix-dev/system:admin) ~]$ oc get etcd -o=jsonpath='{range .items[0].status.conditions[?(@.type=="EtcdMembersAvailable")]}{.message}{"\n"}' master02,master01,master03 members are available, have not started, are unhealthy, are unknown
2.3 router
2.3.1 v3.9
[root@master(zabbix/admin) ~]# oc -n default get deploymentconfigs/router NAME REVISION DESIRED CURRENT TRIGGERED BY router 1 1 1 config
2.4 registry
2.4.1 v3.9
[root@master(zabbix/admin) ~]# oc -n default get deploymentconfigs/docker-registry NAME REVISION DESIRED CURRENT TRIGGERED BY docker-registry 1 1 1 config
2.4.2 v4.4
[chris@control(zabbix-dev/system:admin) ~]$ oc get all -n openshift-image-registry NAME READY STATUS RESTARTS AGE pod/cluster-image-registry-operator-7bff4c7595-hkbqx 2/2 Running 0 2d20h pod/image-registry-6b6745b4f9-wqwdx 1/1 Running 0 2d22h pod/node-ca-6wgpw 1/1 Running 0 2d23h pod/node-ca-gjmhw 1/1 Running 0 2d23h pod/node-ca-gnp7n 1/1 Running 0 2d23h pod/node-ca-gtvt9 1/1 Running 0 2d23h pod/node-ca-ps7v9 1/1 Running 0 2d23h
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE service/image-registry ClusterIP 172.30.229.236 <none> 5000/TCP 40d service/image-registry-operator ClusterIP None <none> 60000/TCP 40d
NAME DESIRED CURRENT READY UP-TO-DATE AVAILABLE NODE SELECTOR AGE daemonset.apps/node-ca 5 5 5 5 5 kubernetes.io/os=linux 40d
NAME READY UP-TO-DATE AVAILABLE AGE deployment.apps/cluster-image-registry-operator 1/1 1 1 40d deployment.apps/image-registry 1/1 1 1 40d
NAME DESIRED CURRENT READY AGE replicaset.apps/cluster-image-registry-operator-6f78cddbbc 0 0 0 4d5h replicaset.apps/cluster-image-registry-operator-7bff4c7595 1 1 1 2d23h replicaset.apps/cluster-image-registry-operator-86476f46bc 0 0 0 6d7h replicaset.apps/cluster-image-registry-operator-f9697f69d 0 0 0 40d replicaset.apps/cluster-image-registry-operator-fc9dfb566 0 0 0 3d3h replicaset.apps/image-registry-58cc7948d8 0 0 0 3d3h replicaset.apps/image-registry-688fb696dc 0 0 0 40d replicaset.apps/image-registry-6948d8479b 0 0 0 4d5h replicaset.apps/image-registry-6b6745b4f9 1 1 1 2d23h replicaset.apps/image-registry-7bbdbc5dc7 0 0 0 6d7h replicaset.apps/image-registry-9dc4885b 0 0 0 6d7h replicaset.apps/image-registry-d4cf5448b 0 0 0 40d replicaset.apps/image-registry-f488f9578 0 0 0 6d7h replicaset.apps/image-registry-f5647c6d8 0 0 0 40d
NAME SCHEDULE SUSPEND ACTIVE LAST SCHEDULE AGE cronjob.batch/image-pruner 0 0 * * * True 0 <none> 2d23h
2.5 v4 ClusterOperators
[chris@control(zabbix-dev/system:admin) ~]$ oc -n default get clusteroperators NAME VERSION AVAILABLE PROGRESSING DEGRADED SINCE authentication 4.4.4 True False False 35d cloud-credential 4.4.4 True False False 40d cluster-autoscaler 4.4.4 True False False 40d console 4.4.4 True False False 33h csi-snapshot-controller 4.4.4 True False False 33h dns 4.4.4 True False False 33h etcd 4.4.4 True False False 2d20h image-registry 4.4.4 True False False 33h ingress 4.4.4 True False False 33h insights 4.4.4 True False False 40d kube-apiserver 4.4.4 True False False 40d kube-controller-manager 4.4.4 True False False 2d23h kube-scheduler 4.4.4 True False False 2d23h kube-storage-version-migrator 4.4.4 True False False 33h machine-api 4.4.4 True False False 40d machine-config 4.4.4 True False False 2d19h marketplace 4.4.4 True False False 2d19h monitoring 4.4.4 True False False 2d10h network 4.4.4 True False False 40d node-tuning 4.4.4 True False False 33h openshift-apiserver 4.4.4 True False False 33h openshift-controller-manager 4.4.4 True False False 33h openshift-samples 4.4.4 True False False 7m37s operator-lifecycle-manager 4.4.4 True False False 40d operator-lifecycle-manager-catalog 4.4.4 True False False 40d operator-lifecycle-manager-packageserver 4.4.4 True False False 33h service-ca 4.4.4 True False False 40d service-catalog-apiserver 4.4.4 True False False 40d service-catalog-controller-manager 4.4.4 True False False 40d storage 4.4.4 True False False 2d23h