OpenShift v4x health check

From Bitbull Wiki
Revision as of 12:17, 26 May 2020 by Chris (talk | contribs)
Jump to navigation Jump to search

1 Links

https://docs.openshift.com/container-platform/4.4/backup_and_restore/replacing-unhealthy-etcd-member.html

2 Health Checks

2.1 Nodes

[chris@control(zabbix-dev/system:admin) ~]$ oc get nodes -o wide
NAME       STATUS   ROLES           AGE   VERSION   INTERNAL-IP       EXTERNAL-IP   OS-IMAGE                                   KERNEL-VERSION                CONTAINER-RUNTIME
master01   Ready    master,worker   40d   v1.17.1   192.168.100.221   <none>        RHEL CoreOS 44.81.202005062110-0 (Ootpa)   4.18.0-147.8.1.el8_1.x86_64   cri-o://1.17.4-8.dev.rhaos4.4.git5f5c5e4.el8
master02   Ready    master,worker   40d   v1.17.1   192.168.100.222   <none>        RHEL CoreOS 44.81.202005062110-0 (Ootpa)   4.18.0-147.8.1.el8_1.x86_64   cri-o://1.17.4-8.dev.rhaos4.4.git5f5c5e4.el8
master03   Ready    master,worker   40d   v1.17.1   192.168.100.223   <none>        RHEL CoreOS 44.81.202005062110-0 (Ootpa)   4.18.0-147.8.1.el8_1.x86_64   cri-o://1.17.4-8.dev.rhaos4.4.git5f5c5e4.el8
worker01   Ready    worker          40d   v1.17.1   192.168.100.231   <none>        RHEL CoreOS 44.81.202005062110-0 (Ootpa)   4.18.0-147.8.1.el8_1.x86_64   cri-o://1.17.4-8.dev.rhaos4.4.git5f5c5e4.el8
worker02   Ready    worker          40d   v1.17.1   192.168.100.232   <none>        RHEL CoreOS 44.81.202005062110-0 (Ootpa)   4.18.0-147.8.1.el8_1.x86_64   cri-o://1.17.4-8.dev.rhaos4.4.git5f5c5e4.el8


2.2 etcd

2.2.1 v3.9

[root@master(zabbix/admin) ~]# source /etc/etcd/etcd.conf
[root@master(zabbix/admin) ~]# etcdctl --cert-file=$ETCD_PEER_CERT_FILE --key-file=$ETCD_PEER_KEY_FILE   --ca-file=/etc/etcd/ca.crt --endpoints=$ETCD_LISTEN_CLIENT_URLS cluster-health
member da1c9720d5fee664 is healthy: got healthy result from https://192.168.223.74:2379
cluster is healthy

2.2.2 v4.4

[chris@control(zabbix-dev/system:admin) ~]$ oc get etcd -o=jsonpath='{range .items[0].status.conditions[?(@.type=="EtcdMembersAvailable")]}{.message}{"\n"}'
master02,master01,master03 members are available,  have not started,  are unhealthy,  are unknown


2.3 router

2.3.1 v3.9

[root@master(zabbix/admin) ~]# oc -n default get deploymentconfigs/router
NAME      REVISION   DESIRED   CURRENT   TRIGGERED BY
router    1          1         1         config


2.4 registry

2.4.1 v3.9

[root@master(zabbix/admin) ~]# oc -n default get deploymentconfigs/docker-registry
NAME              REVISION   DESIRED   CURRENT   TRIGGERED BY
docker-registry   1          1         1         config

2.4.2 v4.4

[chris@control(zabbix-dev/system:admin) ~]$ oc get all -n openshift-image-registry
NAME                                                   READY   STATUS    RESTARTS   AGE
pod/cluster-image-registry-operator-7bff4c7595-hkbqx   2/2     Running   0          2d20h
pod/image-registry-6b6745b4f9-wqwdx                    1/1     Running   0          2d22h
pod/node-ca-6wgpw                                      1/1     Running   0          2d23h
pod/node-ca-gjmhw                                      1/1     Running   0          2d23h
pod/node-ca-gnp7n                                      1/1     Running   0          2d23h
pod/node-ca-gtvt9                                      1/1     Running   0          2d23h
pod/node-ca-ps7v9                                      1/1     Running   0          2d23h

NAME                              TYPE        CLUSTER-IP       EXTERNAL-IP   PORT(S)     AGE
service/image-registry            ClusterIP   172.30.229.236   <none>        5000/TCP    40d
service/image-registry-operator   ClusterIP   None             <none>        60000/TCP   40d

NAME                     DESIRED   CURRENT   READY   UP-TO-DATE   AVAILABLE   NODE SELECTOR            AGE
daemonset.apps/node-ca   5         5         5       5            5           kubernetes.io/os=linux   40d

NAME                                              READY   UP-TO-DATE   AVAILABLE   AGE
deployment.apps/cluster-image-registry-operator   1/1     1            1           40d
deployment.apps/image-registry                    1/1     1            1           40d

NAME                                                         DESIRED   CURRENT   READY   AGE
replicaset.apps/cluster-image-registry-operator-6f78cddbbc   0         0         0       4d5h
replicaset.apps/cluster-image-registry-operator-7bff4c7595   1         1         1       2d23h
replicaset.apps/cluster-image-registry-operator-86476f46bc   0         0         0       6d7h
replicaset.apps/cluster-image-registry-operator-f9697f69d    0         0         0       40d
replicaset.apps/cluster-image-registry-operator-fc9dfb566    0         0         0       3d3h
replicaset.apps/image-registry-58cc7948d8                    0         0         0       3d3h
replicaset.apps/image-registry-688fb696dc                    0         0         0       40d
replicaset.apps/image-registry-6948d8479b                    0         0         0       4d5h
replicaset.apps/image-registry-6b6745b4f9                    1         1         1       2d23h
replicaset.apps/image-registry-7bbdbc5dc7                    0         0         0       6d7h
replicaset.apps/image-registry-9dc4885b                      0         0         0       6d7h
replicaset.apps/image-registry-d4cf5448b                     0         0         0       40d
replicaset.apps/image-registry-f488f9578                     0         0         0       6d7h
replicaset.apps/image-registry-f5647c6d8                     0         0         0       40d

NAME                         SCHEDULE    SUSPEND   ACTIVE   LAST SCHEDULE   AGE
cronjob.batch/image-pruner   0 0 * * *   True      0        <none>          2d23h



2.5 v4 ClusterOperators

[chris@control(zabbix-dev/system:admin) ~]$ oc -n default get clusteroperators
NAME                                       VERSION   AVAILABLE   PROGRESSING   DEGRADED   SINCE
authentication                             4.4.4     True        False         False      35d
cloud-credential                           4.4.4     True        False         False      40d
cluster-autoscaler                         4.4.4     True        False         False      40d
console                                    4.4.4     True        False         False      33h
csi-snapshot-controller                    4.4.4     True        False         False      33h
dns                                        4.4.4     True        False         False      33h
etcd                                       4.4.4     True        False         False      2d20h
image-registry                             4.4.4     True        False         False      33h
ingress                                    4.4.4     True        False         False      33h
insights                                   4.4.4     True        False         False      40d
kube-apiserver                             4.4.4     True        False         False      40d
kube-controller-manager                    4.4.4     True        False         False      2d23h
kube-scheduler                             4.4.4     True        False         False      2d23h
kube-storage-version-migrator              4.4.4     True        False         False      33h
machine-api                                4.4.4     True        False         False      40d
machine-config                             4.4.4     True        False         False      2d19h
marketplace                                4.4.4     True        False         False      2d19h
monitoring                                 4.4.4     True        False         False      2d10h
network                                    4.4.4     True        False         False      40d
node-tuning                                4.4.4     True        False         False      33h
openshift-apiserver                        4.4.4     True        False         False      33h
openshift-controller-manager               4.4.4     True        False         False      33h
openshift-samples                          4.4.4     True        False         False      7m37s
operator-lifecycle-manager                 4.4.4     True        False         False      40d
operator-lifecycle-manager-catalog         4.4.4     True        False         False      40d
operator-lifecycle-manager-packageserver   4.4.4     True        False         False      33h
service-ca                                 4.4.4     True        False         False      40d
service-catalog-apiserver                  4.4.4     True        False         False      40d
service-catalog-controller-manager         4.4.4     True        False         False      40d
storage                                    4.4.4     True        False         False      2d23h