OpenShift v4.x health check

From Bitbull Wiki
Revision as of 12:18, 26 May 2020 by Chris (talk | contribs) (→‎v4.4)
Jump to navigation Jump to search

1 Links

https://docs.openshift.com/container-platform/4.4/backup_and_restore/replacing-unhealthy-etcd-member.html

2 Health Checks

2.1 Nodes

[chris@control(zabbix-dev/system:admin) ~]$ oc get nodes -o wide
NAME       STATUS   ROLES           AGE   VERSION   INTERNAL-IP       EXTERNAL-IP   OS-IMAGE                                   KERNEL-VERSION                CONTAINER-RUNTIME
master01   Ready    master,worker   40d   v1.17.1   192.168.100.221   <none>        RHEL CoreOS 44.81.202005062110-0 (Ootpa)   4.18.0-147.8.1.el8_1.x86_64   cri-o://1.17.4-8.dev.rhaos4.4.git5f5c5e4.el8
master02   Ready    master,worker   40d   v1.17.1   192.168.100.222   <none>        RHEL CoreOS 44.81.202005062110-0 (Ootpa)   4.18.0-147.8.1.el8_1.x86_64   cri-o://1.17.4-8.dev.rhaos4.4.git5f5c5e4.el8
master03   Ready    master,worker   40d   v1.17.1   192.168.100.223   <none>        RHEL CoreOS 44.81.202005062110-0 (Ootpa)   4.18.0-147.8.1.el8_1.x86_64   cri-o://1.17.4-8.dev.rhaos4.4.git5f5c5e4.el8
worker01   Ready    worker          40d   v1.17.1   192.168.100.231   <none>        RHEL CoreOS 44.81.202005062110-0 (Ootpa)   4.18.0-147.8.1.el8_1.x86_64   cri-o://1.17.4-8.dev.rhaos4.4.git5f5c5e4.el8
worker02   Ready    worker          40d   v1.17.1   192.168.100.232   <none>        RHEL CoreOS 44.81.202005062110-0 (Ootpa)   4.18.0-147.8.1.el8_1.x86_64   cri-o://1.17.4-8.dev.rhaos4.4.git5f5c5e4.el8


2.2 etcd

2.2.1 v3.9

[root@master(zabbix/admin) ~]# source /etc/etcd/etcd.conf
[root@master(zabbix/admin) ~]# etcdctl --cert-file=$ETCD_PEER_CERT_FILE --key-file=$ETCD_PEER_KEY_FILE   --ca-file=/etc/etcd/ca.crt --endpoints=$ETCD_LISTEN_CLIENT_URLS cluster-health
member da1c9720d5fee664 is healthy: got healthy result from https://192.168.223.74:2379
cluster is healthy

2.2.2 v4.4

[chris@control(zabbix-dev/system:admin) ~]$ oc get etcd -o=jsonpath='{range .items[0].status.conditions[?(@.type=="EtcdMembersAvailable")]}{.message}{"\n"}{end}'
master02,master01,master03 members are available,  have not started,  are unhealthy,  are unknown


2.3 router

2.3.1 v3.9

[root@master(zabbix/admin) ~]# oc -n default get deploymentconfigs/router
NAME      REVISION   DESIRED   CURRENT   TRIGGERED BY
router    1          1         1         config


2.4 registry

2.4.1 v3.9

[root@master(zabbix/admin) ~]# oc -n default get deploymentconfigs/docker-registry
NAME              REVISION   DESIRED   CURRENT   TRIGGERED BY
docker-registry   1          1         1         config

2.4.2 v4.4

[chris@control(zabbix-dev/system:admin) ~]$ oc get all -n openshift-image-registry
NAME                                                   READY   STATUS    RESTARTS   AGE
pod/cluster-image-registry-operator-7bff4c7595-hkbqx   2/2     Running   0          2d20h
pod/image-registry-6b6745b4f9-wqwdx                    1/1     Running   0          2d22h
pod/node-ca-6wgpw                                      1/1     Running   0          2d23h
pod/node-ca-gjmhw                                      1/1     Running   0          2d23h
pod/node-ca-gnp7n                                      1/1     Running   0          2d23h
pod/node-ca-gtvt9                                      1/1     Running   0          2d23h
pod/node-ca-ps7v9                                      1/1     Running   0          2d23h

NAME                              TYPE        CLUSTER-IP       EXTERNAL-IP   PORT(S)     AGE
service/image-registry            ClusterIP   172.30.229.236   <none>        5000/TCP    40d
service/image-registry-operator   ClusterIP   None             <none>        60000/TCP   40d

NAME                     DESIRED   CURRENT   READY   UP-TO-DATE   AVAILABLE   NODE SELECTOR            AGE
daemonset.apps/node-ca   5         5         5       5            5           kubernetes.io/os=linux   40d

NAME                                              READY   UP-TO-DATE   AVAILABLE   AGE
deployment.apps/cluster-image-registry-operator   1/1     1            1           40d
deployment.apps/image-registry                    1/1     1            1           40d

NAME                                                         DESIRED   CURRENT   READY   AGE
replicaset.apps/cluster-image-registry-operator-6f78cddbbc   0         0         0       4d5h
replicaset.apps/cluster-image-registry-operator-7bff4c7595   1         1         1       2d23h
replicaset.apps/cluster-image-registry-operator-86476f46bc   0         0         0       6d7h
replicaset.apps/cluster-image-registry-operator-f9697f69d    0         0         0       40d
replicaset.apps/cluster-image-registry-operator-fc9dfb566    0         0         0       3d3h
replicaset.apps/image-registry-58cc7948d8                    0         0         0       3d3h
replicaset.apps/image-registry-688fb696dc                    0         0         0       40d
replicaset.apps/image-registry-6948d8479b                    0         0         0       4d5h
replicaset.apps/image-registry-6b6745b4f9                    1         1         1       2d23h
replicaset.apps/image-registry-7bbdbc5dc7                    0         0         0       6d7h
replicaset.apps/image-registry-9dc4885b                      0         0         0       6d7h
replicaset.apps/image-registry-d4cf5448b                     0         0         0       40d
replicaset.apps/image-registry-f488f9578                     0         0         0       6d7h
replicaset.apps/image-registry-f5647c6d8                     0         0         0       40d

NAME                         SCHEDULE    SUSPEND   ACTIVE   LAST SCHEDULE   AGE
cronjob.batch/image-pruner   0 0 * * *   True      0        <none>          2d23h

2.5 v4 ClusterOperators

[chris@control(zabbix-dev/system:admin) ~]$ oc -n default get clusteroperators
NAME                                       VERSION   AVAILABLE   PROGRESSING   DEGRADED   SINCE
authentication                             4.4.4     True        False         False      35d
cloud-credential                           4.4.4     True        False         False      40d
cluster-autoscaler                         4.4.4     True        False         False      40d
console                                    4.4.4     True        False         False      33h
csi-snapshot-controller                    4.4.4     True        False         False      33h
dns                                        4.4.4     True        False         False      33h
etcd                                       4.4.4     True        False         False      2d20h
image-registry                             4.4.4     True        False         False      33h
ingress                                    4.4.4     True        False         False      33h
insights                                   4.4.4     True        False         False      40d
kube-apiserver                             4.4.4     True        False         False      40d
kube-controller-manager                    4.4.4     True        False         False      2d23h
kube-scheduler                             4.4.4     True        False         False      2d23h
kube-storage-version-migrator              4.4.4     True        False         False      33h
machine-api                                4.4.4     True        False         False      40d
machine-config                             4.4.4     True        False         False      2d19h
marketplace                                4.4.4     True        False         False      2d19h
monitoring                                 4.4.4     True        False         False      2d10h
network                                    4.4.4     True        False         False      40d
node-tuning                                4.4.4     True        False         False      33h
openshift-apiserver                        4.4.4     True        False         False      33h
openshift-controller-manager               4.4.4     True        False         False      33h
openshift-samples                          4.4.4     True        False         False      7m37s
operator-lifecycle-manager                 4.4.4     True        False         False      40d
operator-lifecycle-manager-catalog         4.4.4     True        False         False      40d
operator-lifecycle-manager-packageserver   4.4.4     True        False         False      33h
service-ca                                 4.4.4     True        False         False      40d
service-catalog-apiserver                  4.4.4     True        False         False      40d
service-catalog-controller-manager         4.4.4     True        False         False      40d
storage                                    4.4.4     True        False         False      2d23h