kubernetes hard way cluster upgrade

2021-12-05T00:00:00.000Z

kubernetes hard way cluster upgrade

from v1.21.0 to v1.22.0

controller

  • update components
    • kube-apiserver
    • kube-controller-manager
    • kube-scheduler
    • kubectl
    • etcd
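
before stopping anything, an etcd snapshot is a cheap rollback point. a sketch using the same endpoint and certs as the etcd section below (the backup path is arbitrary):

# pre-upgrade safety net; the /var/backups path is just an example
sudo ETCDCTL_API=3 etcdctl snapshot save /var/backups/etcd-pre-v1.22.0.db \
  --endpoints=https://127.0.0.1:2379 \
  --cacert=/etc/etcd/ca.pem \
  --cert=/etc/etcd/kubernetes.pem \
  --key=/etc/etcd/kubernetes-key.pem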

check version

/usr/local/bin/kube-apiserver --version
/usr/local/bin/kube-controller-manager --version
/usr/local/bin/kube-scheduler --version

check service and stop

sudo systemctl status kube-apiserver kube-controller-manager kube-scheduler
sudo systemctl stop kube-apiserver kube-controller-manager kube-scheduler

download new version

wget -q --show-progress --https-only --timestamping \
  "https://storage.googleapis.com/kubernetes-release/release/v1.22.0/bin/linux/amd64/kube-apiserver" \
  "https://storage.googleapis.com/kubernetes-release/release/v1.22.0/bin/linux/amd64/kube-controller-manager" \
  "https://storage.googleapis.com/kubernetes-release/release/v1.22.0/bin/linux/amd64/kube-scheduler" \
  "https://storage.googleapis.com/kubernetes-release/release/v1.22.0/bin/linux/amd64/kubectl"
chmod +x kube-apiserver kube-controller-manager kube-scheduler kubectl
sudo cp kube-apiserver kube-controller-manager kube-scheduler kubectl /usr/local/bin/
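
verifying the downloads before copying is cheap. a sketch, assuming a <binary>.sha256 file sits next to each binary in the release bucket (that is the documented layout on dl.k8s.io):

# check each binary against its published checksum before installing
for bin in kube-apiserver kube-controller-manager kube-scheduler kubectl; do
  echo "$(wget -qO- "https://storage.googleapis.com/kubernetes-release/release/v1.22.0/bin/linux/amd64/${bin}.sha256")  ${bin}" \
    | sha256sum --check
done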

start

sudo systemctl start kube-apiserver kube-controller-manager kube-scheduler
sudo systemctl status kube-apiserver kube-controller-manager kube-scheduler
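
systemctl only says the units are running; the apiserver's own readiness endpoint is a stronger signal once kubectl can reach the cluster:

# aggregated readiness checks of the apiserver (etcd, informers, ...)
kubectl get --raw='/readyz?verbose'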

check server version

$ kubectl version --short
Client Version: v1.21.3
Server Version: v1.22.0
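
the client binary here is still v1.21.3; kubectl is supported within one minor version of the apiserver, so this still works, but the workstation copy can be brought up to the same version (a sketch, assuming a linux/amd64 workstation):

# update the workstation's kubectl to match the server version
wget -q --show-progress --https-only --timestamping \
  "https://storage.googleapis.com/kubernetes-release/release/v1.22.0/bin/linux/amd64/kubectl"
chmod +x kubectl
sudo cp kubectl /usr/local/bin/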

etcd

update etcd version from v3.4.15 to v3.5.1

check etcd status and version

sudo ETCDCTL_API=3 etcdctl member list \
  --endpoints=https://127.0.0.1:2379 \
  --cacert=/etc/etcd/ca.pem \
  --cert=/etc/etcd/kubernetes.pem \
  --key=/etc/etcd/kubernetes-key.pem

etcd --version

sudo systemctl status etcd
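
endpoint health gives a quick yes/no per member before touching anything; same endpoint and certs as the member list above:

sudo ETCDCTL_API=3 etcdctl endpoint health \
  --endpoints=https://127.0.0.1:2379 \
  --cacert=/etc/etcd/ca.pem \
  --cert=/etc/etcd/kubernetes.pem \
  --key=/etc/etcd/kubernetes-key.pem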

stop etcd, download the new version, and start it

sudo systemctl stop etcd
wget -q --show-progress --https-only --timestamping \
  "https://github.com/etcd-io/etcd/releases/download/v3.5.1/etcd-v3.5.1-linux-amd64.tar.gz"
tar -xvf etcd-v3.5.1-linux-amd64.tar.gz etcd-v3.5.1-linux-amd64/
sudo mv etcd-v3.5.1-linux-amd64/etcd* /usr/local/bin/
sudo systemctl start etcd
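
once etcd is back, endpoint status prints the running server version in its VERSION column, which should now read 3.5.1; same certs as above:

sudo ETCDCTL_API=3 etcdctl endpoint status --write-out=table \
  --endpoints=https://127.0.0.1:2379 \
  --cacert=/etc/etcd/ca.pem \
  --cert=/etc/etcd/kubernetes.pem \
  --key=/etc/etcd/kubernetes-key.pem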

worker

  • update components
    • kubelet
    • kube-proxy
    • kubectl

drain pods

kubectl drain $node_name
## the --force option can be added if a pod can't be deleted
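
drain also refuses daemonset-managed pods and pods using emptyDir volumes by default, so in practice it usually needs a couple of extra flags. a sketch with the standard kubectl flags:

# --ignore-daemonsets: daemonset pods can't be evicted, skip them
# --delete-emptydir-data: allow evicting pods that use emptyDir volumes
# --force: also evict pods not managed by a controller (their data is lost)
kubectl drain $node_name --ignore-daemonsets --delete-emptydir-data --force
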
  • before drain

    $ kubectl get pod --all-namespaces -o=wide
    NAMESPACE      NAME                                       READY   STATUS    RESTARTS       AGE    IP           NODE      NOMINATED NODE   READINESS GATES
    default        basic-discovery-f699b74cd-2kvz7            1/1     Running   0              89m    10.200.2.5   worker2   <none>           <none>
    default        basic-pd-0                                 0/1     Pending   0              89m    <none>       <none>    <none>           <none>
    default        basic-pd-1                                 0/1     Pending   0              89m    <none>       <none>    <none>           <none>
    default        basic-pd-2                                 0/1     Pending   0              89m    <none>       <none>    <none>           <none>
    default        busybox                                    1/1     Running   12 (36m ago)   12h    10.200.2.2   worker2   <none>           <none>
    kube-system    coredns-8494f9c688-b97q4                   1/1     Running   0              12h    10.200.1.2   worker1   <none>           <none>
    kube-system    coredns-8494f9c688-mgrzw                   1/1     Running   0              12h    10.200.3.2   worker3   <none>           <none>
    tidb-admin     tidb-controller-manager-7c79b4567c-8xbfx   1/1     Running   0              12h    10.200.2.4   worker2   <none>           <none>
    tidb-admin     tidb-scheduler-59885789bc-ttzs4            2/2     Running   0              12h    10.200.3.3   worker3   <none>           <none>
    tidb-cluster   basic-discovery-656c6647bd-4tdcg           1/1     Running   0              127m   10.200.1.4   worker1   <none>           <none>
    tidb-cluster   basic-monitor-0                            0/3     Pending   0              126m   <none>       <none>    <none>           <none>
    tidb-cluster   basic-pd-0                                 0/1     Pending   0              127m   <none>       <none>    <none>           <none>
    tidb-cluster   basic-pd-1                                 0/1     Pending   0              127m   <none>       <none>    <none>           <none>
    tidb-cluster   basic-pd-2                                 0/1     Pending   0              127m   <none>       <none>    <none>           <none>
    
  • after drain

    kubectl get pod --all-namespaces -o=wide
    NAMESPACE      NAME                                       READY   STATUS    RESTARTS       AGE    IP           NODE      NOMINATED NODE   READINESS GATES
    default        basic-discovery-f699b74cd-2kvz7            1/1     Running   0              91m    10.200.2.5   worker2   <none>           <none>
    default        basic-pd-0                                 0/1     Pending   0              91m    <none>       <none>    <none>           <none>
    default        basic-pd-1                                 0/1     Pending   0              91m    <none>       <none>    <none>           <none>
    default        basic-pd-2                                 0/1     Pending   0              91m    <none>       <none>    <none>           <none>
    default        busybox                                    1/1     Running   12 (39m ago)   12h    10.200.2.2   worker2   <none>           <none>
    kube-system    coredns-8494f9c688-4rkzk                   1/1     Running   0              82s    10.200.3.4   worker3   <none>           <none>
    kube-system    coredns-8494f9c688-mgrzw                   1/1     Running   0              12h    10.200.3.2   worker3   <none>           <none>
    tidb-admin     tidb-controller-manager-7c79b4567c-8xbfx   1/1     Running   0              12h    10.200.2.4   worker2   <none>           <none>
    tidb-admin     tidb-scheduler-59885789bc-ttzs4            2/2     Running   0              12h    10.200.3.3   worker3   <none>           <none>
    tidb-cluster   basic-discovery-656c6647bd-lbnhn           1/1     Running   0              82s    10.200.2.6   worker2   <none>           <none>
    tidb-cluster   basic-monitor-0                            0/3     Pending   0              129m   <none>       <none>    <none>           <none>
    tidb-cluster   basic-pd-0                                 0/1     Pending   0              129m   <none>       <none>    <none>           <none>
    tidb-cluster   basic-pd-1                                 0/1     Pending   0              129m   <none>       <none>    <none>           <none>
    tidb-cluster   basic-pd-2                                 0/1     Pending   0              129m   <none>       <none>    <none>           <none>
    

check version

/usr/local/bin/kubelet --version
/usr/local/bin/kube-proxy --version

stop and check

sudo systemctl stop kubelet kube-proxy
sudo systemctl status kubelet kube-proxy

###
$ kubectl get node
NAME      STATUS                        ROLES    AGE   VERSION
worker1   NotReady,SchedulingDisabled   <none>   13h   v1.21.0
worker2   Ready                         <none>   13h   v1.21.0
worker3   Ready                         <none>   13h   v1.21.0

download

wget -q --show-progress --https-only --timestamping \
  https://storage.googleapis.com/kubernetes-release/release/v1.22.0/bin/linux/amd64/kubectl \
  https://storage.googleapis.com/kubernetes-release/release/v1.22.0/bin/linux/amd64/kube-proxy \
  https://storage.googleapis.com/kubernetes-release/release/v1.22.0/bin/linux/amd64/kubelet
chmod +x kube-proxy kubelet kubectl
sudo cp kube-proxy kubelet kubectl /usr/local/bin/
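
before restarting the services, a quick sanity check that the new binaries actually landed:

/usr/local/bin/kubelet --version
/usr/local/bin/kube-proxy --version
/usr/local/bin/kubectl version --client --short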

start

sudo systemctl start kubelet kube-proxy
sudo systemctl status kubelet kube-proxy

###
kubectl get nodes
NAME      STATUS                     ROLES    AGE   VERSION
worker1   Ready,SchedulingDisabled   <none>   13h   v1.22.0
worker2   Ready                      <none>   13h   v1.21.0
worker3   Ready                      <none>   13h   v1.21.0

uncordon

kubectl uncordon $node_name
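
the whole worker pass is just this cycle repeated per node. a sketch assuming the v1.22.0 binaries are already staged in each worker's working directory, plus ssh access and passwordless sudo (node names as in the outputs above):

# one worker at a time: drain -> swap binaries -> restart -> uncordon,
# so capacity never drops by more than one node
for node in worker1 worker2 worker3; do
  kubectl drain ${node} --ignore-daemonsets
  ssh ${node} 'sudo systemctl stop kubelet kube-proxy &&
    sudo cp kube-proxy kubelet kubectl /usr/local/bin/ &&
    sudo systemctl start kubelet kube-proxy'
  kubectl uncordon ${node}
done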

###
kubectl get nodes
NAME      STATUS   ROLES    AGE   VERSION
worker1   Ready    <none>   13h   v1.22.0
worker2   Ready    <none>   13h   v1.21.0
worker3   Ready    <none>   13h   v1.21.0

memo

uncordoning alone does not move anything back: existing pods stay where they are, and the node only receives newly created pods. to actively rebalance existing pods onto it, the descheduler can be used:

https://github.com/kubernetes-sigs/descheduler
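
a minimal policy sketch for the descheduler's RemoveDuplicates strategy, which evicts duplicate pods so the scheduler can spread them onto the freshly uncordoned node (apiVersion/kind per the descheduler docs; the project ships Job/CronJob manifests that mount a policy like this):

# minimal DeschedulerPolicy (v1alpha1) enabling RemoveDuplicates
cat <<EOF > policy.yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
  "RemoveDuplicates":
    enabled: true
EOF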
