基于 kubeadm 搭建集群

kubeadm + containerd + docker

测试环境:Ubuntu20.04

master

环境准备

# Container runtimes
# Use containerd (also originally developed by Docker) as the container
# runtime instead of dockerd, since Kubernetes is dropping the dockerd path.
# Docker spun containerd and runc out into open-source projects.
# Prerequisites: kernel modules required by the container runtime.
cat > /etc/modules-load.d/containerd.conf << EOF
overlay
br_netfilter
EOF

# Load the modules now (the conf file above only covers future boots)
modprobe overlay
modprobe br_netfilter

# Required sysctl parameters; the file makes them persist across reboots
cat > /etc/sysctl.d/99-kubernetes-cri.conf << EOF
net.bridge.bridge-nf-call-iptables  = 1
net.ipv4.ip_forward                 = 1
net.bridge.bridge-nf-call-ip6tables = 1
EOF

# Apply the sysctl parameters without rebooting
sysctl --system

# Disable swap: kubeadm preflight checks require swap to be off.
# NOTE(review): swapoff -a is not persistent — also comment out the swap
# entry in /etc/fstab, otherwise swap comes back after a reboot.
swapoff -a

docker安装

# https://docs.docker.com/engine/install/#server
# Remove older Docker versions first
apt-get remove docker docker-engine docker.io containerd runc

# Install prerequisite packages
apt-get install -y apt-transport-https ca-certificates curl gnupg lsb-release

# Add Docker's GPG key.
# -fsSL: fail on HTTP errors (instead of piping an error page into apt-key),
# stay silent but still show errors, and follow redirects.
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add -

# Add the apt repository ($(lsb_release -cs) expands to the Ubuntu codename)
cat > /etc/apt/sources.list.d/docker.list << EOF
deb https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable
EOF

# Install the container runtime
apt-get update
apt-get install -y docker-ce docker-ce-cli containerd.io

# Configure the systemd cgroup driver (must match the kubelet's driver);
# only write the file when it does not exist yet
[ -f /etc/docker/daemon.json ] || cat > /etc/docker/daemon.json << EOF
{
    "exec-opts": ["native.cgroupdriver=systemd"]
}
EOF

# Restart to pick up the new daemon.json
systemctl restart docker

# Smoke test.
# Note that dockerd is still the managing process:
# /usr/bin/dockerd -H fd:// --containerd=/run/containerd/containerd.sock
docker run hello-world

master组件安装

# https://blog.csdn.net/weixin_33979363/article/details/89583143
# https://kubernetes.io/zh/docs/setup/production-environment/tools/kubeadm/install-kubeadm/
# apt-get update && apt-get install -y apt-transport-https
# -fsSL: fail on HTTP errors instead of piping an error page into apt-key
curl -fsSL https://mirrors.aliyun.com/kubernetes/apt/doc/apt-key.gpg | apt-key add -
cat > /etc/apt/sources.list.d/kubernetes.list << EOF
deb https://mirrors.aliyun.com/kubernetes/apt/ kubernetes-xenial main
EOF

# kubeadm does not install or manage kubelet/kubectl for you;
# it is only the cluster-bootstrap tool
apt-get update
apt-get install -y kubelet kubectl kubeadm

# kubectl bash completion (idempotent: only appended once)
grep -q kubectl ~/.bashrc || cat >> ~/.bashrc << 'EOF'
# .bashrc
source <(kubectl completion bash)
EOF

# Load it into the current shell
source ~/.bashrc

# Pin the package versions so apt upgrades don't break the cluster
apt-mark hold kubelet kubectl kubeadm

# kubelet now restarts every few seconds: it crash-loops while waiting for
# kubeadm instructions (no cluster join and no local config file yet)
tail -n 10 /var/log/syslog

镜像准备

# List the images kubeadm needs (mainly to check the coredns version)
kubeadm config images list --image-repository registry.aliyuncs.com/google_containers

# Pull the images (the coredns pull will fail)
kubeadm config images pull --image-repository registry.aliyuncs.com/google_containers

# coredns is not mirrored there; pull it from Docker Hub and re-tag it
# to the name kubeadm expects (note the added "v" prefix in the tag)
docker pull coredns/coredns:1.8.0
docker tag coredns/coredns:1.8.0 registry.aliyuncs.com/google_containers/coredns/coredns:v1.8.0

初始化集群

# Initialize the control plane.
#   --apiserver-advertise-address : this master's IP
#   --service-cidr / --pod-network-cidr : must not overlap the host network
kubeadm init \
  --apiserver-advertise-address=10.0.26.180 \
  --image-repository registry.aliyuncs.com/google_containers \
  --service-cidr=10.26.0.0/16 \
  --pod-network-cidr=172.26.0.0/16

# Point kubectl at the admin kubeconfig (idempotent append)
grep -q KUBECONFIG ~/.bashrc || echo 'export KUBECONFIG=/etc/kubernetes/admin.conf' >> ~/.bashrc
source ~/.bashrc
# Without a pod network add-on installed, coredns stays Pending
kubectl get pods -A

# Change the coredns image source (otherwise pods rescheduled onto other
# nodes fail to pull):
#   image: registry.aliyuncs.com/google_containers/coredns/coredns:v1.8.0
# change to: coredns/coredns:1.8.0
# note the Docker Hub tag has no "v" prefix
kubectl edit deployments.apps coredns -n kube-system

# Install a pod network add-on (required);
# without it coredns stays in Pending state
wget https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
# Change "Network": "172.26.0.0/16" to match the --pod-network-cidr above
sed -i 's#"Network": "10.244.0.0/16"#"Network": "172.26.0.0/16"#g' kube-flannel.yml
grep Network kube-flannel.yml
kubectl apply -f kube-flannel.yml

# Make sure all pods are running
kubectl get pods -A -o wide

dashboard

# Deploy the Dashboard UI
kubectl apply -f https://raw.githubusercontent.com/kubernetes/dashboard/v2.2.0/aio/deploy/recommended.yaml

# Expose the port: change "type: ClusterIP" to "type: NodePort"
kubectl -n kubernetes-dashboard edit service kubernetes-dashboard

# Verify, and find the assigned NodePort
kubectl get pods,svc -n kubernetes-dashboard

# Grant admin access to a dedicated service account
kubectl create serviceaccount dashboard-admin -n kube-system
kubectl create clusterrolebinding dashboard-admin --clusterrole=cluster-admin --serviceaccount=kube-system:dashboard-admin

# Show the login token
kubectl describe secrets -n kube-system $(kubectl -n kube-system get secret | awk '/dashboard-admin/{print $1}')

# Browser URL (reachable from any node later) — not a shell command:
https://10.0.26.180:31888/

# Raise the token timeout to 31 days: edit the kubernetes-dashboard
# Deployment and add the arg below under '--namespace=kubernetes-dashboard'
- '--token-ttl=2678400'

node(docker版)

环境准备

# Prerequisites: kernel modules required by the container runtime
cat > /etc/modules-load.d/containerd.conf << EOF
overlay
br_netfilter
EOF

# Load the modules now (the conf file above only covers future boots)
modprobe overlay
modprobe br_netfilter

# Required sysctl parameters; the file makes them persist across reboots.
cat > /etc/sysctl.d/99-kubernetes-cri.conf << EOF
net.bridge.bridge-nf-call-iptables  = 1
net.ipv4.ip_forward                 = 1
net.bridge.bridge-nf-call-ip6tables = 1
EOF

# Apply the sysctl parameters without rebooting
sysctl --system

# Disable swap: kubeadm preflight checks require swap to be off.
# NOTE(review): not persistent — also comment out the swap line in /etc/fstab.
swapoff -a

docker安装

# https://docs.docker.com/engine/install/#server
# Remove older Docker versions first
apt-get remove docker docker-engine docker.io containerd runc

# Install prerequisite packages
apt-get install -y apt-transport-https ca-certificates curl gnupg lsb-release

# Add Docker's GPG key.
# -fsSL: fail on HTTP errors (instead of piping an error page into apt-key),
# stay silent but still show errors, and follow redirects.
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add -

# Add the apt repository ($(lsb_release -cs) expands to the Ubuntu codename)
cat > /etc/apt/sources.list.d/docker.list << EOF
deb https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable
EOF

# Install the container runtime
apt-get update
apt-get install -y docker-ce docker-ce-cli containerd.io

# Configure the systemd cgroup driver (must match the kubelet's driver);
# only write the file when it does not exist yet
[ -f /etc/docker/daemon.json ] || cat > /etc/docker/daemon.json << EOF
{
    "exec-opts": ["native.cgroupdriver=systemd"]
}
EOF

# Restart to pick up the new daemon.json
systemctl restart docker

# Smoke test.
# Note that dockerd is still the managing process:
# /usr/bin/dockerd -H fd:// --containerd=/run/containerd/containerd.sock
docker run hello-world

node组件安装

# Add the Kubernetes apt source.
# -fsSL: fail on HTTP errors instead of piping an error page into apt-key
curl -fsSL https://mirrors.aliyun.com/kubernetes/apt/doc/apt-key.gpg | apt-key add -
cat > /etc/apt/sources.list.d/kubernetes.list << EOF
deb https://mirrors.aliyun.com/kubernetes/apt/ kubernetes-xenial main
EOF

# kubeadm does not install or manage kubelet/kubectl; it only bootstraps
apt-get update
apt-get install -y kubelet kubectl kubeadm

# Pin the package versions so apt upgrades don't break the cluster
apt-mark hold kubelet kubectl kubeadm

# kubelet now restarts every few seconds: it crash-loops while waiting for
# kubeadm instructions (no cluster join and no local config file yet)
tail -n 10 /var/log/syslog

node节点加入

# Before joining there is no kubelet config yet, so this error is expected:
# "Failed to load kubelet config file" err="failed to load Kubelet config file /var/lib/kubelet/config.yaml
kubeadm join 10.0.26.180:6443 --token lk2e6n.393mxe5a38burwhz \
    --discovery-token-ca-cert-hash sha256:328d6d3e5e3df4ec441e977158af262669a4a149636abff41fd67b3611ace625

# Check node/pod status (run this on the master)
kubectl get pods -A -o wide

node(containerd版)

官方文档:Dockershim Deprecation FAQ | Kubernetes

Containerd 的前世今生和保姆级入门教程

2017 年,Docker 将自身从 v1.11 起开始引入的容器运行时 containerd 捐给了 CNCF。目前 Kubelet 使用一个名为 dockershim 的模块,该模块实现了对 Docker 的 CRI 支持;1.23+ 版本将删除 dockershim。dockershim 一直都是 Kubernetes 社区为了能让 Docker 成为其支持的容器运行时,所维护的一个兼容程序。

架构图

docker架构 和 containerd 1.0 cri-containerd 架构:

containerd 1.1 cri-containerd 新的架构:

环境准备

# Prerequisites: kernel modules required by the container runtime
cat > /etc/modules-load.d/containerd.conf << EOF
overlay
br_netfilter
EOF
# Load the modules now (the conf file above only covers future boots)
modprobe overlay
modprobe br_netfilter

# Required sysctl parameters; the file makes them persist across reboots.
cat > /etc/sysctl.d/99-kubernetes-cri.conf << EOF
net.bridge.bridge-nf-call-iptables  = 1
net.ipv4.ip_forward                 = 1
net.bridge.bridge-nf-call-ip6tables = 1
EOF

# Apply the sysctl parameters without rebooting
sysctl --system

# Disable swap: kubeadm preflight checks require swap to be off.
# NOTE(review): not persistent — also comment out the swap line in /etc/fstab.
swapoff -a

containerd安装

# https://blog.csdn.net/tao12345666333/article/details/110914360
apt-get update
# -y for a non-interactive install, consistent with the other sections
apt-get install -y containerd runc cri-tools

# Generate the default config if it does not exist yet
# https://github.com/containerd/containerd/blob/master/docs/man/containerd-config.toml.5.md
mkdir -p /etc/containerd
[ -f /etc/containerd/config.toml ] || (containerd config default > /etc/containerd/config.toml)

# Enable the systemd cgroup driver (must match the kubelet's driver)
# https://kubernetes.io/zh/docs/setup/production-environment/container-runtimes/
# For containerd 1.3.3, add the following:
# [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
#   ...
#   [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
#     SystemdCgroup = true

# For 1.4+: sed -i.1 's/SystemdCgroup = false/SystemdCgroup = true/g' /etc/containerd/config.toml
sed -i '/containerd.runtimes.runc]/a \          [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]\n            SystemdCgroup = true' /etc/containerd/config.toml
# Use the Aliyun mirror for the k8s.gcr.io images (e.g. pause)
sed -i 's#k8s.gcr.io#registry.aliyuncs.com/google_containers#g' /etc/containerd/config.toml

# Restart to apply the config
systemctl restart containerd

# Verify the systemd cgroup driver is active
# (known display bug: it may still report SystemdCgroup: false)
grep SystemdCgroup /var/log/syslog
# crictl info | grep systemdCgroup

node组件安装

# Add the Kubernetes apt source.
# -fsSL: fail on HTTP errors instead of piping an error page into apt-key
curl -fsSL https://mirrors.aliyun.com/kubernetes/apt/doc/apt-key.gpg | apt-key add -
cat > /etc/apt/sources.list.d/kubernetes.list << EOF
deb https://mirrors.aliyun.com/kubernetes/apt/ kubernetes-xenial main
EOF

# kubeadm does not install or manage kubelet/kubectl; it only bootstraps
apt-get update
apt-get install -y kubelet kubectl kubeadm
systemctl enable kubelet && systemctl start kubelet

# Pin the package versions so apt upgrades don't break the cluster
apt-mark hold kubelet kubectl kubeadm

# crictl config pointing at containerd (nerdctl is used later for
# day-to-day management); only written when the file does not exist yet
[ -f /etc/crictl.yaml ] || cat > /etc/crictl.yaml << EOF
runtime-endpoint: unix:///run/containerd/containerd.sock
image-endpoint: unix:///run/containerd/containerd.sock
timeout: 2
debug: false
pull-image-on-create: false
EOF

# Smoke-test crictl against containerd
crictl ps

# kubelet crash-loops every few seconds while waiting for a kubeadm join
tail -n 10 /var/log/syslog

node节点加入

# Before joining there is no kubelet config yet, so this error is expected:
# "Failed to load kubelet config file" err="failed to load Kubelet config file /var/lib/kubelet/config.yaml
kubeadm join 10.0.26.180:6443 --token lk2e6n.393mxe5a38burwhz \
    --discovery-token-ca-cert-hash sha256:328d6d3e5e3df4ec441e977158af262669a4a149636abff41fd67b3611ace625

containerd 客户端工具 nerdctl

containerd 客户端工具 –> nerdctl

# containerd client tool: nerdctl (docker-compatible CLI)
# http://www.rhce.cc/2901.html
# Guard the cd: if the directory is missing, do not run the download
# and extraction in whatever directory we happen to be in.
cd /usr/local/src/ || exit 1
wget https://github.com/containerd/nerdctl/releases/download/v0.8.3/nerdctl-0.8.3-linux-amd64.tar.gz
[ -f /usr/bin/nerdctl ] || (tar xzf nerdctl-0.8.3-linux-amd64.tar.gz && cp -v nerdctl /usr/bin/)

# Bash completion (idempotent: only appended once)
grep -q nerdctl ~/.bashrc || echo 'source <(nerdctl completion bash)' >> ~/.bashrc
source ~/.bashrc

# Smoke test
nerdctl ps

运行时查看

# http://www.rhce.cc/2872.html
root@k8s-master-01:~# kubectl get nodes -o wide
NAME            STATUS   ......   CONTAINER-RUNTIME
k8s-master-01   Ready    ......   docker://20.10.6
k8s-node-01     Ready    ......   docker://20.10.6 				# 这个代表使用的是 docker 运行时
k8s-node-02     Ready    ......   containerd://1.3.3-0ubuntu2.3 # 这个代表使用的是 containerd 运行时

测试

# Verify coredns can run on the containerd node: delete the coredns pods
# and watch where the replacements get scheduled.
# (Pod names below are examples — use the names from your own cluster.)
kubectl delete pod -n kube-system coredns-b5648d655-jvz6t
kubectl get pod -o wide -A
kubectl delete pod -n kube-system coredns-b5648d655-d2rth
kubectl get pod -o wide -A

参考