kubelet源码分析 添加 /删除pod (SyncPod、SyncTerminatingPod、SyncTerminatedPod)

kubelet版本v1.27.1

前面讲过添加pod和删除pod的主要流程,分别在kubelet.go文件的HandlePodAdditions函数和pod_workers.go文件中。这篇文章讲的是当pod_workers.go流程结束后,重新触发kubelet.go文件的SyncPod、SyncTerminatingPod、SyncTerminatedPod函数进行处理。

这三个函数主要是对kubelet这一层最后的校验,然后将要调用CRI部分进行创建容器了。

kubesphere 控制界面中删除容器组删除不了是怎么回事 kubelet删除pod_kubelet

这篇就是managePodLoop(1.27版本改名为podWorkerLoop)后的syncPod的主要功能

一、SyncPod

这里也是我的PR涉及的地方,TODO:113606是kubelet维护者发起的issue,解决方案中的链接指向了我的PR

这是整体函数,其中的分支函数会在下面步骤中介绍

步骤1

做一些链路追踪,记录pod情况

记录第一次看到pod的情况

如果正在创建,记录pod工作者的启动延迟

将pod信息和podstatus(缓存对象信息)转换为status运行的信息,步骤2

将pod的ip设置回podStatus.IPs(如果使用主机网络,generateAPIPodStatus函数中会设置)

如果状态是运行完成或者失败,不允许改变pod的状态了,直接记录状态并且返回

检查pod是否可以运行,这个函数在添加pod中有类似的

如果不允许运行,记录最终状态,并且记录原因

将最新的信息记录到status_manager管理器中,(后面文章会讲到这个函数,比较大的组件,作用是与api-server同步最新信息)

如果是不允许运行的pod,进行删除,向下调用killPod函数(后续文章讲解)

如果网络插件还没有准备好,只有在使用主机网络的情况下才启动pod,否则返回错误

确保kubelet已注册pod所引用的Secret和ConfigMap。

创建pcm用来cgroup的创建;如果这个pod准备删除了,则不会创建和更新;

检查这个容器是否第一次运行;如果pod不是第一次运行, 并且cgroup也没有创建。则需要删除pod;删除掉pod后,如果设置的重启策略不是RestartPolicyNever则重新进行创建;检查是否创建成功了

是否为静态pod;静态pod对应一个镜像pod(mirror pod):如果镜像pod存在但已过期或准备删除,则直接删除这个镜像pod;如果镜像pod不存在或刚被删除,则重新创建镜像pod

创建pod的挂载目录

添加探针

v1.27版本新特性,如果pod开启了InPlacePodVerticalScaling。则可以修改pod的request资源,不会重启pod。InPlacePodVerticalScaling新特性文章会介绍这里

开始同步pod(调用容器运行时,最终调用CRI创建容器)后续文章介绍

再次验证pod是否进行了资源的扩展,如果扩展则更新缓存

// SyncPod is the kubelet's final reconciliation step for a non-terminating
// pod: it records and publishes the pod status, verifies the pod may still
// run on this node, prepares node-level prerequisites (pod-level cgroup,
// mirror pod for static pods, data directories, volume mounts), and then
// delegates to the container runtime (ultimately the CRI) to converge the
// pod's containers. It returns isTerminal=true when the pod has reached a
// terminal phase (Succeeded/Failed) and must not be synced again.
func (kl *Kubelet) SyncPod(_ context.Context, updateType kubetypes.SyncPodType, pod, mirrorPod *v1.Pod, podStatus *kubecontainer.PodStatus) (isTerminal bool, err error) {
	// TODO(#113606): connect this with the incoming context parameter, which comes from the pod worker.
	// Start an OpenTelemetry span to trace this pod sync.
	ctx, otelSpan := kl.tracer.Start(context.TODO(), "syncPod", trace.WithAttributes(
		attribute.String("k8s.pod.uid", string(pod.UID)),
		attribute.String("k8s.pod", klog.KObj(pod).String()),
		attribute.String("k8s.pod.name", pod.Name),
		attribute.String("k8s.pod.update_type", updateType.String()),
		attribute.String("k8s.namespace.name", pod.Namespace),
	))
	klog.V(4).InfoS("SyncPod enter", "pod", klog.KObj(pod), "podUID", pod.UID)
	defer func() {
		klog.V(4).InfoS("SyncPod exit", "pod", klog.KObj(pod), "podUID", pod.UID, "isTerminal", isTerminal)
		otelSpan.End()
	}()
	// Read the annotation recording when this pod config was first seen.
	var firstSeenTime time.Time
	if firstSeenTimeStr, ok := pod.Annotations[kubetypes.ConfigFirstSeenAnnotationKey]; ok {
		firstSeenTime = kubetypes.ConvertToTimestamp(firstSeenTimeStr).Get()
	}
	// On pod creation, record the pod worker start latency metric.
	if updateType == kubetypes.SyncPodCreate {
		if !firstSeenTime.IsZero() {
		metrics.PodWorkerStartDuration.Observe(metrics.SinceInSeconds(firstSeenTime))
		} else {
			klog.V(3).InfoS("First seen time not recorded for pod",
				"podUID", pod.UID,
				"pod", klog.KObj(pod))
		}
	}
	// Convert the pod spec plus the cached runtime status into an API
	// v1.PodStatus (step 2).
	apiPodStatus := kl.generateAPIPodStatus(pod, podStatus, false)
	// Copy the generated pod IPs back into podStatus.IPs (for host-network
	// pods generateAPIPodStatus fills the IPs in).
	podStatus.IPs = make([]string, 0, len(apiPodStatus.PodIPs))
	for _, ipInfo := range apiPodStatus.PodIPs {
		podStatus.IPs = append(podStatus.IPs, ipInfo.IP)
	}
	if len(podStatus.IPs) == 0 && len(apiPodStatus.PodIP) > 0 {
		podStatus.IPs = []string{apiPodStatus.PodIP}
	}
	// A terminal phase (Succeeded/Failed) may never change again: record the
	// status and report the pod as terminal.
	if apiPodStatus.Phase == v1.PodSucceeded || apiPodStatus.Phase == v1.PodFailed {
		kl.statusManager.SetPodStatus(pod, apiPodStatus)
		isTerminal = true
		return isTerminal, nil
	}
	// Re-check whether the pod is still admissible on this node (similar to
	// the admission check performed when the pod was first added).
	runnable := kl.canRunPod(pod)
	if !runnable.Admit {
	    // Not runnable: pin a non-terminal phase and record the reason.
		if apiPodStatus.Phase != v1.PodFailed && apiPodStatus.Phase != v1.PodSucceeded {
			apiPodStatus.Phase = v1.PodPending
		}
		apiPodStatus.Reason = runnable.Reason
		apiPodStatus.Message = runnable.Message
		const waitingReason = "Blocked"
		for _, cs := range apiPodStatus.InitContainerStatuses {
			if cs.State.Waiting != nil {
				cs.State.Waiting.Reason = waitingReason
			}
		}
		for _, cs := range apiPodStatus.ContainerStatuses {
			if cs.State.Waiting != nil {
				cs.State.Waiting.Reason = waitingReason
			}
		}
	}
	// Publish the latest status to the status manager, which syncs the
	// status back to the API server.
	kl.statusManager.SetPodStatus(pod, apiPodStatus)
	if !runnable.Admit {
		klog.V(2).InfoS("Pod is not runnable and must have running containers stopped", "pod", klog.KObj(pod), "podUID", pod.UID, "message", runnable.Message)
		var syncErr error
		// Convert the cached status into a running-pod representation.
		p := kubecontainer.ConvertPodStatusToRunningPod(kl.getRuntime().Type(), podStatus)
		// The pod may not run here: tear it down via killPod.
		if err := kl.killPod(ctx, pod, p, nil); err != nil {
			kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedToKillPod, "error killing pod: %v", err)
			syncErr = fmt.Errorf("error killing pod: %v", err)
			utilruntime.HandleError(syncErr)
		} else {
			syncErr = fmt.Errorf("pod cannot be run: %s", runnable.Message)
		}
		return false, syncErr
	}
	// If the network plugin is not ready, only pods using the host network
	// may start; everything else fails here with an error.
	if err := kl.runtimeState.networkErrors(); err != nil && !kubecontainer.IsHostNetworkPod(pod) {
		kl.recorder.Eventf(pod, v1.EventTypeWarning, events.NetworkNotReady, "%s: %v", NetworkNotReadyErrorMsg, err)
		return false, fmt.Errorf("%s: %v", NetworkNotReadyErrorMsg, err)
	}
	// Ensure the kubelet tracks the secrets and configmaps the pod references.
	if !kl.podWorkers.IsPodTerminationRequested(pod.UID) {
		if kl.secretManager != nil {
			kl.secretManager.RegisterPod(pod)
		}
		if kl.configMapManager != nil {
			kl.configMapManager.RegisterPod(pod)
		}
	}
	// Create the pod container manager (pcm), which owns the pod-level cgroup.
	pcm := kl.containerManager.NewPodContainerManager()
	// If the pod is about to terminate, do not create or update cgroups.
	if !kl.podWorkers.IsPodTerminationRequested(pod.UID) {
		firstSync := true
		// Determine whether this pod is starting for the first time (no
		// container is running yet).
		for _, containerStatus := range apiPodStatus.ContainerStatuses {
			if containerStatus.State.Running != nil {
				firstSync = false
				break
			}
		}
		podKilled := false
		// If the pod has run before but its cgroup no longer exists, kill it
		// so it can be restarted under a fresh cgroup.
		if !pcm.Exists(pod) && !firstSync {
			p := kubecontainer.ConvertPodStatusToRunningPod(kl.getRuntime().Type(), podStatus)
			if err := kl.killPod(ctx, pod, p, nil); err == nil {
				podKilled = true
			} else {
				klog.ErrorS(err, "KillPod failed", "pod", klog.KObj(pod), "podStatus", podStatus)
			}
		}
		// After a kill, recreate the cgroup unless the restart policy is
		// RestartPolicyNever.
		if !(podKilled && pod.Spec.RestartPolicy == v1.RestartPolicyNever) {
			if !pcm.Exists(pod) {
				if err := kl.containerManager.UpdateQOSCgroups(); err != nil {
					klog.V(2).InfoS("Failed to update QoS cgroups while syncing pod", "pod", klog.KObj(pod), "err", err)
				}
				// Verify the pod-level cgroup was created successfully.
				if err := pcm.EnsureExists(pod); err != nil {
					kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedToCreatePodContainer, "unable to ensure pod container exists: %v", err)
					return false, fmt.Errorf("failed to ensure that the pod: %v cgroups exist and are correctly applied: %v", pod.UID, err)
				}
			}
		}
	}
	// Mirror-pod maintenance for static pods.
	if kubetypes.IsStaticPod(pod) {
		deleted := false
		// If a mirror pod exists but is being deleted or no longer matches
		// the static pod, delete the stale mirror pod.
		if mirrorPod != nil {
			if mirrorPod.DeletionTimestamp != nil || !kl.podManager.IsMirrorPodOf(mirrorPod, pod) {
				klog.InfoS("Trying to delete pod", "pod", klog.KObj(pod), "podUID", mirrorPod.ObjectMeta.UID)
				podFullName := kubecontainer.GetPodFullName(pod)
				var err error
				deleted, err = kl.podManager.DeleteMirrorPod(podFullName, &mirrorPod.ObjectMeta.UID)
				if deleted {
					klog.InfoS("Deleted mirror pod because it is outdated", "pod", klog.KObj(mirrorPod))
				} else if err != nil {
					klog.ErrorS(err, "Failed deleting mirror pod", "pod", klog.KObj(mirrorPod))
				}
			}
		}
		// If there is no mirror pod (or the stale one was just deleted),
		// create a fresh mirror pod in the API server.
		if mirrorPod == nil || deleted {
			node, err := kl.GetNode()
			if err != nil || node.DeletionTimestamp != nil {
				klog.V(4).InfoS("No need to create a mirror pod, since node has been removed from the cluster", "node", klog.KRef("", string(kl.nodeName)))
			} else {
				klog.V(4).InfoS("Creating a mirror pod for static pod", "pod", klog.KObj(pod))
				if err := kl.podManager.CreateMirrorPod(pod); err != nil {
					klog.ErrorS(err, "Failed creating a mirror pod for", "pod", klog.KObj(pod))
				}
			}
		}
	}
	// Make the pod's data directories on disk.
	if err := kl.makePodDataDirs(pod); err != nil {
		kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedToMakePodDataDirectories, "error making pod data directories: %v", err)
		klog.ErrorS(err, "Unable to make pod data directories for pod", "pod", klog.KObj(pod))
		return false, err
	}
	if !kl.podWorkers.IsPodTerminationRequested(pod.UID) {
	    // Block until all of the pod's volumes are attached and mounted.
		if err := kl.volumeManager.WaitForAttachAndMount(pod); err != nil {
			kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedMountVolume, "Unable to attach or mount volumes: %v", err)
			klog.ErrorS(err, "Unable to attach or mount volumes for pod; skipping pod", "pod", klog.KObj(pod))
			return false, err
		}
	}
	// Fetch the image pull secrets referenced by the pod.
	pullSecrets := kl.getPullSecretsForPod(pod)
	// Register the pod with the probe manager.
	kl.probeManager.AddPod(pod)
	// v1.27 feature: when InPlacePodVerticalScaling is enabled, resource
	// requests of a running non-static pod can be resized in place without
	// restarting the pod.
	if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
		if kl.podWorkers.CouldHaveRunningContainers(pod.UID) && !kubetypes.IsStaticPod(pod) {
			pod = kl.handlePodResourcesResize(pod)
		}
	}
	// Hand off to the container runtime, which ultimately drives the CRI to
	// create and start the pod's containers.
	result := kl.containerRuntime.SyncPod(ctx, pod, podStatus, pullSecrets, kl.backOff)
	kl.reasonCache.Update(pod.UID, result)
	if err := result.Error(); err != nil {
		for _, r := range result.SyncResults {
			if r.Error != kubecontainer.ErrCrashLoopBackOff && r.Error != images.ErrImagePullBackOff {
				return false, err
			}
		}
		return false, nil
	}
	// If an in-place resize is in progress, refresh the PLEG cache so the
	// resized resources are observed.
	if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) && isPodResizeInProgress(pod, &apiPodStatus) {
		runningPod := kubecontainer.ConvertPodStatusToRunningPod(kl.getRuntime().Type(), podStatus)
		if err, _ := kl.pleg.UpdateCache(&runningPod, pod.UID); err != nil {
			klog.ErrorS(err, "Failed to update pod cache", "pod", klog.KObj(pod))
			return false, err
		}
	}
	return false, nil
}

步骤2. generateAPIPodStatus函数 文件位置,pkg/kubelet/kubelet_pods.go

  • 获得历史状态
  • 进行状态转换,步骤3
  • 确定就地升级的资源大小
  • 判断最终的Phase状态
  • 如果历史或者缓存状态是完成或者失败,则当前状态等同,不允许修改
  • 如果状态相同,直接存入历史的错误原因
  • 如果是被驱逐的pod,记录原因
  • 更新探针
  • 记录几种必要条件(中断条件,容器条件,就绪条件,init容器条件等)
  • 如果使用了主机网络,设置主机网络,同时如果podip为空将podip替换成主机ip。
// generateAPIPodStatus merges the pod spec, the cached runtime status and
// the previously published status into the v1.PodStatus that will be sent
// to the API server. podIsTerminal indicates that the pod worker considers
// the pod terminal, which feeds into the phase computation.
func (kl *Kubelet) generateAPIPodStatus(pod *v1.Pod, podStatus *kubecontainer.PodStatus, podIsTerminal bool) v1.PodStatus {
	klog.V(3).InfoS("Generating pod status", "podIsTerminal", podIsTerminal, "pod", klog.KObj(pod))
	// Fetch the previously recorded status; fall back to the API status.
	oldPodStatus, found := kl.statusManager.GetPodStatus(pod.UID)
	if !found {
		oldPodStatus = pod.Status
	}
	// Convert the runtime status to an API status (step 3).
	s := kl.convertStatusToAPIStatus(pod, podStatus, oldPodStatus)
	if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
	    // Determine the in-place resize status of the pod.
		s.Resize = kl.determinePodResizeStatus(pod, s)
	}
	allStatus := append(append([]v1.ContainerStatus{}, s.ContainerStatuses...), s.InitContainerStatuses...)
	// Compute the final Phase from all container statuses.
	s.Phase = getPhase(pod, allStatus, podIsTerminal)
	klog.V(4).InfoS("Got phase for pod", "pod", klog.KObj(pod), "oldPhase", oldPodStatus.Phase, "phase", s.Phase)
	// If the cached or API status is already terminal (Succeeded/Failed),
	// the phase may not regress: keep the terminal phase.
	if s.Phase != v1.PodFailed && s.Phase != v1.PodSucceeded {
		switch {
		case oldPodStatus.Phase == v1.PodFailed || oldPodStatus.Phase == v1.PodSucceeded:
			klog.V(4).InfoS("Status manager phase was terminal, updating phase to match", "pod", klog.KObj(pod), "phase", oldPodStatus.Phase)
			s.Phase = oldPodStatus.Phase
		case pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded:
			klog.V(4).InfoS("API phase was terminal, updating phase to match", "pod", klog.KObj(pod), "phase", pod.Status.Phase)
			s.Phase = pod.Status.Phase
		}
	}
    // If the phase is unchanged, carry over the previous reason/message.
	if s.Phase == oldPodStatus.Phase {
		// preserve the reason and message which is associated with the phase
		s.Reason = oldPodStatus.Reason
		s.Message = oldPodStatus.Message
		if len(s.Reason) == 0 {
			s.Reason = pod.Status.Reason
		}
		if len(s.Message) == 0 {
			s.Message = pod.Status.Message
		}
	}
	for _, podSyncHandler := range kl.PodSyncHandlers {
	    // If a sync handler evicts the pod, mark it Failed and record why.
		if result := podSyncHandler.ShouldEvict(pod); result.Evict {
			s.Phase = v1.PodFailed
			s.Reason = result.Reason
			s.Message = result.Message
			break
		}
	}
	if pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded {
		if s.Phase != pod.Status.Phase {
			klog.ErrorS(nil, "Pod attempted illegal phase transition", "pod", klog.KObj(pod), "originalStatusPhase", pod.Status.Phase, "apiStatusPhase", s.Phase, "apiStatus", s)
			s.Phase = pod.Status.Phase
		}
	}
	// Let the probe manager adjust readiness/started fields.
	kl.probeManager.UpdatePodStatus(pod.UID, s)
	// Rebuild the pod conditions (disruption target, initialized, ready,
	// containers-ready, scheduled), starting from non-kubelet-owned ones.
	s.Conditions = make([]v1.PodCondition, 0, len(pod.Status.Conditions)+1)
	for _, c := range pod.Status.Conditions {
		if !kubetypes.PodConditionByKubelet(c.Type) {
			s.Conditions = append(s.Conditions, c)
		}
	}

	if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
		cType := v1.DisruptionTarget
		if _, condition := podutil.GetPodConditionFromList(oldPodStatus.Conditions, cType); condition != nil {
			s.Conditions = utilpod.ReplaceOrAppendPodCondition(s.Conditions, condition)
		}
	}
	if utilfeature.DefaultFeatureGate.Enabled(features.PodHasNetworkCondition) {
		s.Conditions = append(s.Conditions, status.GeneratePodHasNetworkCondition(pod, podStatus))
	}
	s.Conditions = append(s.Conditions, status.GeneratePodInitializedCondition(&pod.Spec, s.InitContainerStatuses, s.Phase))
	s.Conditions = append(s.Conditions, status.GeneratePodReadyCondition(&pod.Spec, s.Conditions, s.ContainerStatuses, s.Phase))
	s.Conditions = append(s.Conditions, status.GenerateContainersReadyCondition(&pod.Spec, s.ContainerStatuses, s.Phase))
	s.Conditions = append(s.Conditions, v1.PodCondition{
		Type:   v1.PodScheduled,
		Status: v1.ConditionTrue,
	})
	// Record the host IP, and for host-network pods fall back to the host IP
	// when no pod IP was reported.
	if kl.kubeClient != nil {
		hostIPs, err := kl.getHostIPsAnyWay()
		if err != nil {
			klog.V(4).InfoS("Cannot get host IPs", "err", err)
		} else {
			s.HostIP = hostIPs[0].String()
			if kubecontainer.IsHostNetworkPod(pod) {
				if s.PodIP == "" {
					s.PodIP = hostIPs[0].String()
					s.PodIPs = []v1.PodIP{{IP: s.PodIP}}
				}
				if len(hostIPs) == 2 && len(s.PodIPs) == 1 {
					s.PodIPs = append(s.PodIPs, v1.PodIP{IP: hostIPs[1].String()})
				}
			}
		}
	}

	return *s
}

步骤3.convertStatusToAPIStatus 这里主要设置pod.Status的几个重要的条件

  • 设置pod的ips
  • 设置QOS等级
  • 设置容器和初始化容器的status状态。步骤4
  • 设置临时容器
unc (kl *Kubelet) convertStatusToAPIStatus(pod *v1.Pod, podStatus *kubecontainer.PodStatus, oldPodStatus v1.PodStatus) *v1.PodStatus {
	var apiPodStatus v1.PodStatus
    //设置pod的ips
	podIPs := make([]string, len(podStatus.IPs))
	copy(podIPs, podStatus.IPs)

	podIPs = kl.sortPodIPs(podIPs)
	for _, ip := range podIPs {
		apiPodStatus.PodIPs = append(apiPodStatus.PodIPs, v1.PodIP{IP: ip})
	}
	if len(apiPodStatus.PodIPs) > 0 {
		apiPodStatus.PodIP = apiPodStatus.PodIPs[0].IP
	}

	// 设置QOS等级
	apiPodStatus.QOSClass = v1qos.GetPodQOS(pod)
   //设置容器和初始化容器的status状态。步骤4
	apiPodStatus.ContainerStatuses = kl.convertToAPIContainerStatuses(
		pod, podStatus,
		oldPodStatus.ContainerStatuses,
		pod.Spec.Containers,
		len(pod.Spec.InitContainers) > 0,
		false,
	)
	apiPodStatus.InitContainerStatuses = kl.convertToAPIContainerStatuses(
		pod, podStatus,
		oldPodStatus.InitContainerStatuses,
		pod.Spec.InitContainers,
		len(pod.Spec.InitContainers) > 0,
		true,
	)
	var ecSpecs []v1.Container
	for i := range pod.Spec.EphemeralContainers {
		ecSpecs = append(ecSpecs, v1.Container(pod.Spec.EphemeralContainers[i].EphemeralContainerCommon))
	}

	//设置临时容器
	apiPodStatus.EphemeralContainerStatuses = kl.convertToAPIContainerStatuses(
		pod, podStatus,
		oldPodStatus.EphemeralContainerStatuses,
		ecSpecs,
		len(pod.Spec.InitContainers) > 0,
		false,
	)

	return &apiPodStatus
}

步骤4.convertToAPIContainerStatuses设置容器的状态

  • 初始化两个钩子函数(不详细介绍了)
  • convertContainerStatus钩子函数是设置容器的运行状态
  • convertContainerStatusResources钩子函数是设置容器的资源配置
  • 将所有容器状态设置为默认的等待状态
  • 初始化一些status的必要信息
  • 遍历所有容器,如果容器在缓存中没有,但是在历史版本中存在,如果历史版本中是删除状态并且没有运行中,则记录错误信息"ContainerStatusUnknown"
  • 循环缓存容器,去当前容器比较,如果存在,拿到这个容器的历史版本,进行第一个钩子函数的调用,后的容器运行状态存入到当前容器版本中
  • 如果是init容器,如果容器退出并且退出码为0,则代表完成了不用管
  • 如果一个容器应该在下一个syncpod中重新启动,返回等待即可
  • 如果有失败记录,记录失败的原因
// convertToAPIContainerStatuses converts the cached runtime container
// statuses into API v1.ContainerStatus values for one class of containers
// (regular, init, or ephemeral). previousStatus is the last published status
// for the same class; hasInitContainers switches the default waiting reason
// to PodInitializing; isInitContainer selects init-container semantics.
func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecontainer.PodStatus, previousStatus []v1.ContainerStatus, containers []v1.Container, hasInitContainers, isInitContainer bool) []v1.ContainerStatus {
    // convertContainerStatus maps a runtime status to the API
    // running/waiting/terminated state for a container.
	convertContainerStatus := func(cs *kubecontainer.Status, oldStatus *v1.ContainerStatus) *v1.ContainerStatus {
		cid := cs.ID.String()
		status := &v1.ContainerStatus{
			Name:         cs.Name,
			RestartCount: int32(cs.RestartCount),
			Image:        cs.Image,
			ImageID:      cs.ImageID,
			ContainerID:  cid,
		}
		switch {
		case cs.State == kubecontainer.ContainerStateRunning:
			status.State.Running = &v1.ContainerStateRunning{StartedAt: metav1.NewTime(cs.StartedAt)}
		case cs.State == kubecontainer.ContainerStateCreated:
			status.State.Waiting = &v1.ContainerStateWaiting{}
		case cs.State == kubecontainer.ContainerStateExited:
			status.State.Terminated = &v1.ContainerStateTerminated{
				ExitCode:    int32(cs.ExitCode),
				Reason:      cs.Reason,
				Message:     cs.Message,
				StartedAt:   metav1.NewTime(cs.StartedAt),
				FinishedAt:  metav1.NewTime(cs.FinishedAt),
				ContainerID: cid,
			}

		// A container the runtime lost track of while it was previously
		// running is reported as terminated with "ContainerStatusUnknown".
		case cs.State == kubecontainer.ContainerStateUnknown &&
			oldStatus != nil && 
			oldStatus.State.Running != nil: 
			status.State.Terminated = &v1.ContainerStateTerminated{
				Reason:   "ContainerStatusUnknown",
				Message:  "The container could not be located when the pod was terminated",
				ExitCode: 137, // this code indicates an error
			}
			status.RestartCount = oldStatus.RestartCount + 1
		default:
			status.State.Waiting = &v1.ContainerStateWaiting{}
		}
		return status
	}
    // convertContainerStatusResources computes the reported resource
    // requirements (requests/limits) for a running container, preferring
    // values observed from the runtime over spec/old-status values.
	convertContainerStatusResources := func(cName string, status *v1.ContainerStatus, cStatus *kubecontainer.Status, oldStatuses map[string]v1.ContainerStatus) *v1.ResourceRequirements {
		var requests, limits v1.ResourceList
		
		oldStatus, oldStatusFound := oldStatuses[cName]
		// determineResource picks a resource value from the container spec
		// (new or restarted container) or from the old status (the same
		// still-running container).
		determineResource := func(rName v1.ResourceName, v1ContainerResource, oldStatusResource, resource v1.ResourceList) {
			if oldStatusFound {
				if oldStatus.State.Running == nil || status.ContainerID != oldStatus.ContainerID {
					if r, exists := v1ContainerResource[rName]; exists {
						resource[rName] = r.DeepCopy()
					}
				} else {
					if oldStatusResource != nil {
						if r, exists := oldStatusResource[rName]; exists {
							resource[rName] = r.DeepCopy()
						}
					}
				}
			}
		}
		container := kubecontainer.GetContainerSpec(pod, cName)
		found := false
		status.AllocatedResources, found = kl.statusManager.GetContainerResourceAllocation(string(pod.UID), cName)
		if !(container.Resources.Requests == nil && container.Resources.Limits == nil) && !found {
			klog.ErrorS(nil, "resource allocation not found in checkpoint store", "pod", pod.Name, "container", cName)
			if oldStatusFound {
				status.AllocatedResources = oldStatus.AllocatedResources
			}
		}
		if oldStatus.Resources == nil {
			oldStatus.Resources = &v1.ResourceRequirements{}
		}
		if container.Resources.Limits != nil {
			limits = make(v1.ResourceList)
			if cStatus.Resources != nil && cStatus.Resources.CPULimit != nil {
				limits[v1.ResourceCPU] = cStatus.Resources.CPULimit.DeepCopy()
			} else {
				determineResource(v1.ResourceCPU, container.Resources.Limits, oldStatus.Resources.Limits, limits)
			}
			if cStatus.Resources != nil && cStatus.Resources.MemoryLimit != nil {
				limits[v1.ResourceMemory] = cStatus.Resources.MemoryLimit.DeepCopy()
			} else {
				determineResource(v1.ResourceMemory, container.Resources.Limits, oldStatus.Resources.Limits, limits)
			}
			if ephemeralStorage, found := container.Resources.Limits[v1.ResourceEphemeralStorage]; found {
				limits[v1.ResourceEphemeralStorage] = ephemeralStorage.DeepCopy()
			}
		}
		if status.AllocatedResources != nil {
			requests = make(v1.ResourceList)
			if cStatus.Resources != nil && cStatus.Resources.CPURequest != nil {
				requests[v1.ResourceCPU] = cStatus.Resources.CPURequest.DeepCopy()
			} else {
				determineResource(v1.ResourceCPU, status.AllocatedResources, oldStatus.Resources.Requests, requests)
			}
			if memory, found := status.AllocatedResources[v1.ResourceMemory]; found {
				requests[v1.ResourceMemory] = memory.DeepCopy()
			}
			if ephemeralStorage, found := status.AllocatedResources[v1.ResourceEphemeralStorage]; found {
				requests[v1.ResourceEphemeralStorage] = ephemeralStorage.DeepCopy()
			}
		}
		resources := &v1.ResourceRequirements{
			Limits:   limits,
			Requests: requests,
		}
		return resources
	}
	// Index the previously published statuses by container name.
	oldStatuses := make(map[string]v1.ContainerStatus, len(containers))
	for _, status := range previousStatus {
		oldStatuses[status.Name] = status
	}
	// Default every container to a waiting state (ContainerCreating, or
	// PodInitializing when the pod has init containers).
	statuses := make(map[string]*v1.ContainerStatus, len(containers))
	defaultWaitingState := v1.ContainerState{Waiting: &v1.ContainerStateWaiting{Reason: ContainerCreating}}
	if hasInitContainers {
		defaultWaitingState = v1.ContainerState{Waiting: &v1.ContainerStateWaiting{Reason: PodInitializing}}
	}
	// Seed each container's status with its spec info, carrying over restart
	// count and last termination state from the previous status.
	for _, container := range containers {
		status := &v1.ContainerStatus{
			Name:  container.Name,
			Image: container.Image,
			State: defaultWaitingState,
		}
		oldStatus, found := oldStatuses[container.Name]
		if found {
			if oldStatus.State.Terminated != nil {
				status = &oldStatus
			} else {
				status.RestartCount = oldStatus.RestartCount
				status.LastTerminationState = oldStatus.LastTerminationState
			}
		}
		statuses[container.Name] = status
	}

	for _, container := range containers {
	    // A container missing from the runtime cache that was previously
	    // running (and not terminated) is recorded as terminated with the
	    // "ContainerStatusUnknown" reason.
		found := false
		for _, cStatus := range podStatus.ContainerStatuses {
			if container.Name == cStatus.Name {
				found = true
				break
			}
		}
		if found {
			continue
		}
		oldStatus, ok := oldStatuses[container.Name]
		if !ok {
			continue
		}
		if oldStatus.State.Terminated != nil {
			continue
		}
		if oldStatus.State.Running == nil {
			continue
		}
		status := statuses[container.Name]
		isDefaultWaitingStatus := status.State.Waiting != nil && status.State.Waiting.Reason == ContainerCreating
		if hasInitContainers {
			isDefaultWaitingStatus = status.State.Waiting != nil && status.State.Waiting.Reason == PodInitializing
		}
		if !isDefaultWaitingStatus {
			continue
		}
		if status.LastTerminationState.Terminated != nil {
			continue
		}
		status.LastTerminationState.Terminated = &v1.ContainerStateTerminated{
			Reason:   "ContainerStatusUnknown",
			Message:  "The container could not be located when the pod was deleted.  The container used to be Running",
			ExitCode: 137,
		}

		// Only bump the restart count for pods that are not being deleted.
		if pod.DeletionTimestamp == nil {
			status.RestartCount += 1
		}

		statuses[container.Name] = status
	}
	containerStatusesCopy := make([]*kubecontainer.Status, len(podStatus.ContainerStatuses))
	copy(containerStatusesCopy, podStatus.ContainerStatuses)
	sort.Sort(sort.Reverse(kubecontainer.SortContainerStatusesByCreationTime(containerStatusesCopy)))
	containerSeen := map[string]int{}
	// Walk the cached runtime statuses (newest first): the newest instance of
	// a container becomes its current state via convertContainerStatus, the
	// second-newest becomes its last termination state; older ones are
	// ignored.
	for _, cStatus := range containerStatusesCopy {
		cName := cStatus.Name
		if _, ok := statuses[cName]; !ok {
			continue
		}
		if containerSeen[cName] >= 2 {
			continue
		}
		var oldStatusPtr *v1.ContainerStatus
		if oldStatus, ok := oldStatuses[cName]; ok {
			oldStatusPtr = &oldStatus
		}
		status := convertContainerStatus(cStatus, oldStatusPtr)
		if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
			if status.State.Running != nil {
				status.Resources = convertContainerStatusResources(cName, status, cStatus, oldStatuses)
			}
		}
		if containerSeen[cName] == 0 {
			statuses[cName] = status
		} else {
			statuses[cName].LastTerminationState = status.State
		}
		containerSeen[cName] = containerSeen[cName] + 1
	}
	for _, container := range containers {
		if isInitContainer {
		    // An init container that exited with code 0 completed
		    // successfully; leave its status alone.
			s := podStatus.FindContainerStatusByName(container.Name)
			if s != nil && s.State == kubecontainer.ContainerStateExited && s.ExitCode == 0 {
				continue
			}
		}
		// Skip containers that should not be restarted on the next SyncPod.
		if !kubecontainer.ShouldContainerBeRestarted(&container, pod, podStatus) {
			continue
		}
		status := statuses[container.Name]
		// If a previous start attempt failed, surface the failure reason as
		// a waiting state.
		reason, ok := kl.reasonCache.Get(pod.UID, container.Name)
		if !ok {
			continue
		}
		if status.State.Terminated != nil {
			status.LastTerminationState = status.State
		}
		status.State = v1.ContainerState{
			Waiting: &v1.ContainerStateWaiting{
				Reason:  reason.Err.Error(),
				Message: reason.Message,
			},
		}
		statuses[container.Name] = status
	}
	// Sort the results and return.
	if isInitContainer {
		return kubetypes.SortStatusesOfInitContainers(pod, statuses)
	}
	containerStatuses := make([]v1.ContainerStatus, 0, len(statuses))
	for _, status := range statuses {
		containerStatuses = append(containerStatuses, *status)
	}

	sort.Sort(kubetypes.SortedContainerStatuses(containerStatuses))
	return containerStatuses
}

二、SyncTerminatingPod

代码和SyncPod基本一样,其中不同的是,SyncPod函数中最终调用kuberuntime_manager.go中的SyncPod,而SyncTerminatingPod调用的是killPod

// SyncTerminatingPod is the termination counterpart of SyncPod: it records
// and publishes the final pod status, stops liveness/startup probing, and
// kills the pod's running containers via killPod using the requested grace
// period. The remainder of the upstream function (waiting for containers to
// stop, releasing resources, etc.) is elided in this excerpt; a `return nil`
// is added so the snippet compiles (the original ended at the elision marker
// without returning, which is invalid for a function returning error).
func (kl *Kubelet) SyncTerminatingPod(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus, gracePeriod *int64, podStatusFn func(*v1.PodStatus)) error {
	// TODO(#113606): connect this with the incoming context parameter, which comes from the pod worker.
	// Start an OpenTelemetry span to trace this termination sync.
	ctx, otelSpan := kl.tracer.Start(ctx, "syncTerminatingPod", trace.WithAttributes(
		attribute.String("k8s.pod.uid", string(pod.UID)),
		attribute.String("k8s.pod", klog.KObj(pod).String()),
		attribute.String("k8s.pod.name", pod.Name),
		attribute.String("k8s.namespace.name", pod.Namespace),
	))
	defer otelSpan.End()
	// Generate the final status, let the caller adjust it, then publish it.
	apiPodStatus := kl.generateAPIPodStatus(pod, podStatus, false)
	if podStatusFn != nil {
		podStatusFn(&apiPodStatus)
	}
	kl.statusManager.SetPodStatus(pod, apiPodStatus)

	// Liveness and startup probes must not fire against a terminating pod.
	kl.probeManager.StopLivenessAndStartup(pod)

	// Kill the pod's running containers with the requested grace period.
	p := kubecontainer.ConvertPodStatusToRunningPod(kl.getRuntime().Type(), podStatus)
	if err := kl.killPod(ctx, pod, p, gracePeriod); err != nil {
		kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedToKillPod, "error killing pod: %v", err)
		utilruntime.HandleError(err)
		return err
	}
	//****// (rest of the upstream function elided by the article)
	return nil
}

这两个函数的处理逻辑基本相同。最终调用了killpod。(其实syncpod函数中也会调用killpod,也就是后续的处理都是一个函数流程了,后面会统一介绍)

三、总结

这篇文章有点长,有很多其他函数,我只调出来了几个复杂的介绍。

其中介绍到了v1.27版本的pod资源原地扩缩容会在之后的InPlacePodVerticalScaling新特性文章中介绍。还有status_manager管理器,这个组件也需要一篇文章去介绍

这个SyncTerminatingPod函数和SyncTerminatedPod函数我就不全部贴出来了,流程和syncpod一样,其中syncpod包含了另外两个函数的主要流程killpod函数,所以不需要重复介绍。接下来的文章就会介绍killpod和syncpod两个函数在container runtime层的介绍了