• 获取node节点GPU/CPU/Mem 的capacity和request 数
func GetNodeResource(allPods []v1.Pod, node *v1.Node) (resource map[string]model.ResourceStatus, err error) {
var (
allocatedGPU int64
allocatedCPU int64
allocatedMem int64
capacityGPU int64
)
resource = map[string]model.ResourceStatus{}
val, ok := node.Status.Capacity[common.NvidiaGPUResource]
if !ok {
capacityGPU = 0
} else {
capacityGPU = val.Value()
}
for _, pod := range getPodsFromNode(allPods, node) {
allocatedGPU += gpuInPod(&pod)
allocatedCPU += cpuInPod(&pod)
allocatedMem += memInPod(&pod)
}
resource[common.ResourceGPU] = model.ResourceStatus{
Request: allocatedGPU,
Capacity: capacityGPU,
}
resource[common.ResourceCPU] = model.ResourceStatus{
Request: allocatedCPU,
Capacity: node.Status.Capacity.Cpu().Value(),
}
resource[common.ResourceMemory] = model.ResourceStatus{
Request: allocatedMem,
Capacity: node.Status.Capacity.Memory().Value(),
}
return resource, nil
}
// getPodsFromNode returns the pods from allPods whose Spec.NodeName equals
// node.Name, preserving their order. The result is nil when no pod matches.
func getPodsFromNode(allPods []v1.Pod, node *v1.Node) (pods []v1.Pod) {
	for i := range allPods {
		if allPods[i].Spec.NodeName != node.Name {
			continue
		}
		pods = append(pods, allPods[i])
	}
	return pods
}
// AllActivePods lists the pods in all namespaces that match the field selector
// "status.phase=Running" and returns them.
//
// On a list failure the error is logged and returned with a nil slice. On
// success the number of pods found is logged.
func AllActivePods(clientSet *kubernetes.Clientset) ([]v1.Pod, error) {
	runningOnly := metav1.ListOptions{
		FieldSelector: "status.phase=Running",
	}
	// metav1.NamespaceAll is the empty string, i.e. no namespace restriction.
	podList, err := clientSet.CoreV1().Pods(metav1.NamespaceAll).List(context.TODO(), runningOnly)
	if err != nil {
		log.Infof("get pods err, %s", err)
		return nil, err
	}

	running := podList.Items
	log.Infof("Running pod count %d", len(running))
	return running, nil
}
// gpuInPod returns the number of GPUs claimed by the pod: the sum, over
// pod.Spec.Containers, of each container's resource limit for
// common.NvidiaGPUResource. Containers without that limit contribute nothing.
func gpuInPod(pod *v1.Pod) (gpuCount int64) {
	for i := range pod.Spec.Containers {
		limits := pod.Spec.Containers[i].Resources.Limits
		if quantity, found := limits[common.NvidiaGPUResource]; found {
			gpuCount += quantity.Value()
		}
	}
	return gpuCount
}
// cpuInPod returns the pod's total CPU request in whole cores.
//
// The request of each container in pod.Spec.Containers is read under
// common.ResourceCPU; containers without a CPU request contribute nothing.
// Requests are summed in milli-cores and rounded up to a whole core once, at
// the end. Quantity.Value rounds every fractional quantity up to the next
// integer, so applying it per container would count, for example, two
// containers requesting 100m each as 2 cores instead of 1.
func cpuInPod(pod *v1.Pod) (cpuCount int64) {
	const milliPerCore = 1000

	var milliTotal int64
	for i := range pod.Spec.Containers {
		requests := pod.Spec.Containers[i].Resources.Requests
		if quantity, found := requests[common.ResourceCPU]; found {
			milliTotal += quantity.MilliValue()
		}
	}

	// Round up so any non-zero request still reports at least one core.
	// CPU requests are assumed to be non-negative.
	return (milliTotal + milliPerCore - 1) / milliPerCore
}
// memInPod returns the pod's total memory request: the sum, over
// pod.Spec.Containers, of each container's request for common.ResourceMemory.
// Containers without a memory request contribute nothing.
func memInPod(pod *v1.Pod) (memCount int64) {
	for i := range pod.Spec.Containers {
		requests := pod.Spec.Containers[i].Resources.Requests
		if quantity, found := requests[common.ResourceMemory]; found {
			memCount += quantity.Value()
		}
	}
	return memCount
}

// GetNodeRole returns "master" when the node carries the
// common.MasterLabelRole label (whatever its value) and "node" otherwise.
func GetNodeRole(node *v1.Node) string {
	role := "node"
	if _, isMaster := node.Labels[common.MasterLabelRole]; isMaster {
		role = "master"
	}
	return role
}