Skip to content
This repository was archived by the owner on Sep 9, 2024. It is now read-only.

Commit 9d99832

Browse files
committed
Container deletion from the workload
When deleting pods that use the resources of a given power profile, the workload may be updated incorrectly, removing all of its existing container entries. This error can be observed whenever containers that have the same name but belong to different pods share the resources of the same workload. This commit changes how a container is identified within a workload, looking it up by its UID instead of its name, and also records the workload information when admitting a powerpod. Signed-off-by: Eduardo Juliano Alberti <[email protected]>
1 parent 289c3c2 commit 9d99832

File tree

2 files changed

+31
-7
lines changed

2 files changed

+31
-7
lines changed

controllers/powerpod_controller.go

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,10 @@ func (r *PowerPodReconciler) Reconcile(c context.Context, req ctrl.Request) (ctr
9292
if !pod.ObjectMeta.DeletionTimestamp.IsZero() || pod.Status.Phase == corev1.PodSucceeded {
9393
// If the pod's deletion timestamp is not zero, then the pod has been deleted
9494

95-
powerPodState := r.State.GetPodFromState(pod.GetName(), pod.GetNamespace())
95+
powerPodState := r.State.GetPodFromStateUID(string(pod.GetUID()))
9696

97-
logger.V(5).Info("removing the pod from the internal state")
98-
if err = r.State.DeletePodFromState(pod.GetName(), pod.GetNamespace()); err != nil {
97+
logger.V(5).Info("Removing Pod from internal state", "Pod Name", pod.GetName(), "UID", pod.GetUID())
98+
if err = r.State.DeletePodFromStateUID(string(pod.GetUID())); err != nil {
9999
logger.Error(err, "error removing the pod from the internal state")
100100
return ctrl.Result{}, err
101101
}
@@ -105,14 +105,15 @@ func (r *PowerPodReconciler) Reconcile(c context.Context, req ctrl.Request) (ctr
105105
for _, container := range powerPodState.Containers {
106106
workload := container.Workload
107107
cpus := container.ExclusiveCPUs
108+
logger.V(5).Info("Removing", "Workload", workload, "CPUs", cpus)
108109
if _, exists := workloadToCPUsRemoved[workload]; exists {
109110
workloadToCPUsRemoved[workload] = append(workloadToCPUsRemoved[workload], cpus...)
110111
} else {
111112
workloadToCPUsRemoved[workload] = cpus
112113
}
113114
}
114115
for workloadName, cpus := range workloadToCPUsRemoved {
115-
logger.V(5).Info("retrieving the workload instance %s", workloadName)
116+
logger.V(5).Info("retrieving the workload instance", "Workload Name", workloadName)
116117
workload := &powerv1.PowerWorkload{}
117118
err = r.Get(context.TODO(), client.ObjectKey{
118119
Namespace: IntelPowerNamespace,
@@ -209,6 +210,7 @@ func (r *PowerPodReconciler) Reconcile(c context.Context, req ctrl.Request) (ctr
209210

210211
workloadContainer := container
211212
workloadContainer.Pod = pod.Name
213+
workloadContainer.Workload = workloadName
212214
containerList = append(containerList, workloadContainer)
213215
}
214216
for i, newContainer := range containerList {
@@ -280,9 +282,11 @@ func (r *PowerPodReconciler) getPowerProfileRequestsFromContainers(containers []
280282
containerID := getContainerID(pod, container.Name)
281283
coreIDs, err := r.PodResourcesClient.GetContainerCPUs(pod.GetName(), container.Name)
282284
if err != nil {
285+
logger.V(5).Info("Error getting CoreIDs.", "ContainerID", containerID)
283286
return map[string][]uint{}, []powerv1.Container{}, err
284287
}
285288
cleanCoreList := getCleanCoreList(coreIDs)
289+
logger.V(5).Info("Reserving cores to container.", "ContainerID", containerID, "Cores", cleanCoreList)
286290

287291
logger.V(5).Info("creating the power container")
288292
powerContainer := &powerv1.Container{}
@@ -347,7 +351,7 @@ func getNewWorkloadContainerList(nodeContainers []powerv1.Container, podStateCon
347351

348352
logger.V(5).Info("checking if there are new containers for the workload")
349353
for _, container := range nodeContainers {
350-
if !isContainerInList(container.Name, podStateContainers, logger) {
354+
if !isContainerInList(container.Name, container.Id, podStateContainers, logger) {
351355
newNodeContainers = append(newNodeContainers, container)
352356
}
353357
}
@@ -356,9 +360,9 @@ func getNewWorkloadContainerList(nodeContainers []powerv1.Container, podStateCon
356360
}
357361

358362
// Helper function - if container is in a list of containers
359-
func isContainerInList(name string, containers []powerv1.Container, logger *logr.Logger) bool {
363+
func isContainerInList(name string, uid string, containers []powerv1.Container, logger *logr.Logger) bool {
360364
for _, container := range containers {
361-
if container.Name == name {
365+
if container.Name == name && container.Id == uid {
362366
return true
363367
}
364368
}

pkg/podstate/podstate.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,16 @@ func (s *State) GetPodFromState(podName string, podNamespace string) powerv1.Gua
3838
return powerv1.GuaranteedPod{}
3939
}
4040

41+
func (s *State) GetPodFromStateUID(podUID string) powerv1.GuaranteedPod {
42+
for _, existingPod := range s.GuaranteedPods {
43+
if existingPod.UID == podUID {
44+
return existingPod
45+
}
46+
}
47+
48+
return powerv1.GuaranteedPod{}
49+
}
50+
4151
func (s *State) GetCPUsFromPodState(podState powerv1.GuaranteedPod) []uint {
4252
cpus := make([]uint, 0)
4353
for _, container := range podState.Containers {
@@ -47,6 +57,16 @@ func (s *State) GetCPUsFromPodState(podState powerv1.GuaranteedPod) []uint {
4757
return cpus
4858
}
4959

60+
func (s *State) DeletePodFromStateUID(podUID string) error {
61+
for i, pod := range s.GuaranteedPods {
62+
if pod.UID == podUID {
63+
s.GuaranteedPods = append(s.GuaranteedPods[:i], s.GuaranteedPods[i+1:]...)
64+
}
65+
}
66+
67+
return nil
68+
}
69+
5070
func (s *State) DeletePodFromState(podName string, podNamespace string) error {
5171
for i, pod := range s.GuaranteedPods {
5272
if pod.Name == podName && pod.Namespace == podNamespace {

0 commit comments

Comments
 (0)