Skip to content
This repository was archived by the owner on Jan 29, 2025. It is now read-only.

Commit 9568b99

Browse files
tkatila, uniemimu
authored and committed
Align concatenate labels handling with gpu plugin
All labels end with an alphanumeric character and subsequent labels start with a Z char. Co-authored-by: Tuomas Katila <[email protected]> Co-authored-by: Ukri Niemimuukko <[email protected]>
1 parent 8e905f6 commit 9568b99

File tree

4 files changed

+30
-18
lines changed

4 files changed

+30
-18
lines changed

gpu-aware-scheduling/pkg/gpuscheduler/scheduler.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -778,9 +778,9 @@ func createGPUMaps(pod *v1.Pod, node *v1.Node, allGPUs []string) []map[string]bo
778778
}
779779

780780
_, singleNumaRequested := pod.Annotations[singleNumaAnnotationName]
781-
gpuNumaInformation, nodeHasNumaInfo := node.Labels[numaMappingLabel]
781+
gpuNumaInformation := concatenateSplitLabel(node, numaMappingLabel)
782782

783-
if singleNumaRequested && nodeHasNumaInfo {
783+
if singleNumaRequested && len(gpuNumaInformation) > 0 {
784784
numaGroups := strings.Split(gpuNumaInformation, "_")
785785

786786
for _, numaGroup := range numaGroups {

gpu-aware-scheduling/pkg/gpuscheduler/scheduler_test.go

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -688,9 +688,9 @@ func TestGetNodeGPUListFromGpuNumbers(t *testing.T) {
688688
node := v1.Node{
689689
ObjectMeta: metav1.ObjectMeta{
690690
Labels: map[string]string{
691-
gpuNumbersLabel: "0.1.2.",
692-
gpuNumbersLabel + "2": "5.8.9.",
693-
gpuNumbersLabel + "3": "10"},
691+
gpuNumbersLabel: "0.1.2",
692+
gpuNumbersLabel + "2": "Z.5.8.9",
693+
gpuNumbersLabel + "3": "Z.10"},
694694
},
695695
}
696696

@@ -897,7 +897,8 @@ func TestFilterWithXeLinkedDisabledTiles(t *testing.T) {
897897
Labels: map[string]string{
898898
"gpu.intel.com/gpu-numbers": "0.1.2.3",
899899
"gpu.intel.com/tiles": "4",
900-
xeLinksLabel: "0.0-1.0_1.0-0.0_2.1-3.2_3.2-2.1",
900+
xeLinksLabel: "0.0-1.0_1.0-0.0_2.1",
901+
xeLinksLabel + "2": "Z-3.2_3.2-2.1",
901902
},
902903
},
903904
Status: v1.NodeStatus{
@@ -1031,7 +1032,8 @@ func TestRunSchedulingLogicWithMultiContainerXelinkedTileResourceReq(t *testing.
10311032
testCases := []testCase{
10321033
{
10331034
extraLabels: map[string]string{
1034-
xeLinksLabel: "0.0-1.0_1.0-0.0_2.1-3.2_3.2-2.1",
1035+
xeLinksLabel: "0.0-1.0_1.0-0.0",
1036+
xeLinksLabel + "2": "Z_2.1-3.2_3.2-2.1",
10351037
},
10361038
extraAnnotations: map[string]string{xelinkAnnotationName: trueValueString},
10371039
description: "4 card xe-linked success case",
@@ -1042,8 +1044,9 @@ func TestRunSchedulingLogicWithMultiContainerXelinkedTileResourceReq(t *testing.
10421044
},
10431045
{
10441046
extraLabels: map[string]string{
1045-
xeLinksLabel: "0.0-1.0_1.0-0.0_2.1-3.2_3.2-2.1",
1046-
numaMappingLabel: "0-0.1_1-2.3",
1047+
xeLinksLabel: "0.0-1.0_1.0-0.0_2.1-3.2_3.2-2.1",
1048+
numaMappingLabel: "0-0.1_1",
1049+
numaMappingLabel + "2": "Z-2.3",
10471050
},
10481051
extraAnnotations: map[string]string{
10491052
xelinkAnnotationName: trueValueString,

gpu-aware-scheduling/pkg/gpuscheduler/utils.go

Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@ const (
2525
desiredIntBits = 16
2626
regexDesiredCount = 3
2727
regexXeLinkCount = 5
28+
labelControlChar = "Z"
2829
)
2930

3031
// Globals for compiled regexps. No other global types here!
@@ -213,12 +214,20 @@ func isGPUInPCIGroup(gpuName, pciGroupGPUName string, node *v1.Node) bool {
213214

214215
// concatenateSplitLabel returns the given label value and concatenates any
215216
// additional values for label names with a running number postfix starting with "2".
217+
// Subsequent values should start with the control character 'Z'.
216218
func concatenateSplitLabel(node *v1.Node, labelName string) string {
217219
postFix := 2
218220
value := node.Labels[labelName]
219221

220222
for continuingLabelValue, ok := node.Labels[labelName+strconv.Itoa(postFix)]; ok; {
221-
value += continuingLabelValue
223+
if !strings.HasPrefix(continuingLabelValue, labelControlChar) {
224+
klog.Warningf("concatenated chuck has invalid prefix: %s", continuingLabelValue[:len(labelControlChar)])
225+
226+
return ""
227+
} else {
228+
value += continuingLabelValue[len(labelControlChar):]
229+
}
230+
222231
postFix++
223232
continuingLabelValue, ok = node.Labels[labelName+strconv.Itoa(postFix)]
224233
}
@@ -348,7 +357,7 @@ func reorderPreferredTilesFirst(tiles []int, preferred []int) []int {
348357
func getXeLinkedTiles(gpuName string, node *v1.Node) map[int]bool {
349358
xeLinkedTiles := map[int]bool{}
350359

351-
xeLinkLabelValue := node.Labels[xeLinksLabel]
360+
xeLinkLabelValue := concatenateSplitLabel(node, xeLinksLabel)
352361
lZeroDeviceID := gpuNameToLZeroDeviceID(gpuName, node)
353362

354363
if lZeroDeviceID == -1 || xeLinkLabelValue == "" {
@@ -410,7 +419,7 @@ func parseXeLink(link string) (linkInfo, error) {
410419
}
411420

412421
func getXeLinkedGPUInfo(gpuName string, tileIndex int, node *v1.Node) (string, int) {
413-
xeLinkLabelValue := node.Labels[xeLinksLabel]
422+
xeLinkLabelValue := concatenateSplitLabel(node, xeLinksLabel)
414423
lZeroDeviceID := gpuNameToLZeroDeviceID(gpuName, node)
415424

416425
if lZeroDeviceID == -1 || xeLinkLabelValue == "" {
@@ -456,7 +465,7 @@ func lZeroDeviceIDToGpuName(lZeroID int, node *v1.Node) string {
456465
}
457466

458467
func numSortedGpuNums(node *v1.Node) []string {
459-
gpuNums := node.Labels[gpuNumbersLabel]
468+
gpuNums := concatenateSplitLabel(node, gpuNumbersLabel)
460469

461470
gpuNumSlice := strings.Split(gpuNums, ".")
462471

gpu-aware-scheduling/pkg/gpuscheduler/utils_test.go

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -130,9 +130,9 @@ func TestPCIGroups(t *testing.T) {
130130

131131
Convey("When the GPU belongs to a PCI Group with multiple group labels", t, func() {
132132
node := getMockNode(1, 1)
133-
node.Labels[pciGroupLabel] = "0.1_2.3.4_"
134-
node.Labels[pciGroupLabel+"2"] = "5.6_7.8_11.12_"
135-
node.Labels[pciGroupLabel+"3"] = "9.10"
133+
node.Labels[pciGroupLabel] = "0.1_2.3.4"
134+
node.Labels[pciGroupLabel+"2"] = "Z_5.6_7.8_11.12"
135+
node.Labels[pciGroupLabel+"3"] = "Z_9.10"
136136
So(getPCIGroup(node, "card6"), ShouldResemble, []string{"5", "6"})
137137
So(getPCIGroup(node, "card9"), ShouldResemble, []string{"9", "10"})
138138
So(getPCIGroup(node, "card20"), ShouldResemble, []string{})
@@ -326,8 +326,8 @@ func TestConcatenateSplitLabel(t *testing.T) {
326326
Convey("When the label is split, it can be concatenated", t, func() {
327327
node := getMockNode(1, 1)
328328
node.Labels[pciGroupLabel] = "foo"
329-
node.Labels[pciGroupLabel+"2"] = "bar"
330-
node.Labels[pciGroupLabel+"3"] = "ber"
329+
node.Labels[pciGroupLabel+"2"] = "Zbar"
330+
node.Labels[pciGroupLabel+"3"] = "Zber"
331331
result := concatenateSplitLabel(node, pciGroupLabel)
332332
So(result, ShouldEqual, "foobarber")
333333
})

0 commit comments

Comments
 (0)