This repository was archived by the owner on Jan 29, 2025. It is now read-only.

Commit e3132cc (1 parent: 416bf0c)

uniemimu authored and tkatila committed

tile resource support and refactoring

Code cleanups and support for tile resource and related telemetry originated node labels.

Co-authored-by: Tuomas Katila <[email protected]>
Co-authored-by: Ukri Niemimuukko <[email protected]>

24 files changed: +2713 −561 lines

.github/workflows/end-to-end-test.yaml
Lines changed: 4 additions & 2 deletions

@@ -11,10 +11,12 @@ jobs:
       - name: Set up Go version
         uses: actions/setup-go@v1
         with:
-          go-version: 1.16
+          go-version: 1.17
       - name: Get tools for cluster installation
         run: ./.github/scripts/e2e_get_tools.sh
       - name: Set up cluster with TAS and custom metrics
         run: ./.github/scripts/e2e_setup_cluster.sh
       - name: Run end to end tests
-        run: cd .github/e2e/&& go test -v e2e_test.go
+        run: cd .github/e2e/&& go test -v e2e_test.go
+      - name: Clean up
+        run: ./.github/scripts/e2e_teardown_cluster.sh && ./.github/scripts/e2e_cleanup.sh

.github/workflows/go-build-and-test.yml
Lines changed: 2 additions & 2 deletions

@@ -15,7 +15,7 @@ jobs:
       - name: Set up Go
         uses: actions/setup-go@v2
         with:
-          go-version: 1.16
+          go-version: 1.17
 
       - name: Build
         run: make test
@@ -27,7 +27,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        go-version: [ 1.16.x]
+        go-version: [ 1.17.x]
     steps:
       - uses: actions/checkout@v2

.github/workflows/static-analysis.yaml
Lines changed: 4 additions & 4 deletions

@@ -19,16 +19,16 @@ jobs:
     name: Hadolint
     steps:
       - uses: actions/checkout@v2
-      - run: wget -q https://github.com/hadolint/hadolint/releases/download/v2.7.0/hadolint-Linux-x86_64 -O hadolint; chmod +x hadolint ; find . -type f \( -name "Dockerfile*" \) -print0 | xargs -n 1 -0 ./hadolint ;
+      - run: wget -q https://github.com/hadolint/hadolint/releases/download/v2.8.0/hadolint-Linux-x86_64 -O hadolint; chmod +x hadolint ; find . -type f \( -name "Dockerfile*" \) -print0 | xargs -n 1 -0 ./hadolint ;
   gofmt-imports:
     runs-on: ubuntu-latest
     name: Go Fmt and Go Import
     steps:
       - uses: actions/checkout@v2
       - uses: actions/setup-go@v2
         with:
-          go-version: 1.16
-      - run: go get golang.org/x/tools/cmd/goimports; test -z $(goimports -l .) && test -z $(gofmt -l .)
+          go-version: 1.17
+      - run: go install golang.org/x/tools/cmd/goimports@v0.1.9; test -z $(goimports -l .) && test -z $(gofmt -l .)
 
   golangci-TAS:
     strategy:
@@ -40,7 +40,7 @@ jobs:
       - uses: actions/checkout@v2
       - uses: actions/setup-go@v2
         with:
-          go-version: 1.16
+          go-version: 1.17
       - name: golangci-lint-TAS
         uses: golangci/golangci-lint-action@v2
         with:

gpu-aware-scheduling/README.md
Lines changed: 21 additions & 10 deletions

@@ -1,7 +1,7 @@
 # GPU Aware Scheduling
 GPU Aware Scheduling (GAS) allows using GPU resources such as memory amount for scheduling decisions in Kubernetes. It is used to optimize scheduling decisions when the POD resource requirements include the use of several GPUS or fragments of GPUs on a node, instead of traditionally mapping a GPU to a pod.
 
-GPU Aware Scheduling is deployed in a single pod on a Kubernetes Cluster.
+GPU Aware Scheduling is deployed in a single pod on a Kubernetes Cluster.
 
 **This software is a pre-production alpha version and should not be deployed to production servers.**
 
@@ -22,8 +22,9 @@ GAS tries to be agnostic about resource types. It doesn't try to have an underst
 
 GAS heavily utilizes annotations. It itself annotates PODs after making filtering decisions on them, with a precise timestamp at annotation named "gas-ts". The timestamp can then be used for figuring out the time-order of the GAS-made scheduling decision for example during the GPU-plugin resource allocation phase, if the GPU-plugin wants to know the order of GPU-resource consuming POD deploying inside the node. Another annotation which GAS adds is "gas-container-cards". It will have the names of the cards selected for the containers. Containers are separated by "|", and card names are separated by ",". Thus a two-container POD in which both containers use 2 GPUs, could get an annotation "card0,card1|card2,card3". These annotations are then consumed by the Intel GPU device plugin.
 
-GAS also expects labels to be in place for the nodes, in order to be able to keep book of the cluster GPU resource status. Nodes with GPUs shall be labeled with label name "gpu.intel.com/cards" and value shall be in form "card0.card1.card2.card3"... where the card names match with the intel GPUs which are currently found under /sys/class/drm folder, and the dot serves as separator. GAS expects all GPUs of the same node to be homogeneous in their resource capacity, and calculates the GPU extended resource capacity as evenly distributed to the GPUs listed by that label.
+Along with the "gas-container-cards" annotation there can be a "gas-container-tiles" annotation. This annotation is created when a container requests tile resources (gpu.intel.com/tiles). The gtX marking for tiles follows the sysfs entries under /sys/class/drm/cardX/gt/ where the "cardX" can be any card in the system. "gas-container-tiles" annotation marks the card+tile combos assigned to each container. For example a two container pod's annotation could be "card0:gt0+gt1|card0:gt2+gt3" where each container gets two tiles from the same GPU. The tile annotation is then converted to corresponding environment variables by the GPU plugin.
 
+GAS also expects labels to be in place for the nodes, in order to be able to keep book of the cluster GPU resource status. Nodes with GPUs shall be labeled with label name "gpu.intel.com/cards" and value shall be in form "card0.card1.card2.card3"... where the card names match with the intel GPUs which are currently found under /sys/class/drm folder, and the dot serves as separator. GAS expects all GPUs of the same node to be homogeneous in their resource capacity, and calculates the GPU extended resource capacity as evenly distributed to the GPUs listed by that label.
 
 ## Usage with NFD and the GPU-plugin
 A worked example for GAS is available [here](docs/usage.md)
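The annotation formats described in the README diff above ("card0,card1|card2,card3" and "card0:gt0+gt1|card0:gt2+gt3") are simple to decode. The following is a minimal illustrative sketch, not the scheduler's or GPU plugin's actual parsing code; the function names `splitCards` and `splitTiles` are hypothetical:

```go
package main

import (
	"fmt"
	"strings"
)

// splitCards parses a "gas-container-cards" annotation such as
// "card0,card1|card2,card3". Containers are separated by "|",
// card names within a container by ",".
func splitCards(annotation string) [][]string {
	containers := [][]string{}
	for _, c := range strings.Split(annotation, "|") {
		containers = append(containers, strings.Split(c, ","))
	}
	return containers
}

// splitTiles parses a "gas-container-tiles" annotation such as
// "card0:gt0+gt1|card0:gt2+gt3" into one card->tiles map per container.
// Tiles of a card are joined with "+" after the "card:" prefix.
func splitTiles(annotation string) []map[string][]string {
	result := []map[string][]string{}
	for _, container := range strings.Split(annotation, "|") {
		m := map[string][]string{}
		for _, cardTiles := range strings.Split(container, ",") {
			parts := strings.SplitN(cardTiles, ":", 2)
			if len(parts) != 2 {
				continue // skip malformed entries
			}
			m[parts[0]] = strings.Split(parts[1], "+")
		}
		result = append(result, m)
	}
	return result
}

func main() {
	fmt.Println(splitCards("card0,card1|card2,card3"))
	fmt.Println(splitTiles("card0:gt0+gt1|card0:gt2+gt3"))
}
```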
@@ -37,17 +38,18 @@ You should follow extender configuration instructions from the
 use GPU Aware Scheduling configurations, which can be found in the [deploy/extender-configuration](deploy/extender-configuration) folder.
 
 #### Deploy GAS
-GPU Aware Scheduling uses go modules. It requires Go 1.16 with modules enabled in order to build. GAS has been tested with Kubernetes 1.22.
+GPU Aware Scheduling uses go modules. It requires Go 1.17 with modules enabled in order to build. GAS has been tested with Kubernetes 1.22.
 A yaml file for GAS is contained in the deploy folder along with its service and RBAC roles and permissions.
 
-**Note:** If run without the unsafe flag a secret called extender-secret will need to be created with the cert and key for the TLS endpoint.
-GAS will not deploy if there is no secret available with the given deployment file.
+A secret called extender-secret will need to be created with the cert and key for the TLS endpoint. GAS will not deploy if there is no
+secret available with the given deployment file.
 
 A secret can be created with:
 
 ``
-kubectl create secret tls extender-secret --cert /etc/kubernetes/<PATH_TO_CERT> --key /etc/kubernetes/<PATH_TO_KEY>
+kubectl create secret tls extender-secret --cert /etc/kubernetes/<PATH_TO_CERT> --key /etc/kubernetes/<PATH_TO_KEY>
 ``
+
 In order to build in your host:
 
 ``make build``

@@ -75,15 +77,18 @@ name |type | description| usage | default|
 |cert| string | location of the cert file for the TLS endpoint | --cert=/root/cert.txt| /etc/kubernetes/pki/ca.key
 |key| string | location of the key file for the TLS endpoint| --key=/root/key.txt | /etc/kubernetes/pki/ca.key
 |cacert| string | location of the ca certificate for the TLS endpoint| --key=/root/cacert.txt | /etc/kubernetes/pki/ca.crt
-|unsafe| bool | whether or not to listen on a TLS endpoint with the scheduler extender | --unsafe=true| false
 |enableAllowlist| bool | enable POD-annotation based GPU allowlist feature | --enableAllowlist| false
 |enableDenylist| bool | enable POD-annotation based GPU denylist feature | --enableDenylist| false
+|balancedResource| string | enable named resource balancing between GPUs | --balancedResource| ""
+
+#### Balanced resource (optional)
+GAS can be configured to balance named resources so that the resource requests are distributed as evenly as possible between the GPUs. For example if the balanced resource is set to "tiles" and the containers request 1 tile each, the first container could get tile from "card0", the second from "card1", the third again from "card0" and so on.
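The balancing behavior described in that paragraph can be approximated by always picking the GPU with the least of the balanced resource already consumed. This is only a sketch of the idea, not the actual GAS implementation; `pickCard` and the tie-breaking rule (first card wins a tie) are assumptions:

```go
package main

import "fmt"

// pickCard returns the card with the fewest units of the balanced
// resource (e.g. tiles) already in use. On a tie the earlier card in
// the slice wins, which yields the card0, card1, card0, ... pattern
// from the README example.
func pickCard(used map[string]int, cards []string) string {
	best := cards[0]
	for _, c := range cards[1:] {
		if used[c] < used[best] {
			best = c
		}
	}
	return best
}

func main() {
	cards := []string{"card0", "card1"}
	used := map[string]int{} // card -> tiles consumed so far
	// Three containers each requesting one tile alternate between GPUs.
	for i := 0; i < 3; i++ {
		c := pickCard(used, cards)
		used[c]++
		fmt.Println(c)
	}
}
```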
 
 ## Adding the resource to make a deployment use GAS Scheduler Extender
 
-For example, in a deployment file:
+For example, in a deployment file:
 ```
-apiVersion: extensions/v1beta1
+apiVersion: apps/v1
 kind: Deployment
 metadata:
   name: demo-app
@@ -93,7 +98,7 @@ spec:
   replicas: 1
   selector:
     matchLabels:
-      app: demo
+      app: demo
   template:
     metadata:
       labels:
@@ -123,6 +128,12 @@ GAS Scheduler Extender is set up to use in-Cluster config in order to access the
 Additionally GAS Scheduler Extender listens on a TLS endpoint which requires a cert and a key to be supplied.
 These are passed to the executable using command line flags. In the provided deployment these certs are added in a Kubernetes secret which is mounted in the pod and passed as flags to the executable from there.
 
+## License
+
+[Apache License, Version 2.0](./LICENSE). All of the source code required to build the GPU Aware Scheduling is available under Open Source
+licenses. The source code files identify external Go modules used. The binary is distributed as a container image on
+[DockerHub](https://hub.docker.com/r/intel/gpu-extender). The container image contains license texts under folder `/licenses`.
+
 ## Communication and contribution
 
 Report a bug by [filing a new issue](https://github.com/intel/platform-aware-scheduling/issues).

gpu-aware-scheduling/cmd/gas-scheduler-extender/main.go
Lines changed: 8 additions & 6 deletions

@@ -2,6 +2,7 @@ package main
 
 import (
 	"flag"
+	"os"
 
 	"github.com/intel/platform-aware-scheduling/extender"
 	"github.com/intel/platform-aware-scheduling/gpu-aware-scheduling/pkg/gpuscheduler"
@@ -10,28 +11,29 @@ import (
 
 func main() {
 	var (
-		kubeConfig, port, certFile, keyFile, caFile string
-		unsafe, enableAllowlist, enableDenylist     bool
+		kubeConfig, port, certFile, keyFile, caFile, balancedRes string
+		enableAllowlist, enableDenylist                          bool
 	)
 
 	flag.StringVar(&kubeConfig, "kubeConfig", "/root/.kube/config", "location of kubernetes config file")
 	flag.StringVar(&port, "port", "9001", "port on which the scheduler extender will listen")
 	flag.StringVar(&certFile, "cert", "/etc/kubernetes/pki/ca.crt", "cert file extender will use for authentication")
 	flag.StringVar(&keyFile, "key", "/etc/kubernetes/pki/ca.key", "key file extender will use for authentication")
 	flag.StringVar(&caFile, "cacert", "/etc/kubernetes/pki/ca.crt", "ca file extender will use for authentication")
-	flag.BoolVar(&unsafe, "unsafe", false, "unsafe instances of GPU aware scheduler will be served over simple http.")
 	flag.BoolVar(&enableAllowlist, "enableAllowlist", false, "enable allowed GPUs annotation (csv list of names)")
 	flag.BoolVar(&enableDenylist, "enableDenylist", false, "enable denied GPUs annotation (csv list of names)")
+	flag.StringVar(&balancedRes, "balancedResource", "", "enable resource balacing within a node")
 	klog.InitFlags(nil)
 	flag.Parse()
 
 	kubeClient, _, err := extender.GetKubeClient(kubeConfig)
 	if err != nil {
-		panic(err)
+		klog.Error("couldn't get kube client, cannot continue: ", err.Error())
+		os.Exit(1)
 	}
 
-	gasscheduler := gpuscheduler.NewGASExtender(kubeClient, enableAllowlist, enableDenylist)
+	gasscheduler := gpuscheduler.NewGASExtender(kubeClient, enableAllowlist, enableDenylist, balancedRes)
 	sch := extender.Server{Scheduler: gasscheduler}
-	sch.StartServer(port, certFile, keyFile, caFile, unsafe)
+	sch.StartServer(port, certFile, keyFile, caFile, false)
 	klog.Flush()
 }

gpu-aware-scheduling/deploy/gas-deployment.yaml
Lines changed: 3 additions & 0 deletions

@@ -33,6 +33,9 @@ spec:
           readOnlyRootFilesystem: true
           runAsNonRoot: true
           runAsUser: 10001
+          allowPrivilegeEscalation: false
+          seccompProfile:
+            type: RuntimeDefault
         volumeMounts:
         - name: certs
           mountPath: /gas/cert

gpu-aware-scheduling/deploy/images/Dockerfile_gpu-extender
Lines changed: 8 additions & 30 deletions

@@ -1,33 +1,11 @@
-#
-# Copyright (c) 2021 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-FROM golang:1.16-alpine as user_builder
-RUN adduser -D -u 10001 gas
-
-FROM golang:1.16-alpine as builder
-ARG DIR=gpu-aware-scheduling
-ARG SRC_ROOT=/src_root
-COPY . ${SRC_ROOT}
-
-RUN mkdir -p /install_root/etc
-COPY --from=user_builder /etc/passwd /install_root/etc/passwd
-
-WORKDIR ${SRC_ROOT}/${DIR}
-RUN CGO_ENABLED=0 GO111MODULE=on go build -ldflags="-s -w" -o /install_root/extender ./cmd/gas-scheduler-extender \
-    && install -D ${SRC_ROOT}/${DIR}/LICENSE /install_root/usr/local/share/package-licenses/gpu-aware-scheduling/LICENSE \
-    && scripts/copy-modules-licenses.sh ./cmd/gas-scheduler-extender /install_root/usr/local/share/
+# SPDX-License-Identifier: Apache-2.0
+
+FROM golang:1.17.7-alpine as builder
+COPY . /src_root
+WORKDIR /src_root/gpu-aware-scheduling
+RUN mkdir -p /install_root/etc && adduser -D -u 10001 gas && tail -1 /etc/passwd > /install_root/etc/passwd \
+    && CGO_ENABLED=0 GO111MODULE=on go build -ldflags="-s -w" -o /install_root/extender ./cmd/gas-scheduler-extender \
+    && GO111MODULE=on go run github.com/google/[email protected] save "./cmd/gas-scheduler-extender" --save_path /install_root/licenses
 
 FROM scratch
 WORKDIR /
New file
Lines changed: 14 additions & 0 deletions

@@ -0,0 +1,14 @@
+GPU Aware Scheduling (GAS) is a K8s extender which allows using GPU resources such as memory amount for
+scheduling decisions in Kubernetes. It also supports telemetry based node labels for controlling GPU usage.
+
+For further information check github at:
+
+https://github.com/intel/platform-aware-scheduling
+
+https://github.com/intel/platform-aware-scheduling/tree/master/gpu-aware-scheduling
+
+LEGAL NOTICE: By accessing, downloading or using this software and any required dependent software (the
+"Software Package"), you agree to the terms and conditions of the software license agreements for the
+Software Package, which may also include notices, disclaimers, or license terms for third party software
+included with the Software Package. Please refer to the "third-party-programs.txt" or other similarly-named
+text file for additional details.

gpu-aware-scheduling/docs/usage.md
Lines changed: 10 additions & 2 deletions

@@ -16,10 +16,10 @@ Basically all versions starting with [v0.6.0](https://github.com/kubernetes-sigs
 
 For picking up the labels printed by the hook installed by the GPU-plugin initcontainer, deploy nfd master with this kind of command in its yaml:
 ```
-command: ["nfd-master", "--resource-labels=gpu.intel.com/memory.max,gpu.intel.com/millicores", "--extra-label-ns=gpu.intel.com"]
+command: ["nfd-master", "--resource-labels=gpu.intel.com/memory.max,gpu.intel.com/millicores,gpu.intel.com/tiles", "--extra-label-ns=gpu.intel.com"]
 ```
 
-The above would promote two labels, "memory.max" and "millicores" to extended resources of the node that produces the labels.
+The above would promote three labels, "memory.max", "millicores" and "tiles" to extended resources of the node that produces the labels.
 
 If you want to enable i915 capability scanning, the nfd worker needs to read debugfs, and therefore it needs to run as privileged, like this:
 ```
@@ -63,6 +63,14 @@ Your PODs then, needs to ask for some GPU-resources. Like this:
       gpu.intel.com/memory.max: 10M
 ```
 
+Or like this for tiles:
+```
+  resources:
+    limits:
+      gpu.intel.com/i915: 1
+      gpu.intel.com/tiles: 2
+```
+
 A complete example pod yaml is located in [docs/example](./example)
 
 ## Node Label support
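Combining the resource fragments shown in the usage.md diff above, a minimal pod requesting tile resources might look like the following sketch. The pod name and image are illustrative placeholders; the repository's docs/example folder contains the authoritative example:

```
apiVersion: v1
kind: Pod
metadata:
  name: tile-demo          # hypothetical name
spec:
  containers:
  - name: demo
    image: example/image   # placeholder image
    resources:
      limits:
        gpu.intel.com/i915: 1
        gpu.intel.com/tiles: 2
```

With this spec, GAS would pick a node and GPU with two free tiles and record its choice in the "gas-container-cards" and "gas-container-tiles" annotations for the GPU plugin to consume.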

gpu-aware-scheduling/go.mod
Lines changed: 45 additions & 10 deletions

@@ -1,18 +1,53 @@
 module github.com/intel/platform-aware-scheduling/gpu-aware-scheduling
 
-go 1.16
+go 1.17
 
 require (
-	github.com/intel/platform-aware-scheduling/extender v0.0.0-00010101000000-000000000000
-	github.com/smartystreets/goconvey v1.7.0
+	github.com/intel/platform-aware-scheduling/extender v0.1.0
+	github.com/smartystreets/goconvey v1.7.2
 	github.com/stretchr/testify v1.7.0
-	k8s.io/api v0.22.2
-	k8s.io/apimachinery v0.22.2
-	k8s.io/client-go v0.22.2
-	k8s.io/klog/v2 v2.30.0
+	k8s.io/api v0.23.3
+	k8s.io/apimachinery v0.23.3
+	k8s.io/client-go v0.23.3
+	k8s.io/klog/v2 v2.40.1
 )
 
-replace (
-	github.com/intel/platform-aware-scheduling/extender => ../extender
-	github.com/intel/platform-aware-scheduling/gpu-aware-scheduling => ../gpu-aware-scheduling
+require (
+	github.com/davecgh/go-spew v1.1.1 // indirect
+	github.com/evanphx/json-patch v5.6.0+incompatible // indirect
+	github.com/go-logr/logr v1.2.2 // indirect
+	github.com/gogo/protobuf v1.3.2 // indirect
+	github.com/golang/protobuf v1.5.2 // indirect
+	github.com/google/go-cmp v0.5.7 // indirect
+	github.com/google/gofuzz v1.2.0 // indirect
+	github.com/googleapis/gnostic v0.5.5 // indirect
+	github.com/gopherjs/gopherjs v0.0.0-20220104163920-15ed2e8cf2bd // indirect
+	github.com/imdario/mergo v0.3.12 // indirect
+	github.com/json-iterator/go v1.1.12 // indirect
+	github.com/jtolds/gls v4.20.0+incompatible // indirect
+	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
+	github.com/modern-go/reflect2 v1.0.2 // indirect
+	github.com/pkg/errors v0.9.1 // indirect
+	github.com/pmezard/go-difflib v1.0.0 // indirect
+	github.com/smartystreets/assertions v1.2.1 // indirect
+	github.com/spf13/pflag v1.0.5 // indirect
+	github.com/stretchr/objx v0.3.0 // indirect
+	golang.org/x/net v0.0.0-20220127074510-2fabfed7e28f // indirect
+	golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8 // indirect
+	golang.org/x/sys v0.0.0-20220114195835-da31bd327af9 // indirect
+	golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 // indirect
+	golang.org/x/text v0.3.7 // indirect
+	golang.org/x/time v0.0.0-20211116232009-f0f3c7e86c11 // indirect
+	google.golang.org/appengine v1.6.7 // indirect
+	google.golang.org/protobuf v1.27.1 // indirect
+	gopkg.in/inf.v0 v0.9.1 // indirect
+	gopkg.in/yaml.v2 v2.4.0 // indirect
+	gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect
+	k8s.io/kube-openapi v0.0.0-20220124234850-424119656bbf // indirect
+	k8s.io/utils v0.0.0-20220127004650-9b3446523e65 // indirect
+	sigs.k8s.io/json v0.0.0-20211208200746-9f7c6b3444d2 // indirect
+	sigs.k8s.io/structured-merge-diff/v4 v4.2.1 // indirect
+	sigs.k8s.io/yaml v1.3.0 // indirect
 )
+
+replace github.com/intel/platform-aware-scheduling/gpu-aware-scheduling => ../gpu-aware-scheduling
