Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

---
# CustomResourceDefinition for DynamoGraphDeploymentScalingAdapter
# (group nvidia.com, version v1alpha1, short name "dgdsa").
# NOTE(review): the controller-gen version annotation and the descriptions below
# (which mirror the Go doc comments on the v1alpha1 types) indicate this manifest
# is generated output; presumably it should be regenerated rather than edited by
# hand. Confirm against the repository's manifest-generation target.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  annotations:
    controller-gen.kubebuilder.io/version: v0.16.4
    # Helm resource policy: ask Helm to leave this CRD in place instead of deleting it.
    helm.sh/resource-policy: keep
  name: dynamographdeploymentscalingadapters.nvidia.com
spec:
  group: nvidia.com
  names:
    kind: DynamoGraphDeploymentScalingAdapter
    listKind: DynamoGraphDeploymentScalingAdapterList
    plural: dynamographdeploymentscalingadapters
    shortNames:
    - dgdsa
    singular: dynamographdeploymentscalingadapter
  scope: Namespaced
  versions:
  # Extra columns shown when listing the resource: DGD, SERVICE, REPLICAS, AGE.
  - additionalPrinterColumns:
    - description: DynamoGraphDeployment name
      jsonPath: .spec.dgdRef.name
      name: DGD
      type: string
    - description: Service name
      jsonPath: .spec.dgdRef.service
      name: SERVICE
      type: string
    - description: Current replicas
      jsonPath: .status.replicas
      name: REPLICAS
      type: integer
    - jsonPath: .metadata.creationTimestamp
      name: AGE
      type: date
    name: v1alpha1
    schema:
      openAPIV3Schema:
        description: |-
          DynamoGraphDeploymentScalingAdapter provides a scaling interface for individual services
          within a DynamoGraphDeployment. It implements the Kubernetes scale
          subresource, enabling integration with HPA, KEDA, and custom autoscalers.

          The adapter acts as an intermediary between autoscalers and the DGD,
          ensuring that only the adapter controller modifies the DGD's service replicas.
          This prevents conflicts when multiple autoscaling mechanisms are in play.
        properties:
          apiVersion:
            description: |-
              APIVersion defines the versioned schema of this representation of an object.
              Servers should convert recognized schemas to the latest internal value, and
              may reject unrecognized values.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
            type: string
          kind:
            description: |-
              Kind is a string value representing the REST resource this object represents.
              Servers may infer this from the endpoint the client submits requests to.
              Cannot be updated.
              In CamelCase.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
            type: string
          metadata:
            type: object
          spec:
            description: DynamoGraphDeploymentScalingAdapterSpec defines the desired state of DynamoGraphDeploymentScalingAdapter
            properties:
              dgdRef:
                description: DGDRef references the DynamoGraphDeployment and the specific service to scale.
                properties:
                  name:
                    description: Name of the DynamoGraphDeployment
                    minLength: 1
                    type: string
                  service:
                    description: Service is the key name of the service within the DGD's spec.services map to scale
                    minLength: 1
                    type: string
                required:
                - name
                - service
                type: object
              replicas:
                description: |-
                  Replicas is the desired number of replicas for the target service.
                  This field is modified by external autoscalers (HPA/KEDA/Planner) or manually by users.
                format: int32
                minimum: 0
                type: integer
            # Both spec fields are mandatory; replicas may be 0 (minimum: 0).
            required:
            - dgdRef
            - replicas
            type: object
          status:
            description: DynamoGraphDeploymentScalingAdapterStatus defines the observed state of DynamoGraphDeploymentScalingAdapter
            properties:
              lastScaleTime:
                description: LastScaleTime is the last time the adapter scaled the target service.
                format: date-time
                type: string
              replicas:
                description: |-
                  Replicas is the current number of replicas for the target service.
                  This is synced from the DGD's service replicas and is required for the scale subresource.
                format: int32
                type: integer
              selector:
                description: |-
                  Selector is a label selector string for the pods managed by this adapter.
                  Required for HPA compatibility via the scale subresource.
                type: string
            type: object
        type: object
    served: true
    storage: true
    # The scale subresource maps the desired count to .spec.replicas and the
    # observed count and pod selector to .status.replicas / .status.selector.
    # The status subresource is enabled with no extra options.
    subresources:
      scale:
        labelSelectorPath: .status.selector
        specReplicasPath: .spec.replicas
        statusReplicasPath: .status.replicas
      status: {}
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,7 @@ rules:
- dynamocomponentdeployments
- dynamographdeploymentrequests
- dynamographdeployments
- dynamographdeploymentscalingadapters
- dynamomodels
verbs:
- create
Expand All @@ -393,6 +394,7 @@ rules:
- dynamocomponentdeployments/status
- dynamographdeploymentrequests/status
- dynamographdeployments/status
- dynamographdeploymentscalingadapters/status
- dynamomodels/status
verbs:
- get
Expand Down
9 changes: 0 additions & 9 deletions deploy/cloud/operator/api/v1alpha1/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
package v1alpha1

import (
autoscalingv2 "k8s.io/api/autoscaling/v2"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
)
Expand Down Expand Up @@ -53,14 +52,6 @@ type VolumeMount struct {
UseAsCompilationCache bool `json:"useAsCompilationCache,omitempty"`
}

// Autoscaling groups per-component autoscaling settings: an on/off flag,
// lower and upper replica bounds, and optional HorizontalPodAutoscaler
// behavior and metric specs taken from k8s.io/api/autoscaling/v2.
// Every field is tagged omitempty, so zero values are left out when serialized.
type Autoscaling struct {
	Enabled bool `json:"enabled,omitempty"`
	MinReplicas int `json:"minReplicas,omitempty"`
	MaxReplicas int `json:"maxReplicas,omitempty"`
	Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"`
	Metrics []autoscalingv2.MetricSpec `json:"metrics,omitempty"`
}

type SharedMemorySpec struct {
Disabled bool `json:"disabled,omitempty"`
Size resource.Quantity `json:"size,omitempty"`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,6 @@ type DynamoComponentDeploymentSharedSpec struct {
// Resources requested and limits for this component, including CPU, memory,
// GPUs/devices, and any runtime-specific resources.
Resources *Resources `json:"resources,omitempty"`
// Autoscaling config for this component (replica range, target utilization, etc.).
Autoscaling *Autoscaling `json:"autoscaling,omitempty"`
// Envs defines additional environment variables to inject into the component containers.
Envs []corev1.EnvVar `json:"envs,omitempty"`
// EnvFromSecret references a Secret whose key/value pairs will be exposed as
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package v1alpha1

import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// DynamoGraphDeploymentScalingAdapterSpec defines the desired state of DynamoGraphDeploymentScalingAdapter
type DynamoGraphDeploymentScalingAdapterSpec struct {
	// NOTE(maintainers): this note is deliberately detached (blank line below) from
	// the field doc comments, whose text also appears as the field descriptions in
	// the CRD manifest. Both fields are marked Required and neither JSON tag uses
	// omitempty, so Replicas == 0 is still serialized as "replicas": 0; that keeps
	// .spec.replicas, the specpath named by the scale subresource marker on
	// DynamoGraphDeploymentScalingAdapter, present in the object.

	// Replicas is the desired number of replicas for the target service.
	// This field is modified by external autoscalers (HPA/KEDA/Planner) or manually by users.
	// +kubebuilder:validation:Required
	// +kubebuilder:validation:Minimum=0
	Replicas int32 `json:"replicas"`

	// DGDRef references the DynamoGraphDeployment and the specific service to scale.
	// +kubebuilder:validation:Required
	DGDRef DynamoGraphDeploymentServiceRef `json:"dgdRef"`
}

// DynamoGraphDeploymentServiceRef identifies a specific service within a DynamoGraphDeployment
type DynamoGraphDeploymentServiceRef struct {
	// NOTE(maintainers): detached from the field doc comments on purpose, since
	// their text also appears as the field descriptions in the CRD manifest.
	// Both fields are required, non-empty strings (MinLength=1).
	// NOTE(review): there is no namespace field here, so the referenced
	// DynamoGraphDeployment is presumably resolved in the adapter's own
	// namespace; confirm against the controller.

	// Name of the DynamoGraphDeployment
	// +kubebuilder:validation:Required
	// +kubebuilder:validation:MinLength=1
	Name string `json:"name"`

	// Service is the key name of the service within the DGD's spec.services map to scale
	// +kubebuilder:validation:Required
	// +kubebuilder:validation:MinLength=1
	Service string `json:"service"`
}

// DynamoGraphDeploymentScalingAdapterStatus defines the observed state of DynamoGraphDeploymentScalingAdapter
type DynamoGraphDeploymentScalingAdapterStatus struct {
	// NOTE(maintainers): detached from the field doc comments on purpose, since
	// their text also appears as the field descriptions in the CRD manifest.
	// Every field is optional and tagged omitempty, so zero values (0, "", nil)
	// are left out of the serialized status. Replicas and Selector are the
	// statuspath and selectorpath named by the scale subresource marker on
	// DynamoGraphDeploymentScalingAdapter. LastScaleTime is a pointer so it can
	// stay nil (and be omitted) when no value has been recorded.

	// Replicas is the current number of replicas for the target service.
	// This is synced from the DGD's service replicas and is required for the scale subresource.
	// +optional
	Replicas int32 `json:"replicas,omitempty"`

	// Selector is a label selector string for the pods managed by this adapter.
	// Required for HPA compatibility via the scale subresource.
	// +optional
	Selector string `json:"selector,omitempty"`

	// LastScaleTime is the last time the adapter scaled the target service.
	// +optional
	LastScaleTime *metav1.Time `json:"lastScaleTime,omitempty"`
}

// Markers for the DynamoGraphDeploymentScalingAdapter root type. They request:
// a status subresource; a scale subresource wired to .spec.replicas,
// .status.replicas and .status.selector; the DGD / SERVICE / REPLICAS / AGE
// printer columns; and the "dgdsa" short name.
// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:subresource:scale:specpath=.spec.replicas,statuspath=.status.replicas,selectorpath=.status.selector
// +kubebuilder:printcolumn:name="DGD",type="string",JSONPath=".spec.dgdRef.name",description="DynamoGraphDeployment name"
// +kubebuilder:printcolumn:name="SERVICE",type="string",JSONPath=".spec.dgdRef.service",description="Service name"
// +kubebuilder:printcolumn:name="REPLICAS",type="integer",JSONPath=".status.replicas",description="Current replicas"
// +kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp"
// +kubebuilder:resource:shortName={dgdsa}

// DynamoGraphDeploymentScalingAdapter provides a scaling interface for individual services
// within a DynamoGraphDeployment. It implements the Kubernetes scale
// subresource, enabling integration with HPA, KEDA, and custom autoscalers.
//
// The adapter acts as an intermediary between autoscalers and the DGD,
// ensuring that only the adapter controller modifies the DGD's service replicas.
// This prevents conflicts when multiple autoscaling mechanisms are in play.
type DynamoGraphDeploymentScalingAdapter struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`

	Spec DynamoGraphDeploymentScalingAdapterSpec `json:"spec,omitempty"`
	Status DynamoGraphDeploymentScalingAdapterStatus `json:"status,omitempty"`
}

// +kubebuilder:object:root=true

// DynamoGraphDeploymentScalingAdapterList contains a list of DynamoGraphDeploymentScalingAdapter
type DynamoGraphDeploymentScalingAdapterList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`
	// Items holds the DynamoGraphDeploymentScalingAdapter objects in this list.
	// The tag has no omitempty, so the "items" key is always serialized.
	Items []DynamoGraphDeploymentScalingAdapter `json:"items"`
}

func init() {
SchemeBuilder.Register(&DynamoGraphDeploymentScalingAdapter{}, &DynamoGraphDeploymentScalingAdapterList{})
}
Loading
Loading