Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,13 @@ spec:
(such as Pod, Service, and Ingress when applicable).
type: object
autoscaling:
description: Autoscaling config for this component (replica range, target utilization, etc.).
description: |-
Deprecated: This field is deprecated and ignored. Use DynamoGraphDeploymentScalingAdapter
with HPA, KEDA, or Planner for autoscaling instead. See docs/kubernetes/autoscaling.md
for migration guidance. This field will be removed in a future API version.
properties:
behavior:
description: |-
HorizontalPodAutoscalerBehavior configures the scaling behavior of the target
in both Up and Down directions (scaleUp and scaleDown fields respectively).
description: 'Deprecated: This field is ignored.'
properties:
scaleDown:
description: |-
Expand Down Expand Up @@ -231,10 +232,13 @@ spec:
type: object
type: object
enabled:
description: 'Deprecated: This field is ignored.'
type: boolean
maxReplicas:
description: 'Deprecated: This field is ignored.'
type: integer
metrics:
description: 'Deprecated: This field is ignored.'
items:
description: |-
MetricSpec specifies how to scale based on a single metric
Expand Down Expand Up @@ -665,6 +669,7 @@ spec:
type: object
type: array
minReplicas:
description: 'Deprecated: This field is ignored.'
type: integer
type: object
backendFramework:
Expand Down Expand Up @@ -10184,8 +10189,12 @@ spec:
type: integer
type: object
replicas:
description: Replicas is the desired number of Pods for this component when autoscaling is not used.
description: |-
Replicas is the desired number of Pods for this component.
When scalingAdapter is enabled (default), this field is managed by the
DynamoGraphDeploymentScalingAdapter and should not be modified directly.
format: int32
minimum: 0
type: integer
resources:
description: |-
Expand Down Expand Up @@ -10264,6 +10273,20 @@ spec:
type: string
type: object
type: object
scalingAdapter:
description: |-
ScalingAdapter configures whether this service uses the DynamoGraphDeploymentScalingAdapter.
When enabled (default), replicas are managed via DGDSA and external autoscalers can scale
the service using the Scale subresource. When disabled, replicas can be modified directly.
properties:
disable:
default: false
description: |-
Disable indicates whether the ScalingAdapter should be disabled for this service.
When false (default), a DGDSA is created and owns the replicas field.
When true, no DGDSA is created and replicas can be modified directly in the DGD.
type: boolean
type: object
serviceName:
description: The name of the component
type: string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,12 +219,13 @@ spec:
(such as Pod, Service, and Ingress when applicable).
type: object
autoscaling:
description: Autoscaling config for this component (replica range, target utilization, etc.).
description: |-
Deprecated: This field is deprecated and ignored. Use DynamoGraphDeploymentScalingAdapter
with HPA, KEDA, or Planner for autoscaling instead. See docs/kubernetes/autoscaling.md
for migration guidance. This field will be removed in a future API version.
properties:
behavior:
description: |-
HorizontalPodAutoscalerBehavior configures the scaling behavior of the target
in both Up and Down directions (scaleUp and scaleDown fields respectively).
description: 'Deprecated: This field is ignored.'
properties:
scaleDown:
description: |-
Expand Down Expand Up @@ -373,10 +374,13 @@ spec:
type: object
type: object
enabled:
description: 'Deprecated: This field is ignored.'
type: boolean
maxReplicas:
description: 'Deprecated: This field is ignored.'
type: integer
metrics:
description: 'Deprecated: This field is ignored.'
items:
description: |-
MetricSpec specifies how to scale based on a single metric
Expand Down Expand Up @@ -807,6 +811,7 @@ spec:
type: object
type: array
minReplicas:
description: 'Deprecated: This field is ignored.'
type: integer
type: object
componentType:
Expand Down Expand Up @@ -10319,8 +10324,12 @@ spec:
type: integer
type: object
replicas:
description: Replicas is the desired number of Pods for this component when autoscaling is not used.
description: |-
Replicas is the desired number of Pods for this component.
When scalingAdapter is enabled (default), this field is managed by the
DynamoGraphDeploymentScalingAdapter and should not be modified directly.
format: int32
minimum: 0
type: integer
resources:
description: |-
Expand Down Expand Up @@ -10399,6 +10408,20 @@ spec:
type: string
type: object
type: object
scalingAdapter:
description: |-
ScalingAdapter configures whether this service uses the DynamoGraphDeploymentScalingAdapter.
When enabled (default), replicas are managed via DGDSA and external autoscalers can scale
the service using the Scale subresource. When disabled, replicas can be modified directly.
properties:
disable:
default: false
description: |-
Disable indicates whether the ScalingAdapter should be disabled for this service.
When false (default), a DGDSA is created and owns the replicas field.
When true, no DGDSA is created and replicas can be modified directly in the DGD.
type: boolean
type: object
serviceName:
description: The name of the component
type: string
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  annotations:
    controller-gen.kubebuilder.io/version: v0.16.4
    helm.sh/resource-policy: keep
  name: dynamographdeploymentscalingadapters.nvidia.com
spec:
  group: nvidia.com
  names:
    kind: DynamoGraphDeploymentScalingAdapter
    listKind: DynamoGraphDeploymentScalingAdapterList
    plural: dynamographdeploymentscalingadapters
    shortNames:
    - dgdsa
    singular: dynamographdeploymentscalingadapter
  scope: Namespaced
  versions:
  - additionalPrinterColumns:
    - description: DynamoGraphDeployment name
      jsonPath: .spec.dgdRef.name
      name: DGD
      type: string
    - description: Service name
      jsonPath: .spec.dgdRef.serviceName
      name: SERVICE
      type: string
    - description: Current replicas
      jsonPath: .status.replicas
      name: REPLICAS
      type: integer
    - jsonPath: .metadata.creationTimestamp
      name: AGE
      type: date
    name: v1alpha1
    schema:
      openAPIV3Schema:
        description: |-
          DynamoGraphDeploymentScalingAdapter provides a scaling interface for individual services
          within a DynamoGraphDeployment. It implements the Kubernetes scale
          subresource, enabling integration with HPA, KEDA, and custom autoscalers.

          The adapter acts as an intermediary between autoscalers and the DGD,
          ensuring that only the adapter controller modifies the DGD's service replicas.
          This prevents conflicts when multiple autoscaling mechanisms are in play.
        properties:
          apiVersion:
            description: |-
              APIVersion defines the versioned schema of this representation of an object.
              Servers should convert recognized schemas to the latest internal value, and
              may reject unrecognized values.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
            type: string
          kind:
            description: |-
              Kind is a string value representing the REST resource this object represents.
              Servers may infer this from the endpoint the client submits requests to.
              Cannot be updated.
              In CamelCase.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
            type: string
          metadata:
            type: object
          spec:
            description: DynamoGraphDeploymentScalingAdapterSpec defines the desired state of DynamoGraphDeploymentScalingAdapter
            properties:
              dgdRef:
                description: DGDRef references the DynamoGraphDeployment and the specific service to scale.
                properties:
                  name:
                    description: Name of the DynamoGraphDeployment
                    minLength: 1
                    type: string
                  serviceName:
                    description: ServiceName is the key name of the service within the DGD's spec.services map to scale
                    minLength: 1
                    type: string
                required:
                - name
                - serviceName
                type: object
              replicas:
                description: |-
                  Replicas is the desired number of replicas for the target service.
                  This field is modified by external autoscalers (HPA/KEDA/Planner) or manually by users.
                format: int32
                minimum: 0
                type: integer
            required:
            - dgdRef
            - replicas
            type: object
          status:
            description: DynamoGraphDeploymentScalingAdapterStatus defines the observed state of DynamoGraphDeploymentScalingAdapter
            properties:
              lastScaleTime:
                description: LastScaleTime is the last time the adapter scaled the target service.
                format: date-time
                type: string
              replicas:
                description: |-
                  Replicas is the current number of replicas for the target service.
                  This is synced from the DGD's service replicas and is required for the scale subresource.
                format: int32
                type: integer
              selector:
                description: |-
                  Selector is a label selector string for the pods managed by this adapter.
                  Required for HPA compatibility via the scale subresource.
                type: string
            type: object
        type: object
    served: true
    storage: true
    subresources:
      scale:
        labelSelectorPath: .status.selector
        specReplicasPath: .spec.replicas
        statusReplicasPath: .status.replicas
      status: {}
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,7 @@ rules:
- dynamocomponentdeployments
- dynamographdeploymentrequests
- dynamographdeployments
- dynamographdeploymentscalingadapters
- dynamomodels
verbs:
- create
Expand All @@ -393,6 +394,7 @@ rules:
- dynamocomponentdeployments/status
- dynamographdeploymentrequests/status
- dynamographdeployments/status
- dynamographdeploymentscalingadapters/status
- dynamomodels/status
verbs:
- get
Expand Down
30 changes: 25 additions & 5 deletions deploy/cloud/operator/api/v1alpha1/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,20 @@ type VolumeMount struct {
UseAsCompilationCache bool `json:"useAsCompilationCache,omitempty"`
}

// Autoscaling is the legacy per-component autoscaling configuration.
//
// Deprecated: This field is deprecated and ignored. Use DynamoGraphDeploymentScalingAdapter
// with HPA, KEDA, or Planner for autoscaling instead. See docs/kubernetes/autoscaling.md
// for migration guidance. This field will be removed in a future API version.
type Autoscaling struct {
	// Deprecated: This field is ignored.
	Enabled bool `json:"enabled,omitempty"`
	// Deprecated: This field is ignored.
	MinReplicas int `json:"minReplicas,omitempty"`
	// Deprecated: This field is ignored.
	MaxReplicas int `json:"maxReplicas,omitempty"`
	// Deprecated: This field is ignored.
	Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"`
	// Deprecated: This field is ignored.
	Metrics []autoscalingv2.MetricSpec `json:"metrics,omitempty"`
}

type SharedMemorySpec struct {
Expand Down Expand Up @@ -115,3 +123,15 @@ type ExtraPodSpec struct {
*corev1.PodSpec `json:",inline"`
MainContainer *corev1.Container `json:"mainContainer,omitempty"`
}

// ScalingAdapter configures whether a service uses the DynamoGraphDeploymentScalingAdapter
// (DGDSA) for replica management. When enabled (default), the DGDSA owns the replicas field and
// external autoscalers (HPA, KEDA, Planner) can control scaling via the Scale subresource.
// The zero value (Disable == false) leaves the adapter enabled.
type ScalingAdapter struct {
// Disable indicates whether the ScalingAdapter should be disabled for this service.
// When false (default), a DGDSA is created and owns the replicas field.
// When true, no DGDSA is created and replicas can be modified directly in the DGD.
// +optional
// +kubebuilder:default=false
Disable bool `json:"disable,omitempty"`
}
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,9 @@ type DynamoComponentDeploymentSharedSpec struct {
// Resources requested and limits for this component, including CPU, memory,
// GPUs/devices, and any runtime-specific resources.
Resources *Resources `json:"resources,omitempty"`
// Autoscaling config for this component (replica range, target utilization, etc.).
// Deprecated: This field is deprecated and ignored. Use DynamoGraphDeploymentScalingAdapter
// with HPA, KEDA, or Planner for autoscaling instead. See docs/kubernetes/autoscaling.md
// for migration guidance. This field will be removed in a future API version.
Autoscaling *Autoscaling `json:"autoscaling,omitempty"`
// Envs defines additional environment variables to inject into the component containers.
Envs []corev1.EnvVar `json:"envs,omitempty"`
Expand Down Expand Up @@ -108,10 +110,18 @@ type DynamoComponentDeploymentSharedSpec struct {
LivenessProbe *corev1.Probe `json:"livenessProbe,omitempty"`
// ReadinessProbe to signal when the container is ready to receive traffic.
ReadinessProbe *corev1.Probe `json:"readinessProbe,omitempty"`
// Replicas is the desired number of Pods for this component when autoscaling is not used.
// Replicas is the desired number of Pods for this component.
// When scalingAdapter is enabled (default), this field is managed by the
// DynamoGraphDeploymentScalingAdapter and should not be modified directly.
// +kubebuilder:validation:Minimum=0
Replicas *int32 `json:"replicas,omitempty"`
// Multinode is the configuration for multinode components.
Multinode *MultinodeSpec `json:"multinode,omitempty"`
// ScalingAdapter configures whether this service uses the DynamoGraphDeploymentScalingAdapter.
// When enabled (default), replicas are managed via DGDSA and external autoscalers can scale
// the service using the Scale subresource. When disabled, replicas can be modified directly.
// +optional
ScalingAdapter *ScalingAdapter `json:"scalingAdapter,omitempty"`
}

type MultinodeSpec struct {
Expand Down
Loading
Loading