Skip to content

Commit 09f2314

Browse files
feat: add scaling adapter (#4699)
Signed-off-by: Julien Mancuso <[email protected]>
1 parent 1f9b69b commit 09f2314

33 files changed

+2939
-264
lines changed

deploy/cloud/helm/crds/templates/nvidia.com_dynamocomponentdeployments.yaml

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,12 +77,13 @@ spec:
7777
(such as Pod, Service, and Ingress when applicable).
7878
type: object
7979
autoscaling:
80-
description: Autoscaling config for this component (replica range, target utilization, etc.).
80+
description: |-
81+
Deprecated: This field is deprecated and ignored. Use DynamoGraphDeploymentScalingAdapter
82+
with HPA, KEDA, or Planner for autoscaling instead. See docs/kubernetes/autoscaling.md
83+
for migration guidance. This field will be removed in a future API version.
8184
properties:
8285
behavior:
83-
description: |-
84-
HorizontalPodAutoscalerBehavior configures the scaling behavior of the target
85-
in both Up and Down directions (scaleUp and scaleDown fields respectively).
86+
description: 'Deprecated: This field is ignored.'
8687
properties:
8788
scaleDown:
8889
description: |-
@@ -231,10 +232,13 @@ spec:
231232
type: object
232233
type: object
233234
enabled:
235+
description: 'Deprecated: This field is ignored.'
234236
type: boolean
235237
maxReplicas:
238+
description: 'Deprecated: This field is ignored.'
236239
type: integer
237240
metrics:
241+
description: 'Deprecated: This field is ignored.'
238242
items:
239243
description: |-
240244
MetricSpec specifies how to scale based on a single metric
@@ -665,6 +669,7 @@ spec:
665669
type: object
666670
type: array
667671
minReplicas:
672+
description: 'Deprecated: This field is ignored.'
668673
type: integer
669674
type: object
670675
backendFramework:
@@ -10184,8 +10189,12 @@ spec:
1018410189
type: integer
1018510190
type: object
1018610191
replicas:
10187-
description: Replicas is the desired number of Pods for this component when autoscaling is not used.
10192+
description: |-
10193+
Replicas is the desired number of Pods for this component.
10194+
When scalingAdapter is enabled (default), this field is managed by the
10195+
DynamoGraphDeploymentScalingAdapter and should not be modified directly.
1018810196
format: int32
10197+
minimum: 0
1018910198
type: integer
1019010199
resources:
1019110200
description: |-
@@ -10264,6 +10273,20 @@ spec:
1026410273
type: string
1026510274
type: object
1026610275
type: object
10276+
scalingAdapter:
10277+
description: |-
10278+
ScalingAdapter configures whether this service uses the DynamoGraphDeploymentScalingAdapter.
10279+
When enabled (default), replicas are managed via DGDSA and external autoscalers can scale
10280+
the service using the Scale subresource. When disabled, replicas can be modified directly.
10281+
properties:
10282+
disable:
10283+
default: false
10284+
description: |-
10285+
Disable indicates whether the ScalingAdapter should be disabled for this service.
10286+
When false (default), a DGDSA is created and owns the replicas field.
10287+
When true, no DGDSA is created and replicas can be modified directly in the DGD.
10288+
type: boolean
10289+
type: object
1026710290
serviceName:
1026810291
description: The name of the component
1026910292
type: string

deploy/cloud/helm/crds/templates/nvidia.com_dynamographdeployments.yaml

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -219,12 +219,13 @@ spec:
219219
(such as Pod, Service, and Ingress when applicable).
220220
type: object
221221
autoscaling:
222-
description: Autoscaling config for this component (replica range, target utilization, etc.).
222+
description: |-
223+
Deprecated: This field is deprecated and ignored. Use DynamoGraphDeploymentScalingAdapter
224+
with HPA, KEDA, or Planner for autoscaling instead. See docs/kubernetes/autoscaling.md
225+
for migration guidance. This field will be removed in a future API version.
223226
properties:
224227
behavior:
225-
description: |-
226-
HorizontalPodAutoscalerBehavior configures the scaling behavior of the target
227-
in both Up and Down directions (scaleUp and scaleDown fields respectively).
228+
description: 'Deprecated: This field is ignored.'
228229
properties:
229230
scaleDown:
230231
description: |-
@@ -373,10 +374,13 @@ spec:
373374
type: object
374375
type: object
375376
enabled:
377+
description: 'Deprecated: This field is ignored.'
376378
type: boolean
377379
maxReplicas:
380+
description: 'Deprecated: This field is ignored.'
378381
type: integer
379382
metrics:
383+
description: 'Deprecated: This field is ignored.'
380384
items:
381385
description: |-
382386
MetricSpec specifies how to scale based on a single metric
@@ -807,6 +811,7 @@ spec:
807811
type: object
808812
type: array
809813
minReplicas:
814+
description: 'Deprecated: This field is ignored.'
810815
type: integer
811816
type: object
812817
componentType:
@@ -10319,8 +10324,12 @@ spec:
1031910324
type: integer
1032010325
type: object
1032110326
replicas:
10322-
description: Replicas is the desired number of Pods for this component when autoscaling is not used.
10327+
description: |-
10328+
Replicas is the desired number of Pods for this component.
10329+
When scalingAdapter is enabled (default), this field is managed by the
10330+
DynamoGraphDeploymentScalingAdapter and should not be modified directly.
1032310331
format: int32
10332+
minimum: 0
1032410333
type: integer
1032510334
resources:
1032610335
description: |-
@@ -10399,6 +10408,20 @@ spec:
1039910408
type: string
1040010409
type: object
1040110410
type: object
10411+
scalingAdapter:
10412+
description: |-
10413+
ScalingAdapter configures whether this service uses the DynamoGraphDeploymentScalingAdapter.
10414+
When enabled (default), replicas are managed via DGDSA and external autoscalers can scale
10415+
the service using the Scale subresource. When disabled, replicas can be modified directly.
10416+
properties:
10417+
disable:
10418+
default: false
10419+
description: |-
10420+
Disable indicates whether the ScalingAdapter should be disabled for this service.
10421+
When false (default), a DGDSA is created and owns the replicas field.
10422+
When true, no DGDSA is created and replicas can be modified directly in the DGD.
10423+
type: boolean
10424+
type: object
1040210425
serviceName:
1040310426
description: The name of the component
1040410427
type: string

deploy/cloud/helm/crds/templates/nvidia.com_dynamographdeploymentscalingadapters.yaml

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
---
17+
apiVersion: apiextensions.k8s.io/v1
18+
kind: CustomResourceDefinition
19+
metadata:
20+
annotations:
21+
controller-gen.kubebuilder.io/version: v0.16.4
22+
helm.sh/resource-policy: keep
23+
name: dynamographdeploymentscalingadapters.nvidia.com
24+
spec:
25+
group: nvidia.com
26+
names:
27+
kind: DynamoGraphDeploymentScalingAdapter
28+
listKind: DynamoGraphDeploymentScalingAdapterList
29+
plural: dynamographdeploymentscalingadapters
30+
shortNames:
31+
- dgdsa
32+
singular: dynamographdeploymentscalingadapter
33+
scope: Namespaced
34+
versions:
35+
- additionalPrinterColumns:
36+
- description: DynamoGraphDeployment name
37+
jsonPath: .spec.dgdRef.name
38+
name: DGD
39+
type: string
40+
- description: Service name
41+
jsonPath: .spec.dgdRef.serviceName
42+
name: SERVICE
43+
type: string
44+
- description: Current replicas
45+
jsonPath: .status.replicas
46+
name: REPLICAS
47+
type: integer
48+
- jsonPath: .metadata.creationTimestamp
49+
name: AGE
50+
type: date
51+
name: v1alpha1
52+
schema:
53+
openAPIV3Schema:
54+
description: |-
55+
DynamoGraphDeploymentScalingAdapter provides a scaling interface for individual services
56+
within a DynamoGraphDeployment. It implements the Kubernetes scale
57+
subresource, enabling integration with HPA, KEDA, and custom autoscalers.
58+
59+
The adapter acts as an intermediary between autoscalers and the DGD,
60+
ensuring that only the adapter controller modifies the DGD's service replicas.
61+
This prevents conflicts when multiple autoscaling mechanisms are in play.
62+
properties:
63+
apiVersion:
64+
description: |-
65+
APIVersion defines the versioned schema of this representation of an object.
66+
Servers should convert recognized schemas to the latest internal value, and
67+
may reject unrecognized values.
68+
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
69+
type: string
70+
kind:
71+
description: |-
72+
Kind is a string value representing the REST resource this object represents.
73+
Servers may infer this from the endpoint the client submits requests to.
74+
Cannot be updated.
75+
In CamelCase.
76+
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
77+
type: string
78+
metadata:
79+
type: object
80+
spec:
81+
description: DynamoGraphDeploymentScalingAdapterSpec defines the desired state of DynamoGraphDeploymentScalingAdapter
82+
properties:
83+
dgdRef:
84+
description: DGDRef references the DynamoGraphDeployment and the specific service to scale.
85+
properties:
86+
name:
87+
description: Name of the DynamoGraphDeployment
88+
minLength: 1
89+
type: string
90+
serviceName:
91+
description: ServiceName is the key name of the service within the DGD's spec.services map to scale
92+
minLength: 1
93+
type: string
94+
required:
95+
- name
96+
- serviceName
97+
type: object
98+
replicas:
99+
description: |-
100+
Replicas is the desired number of replicas for the target service.
101+
This field is modified by external autoscalers (HPA/KEDA/Planner) or manually by users.
102+
format: int32
103+
minimum: 0
104+
type: integer
105+
required:
106+
- dgdRef
107+
- replicas
108+
type: object
109+
status:
110+
description: DynamoGraphDeploymentScalingAdapterStatus defines the observed state of DynamoGraphDeploymentScalingAdapter
111+
properties:
112+
lastScaleTime:
113+
description: LastScaleTime is the last time the adapter scaled the target service.
114+
format: date-time
115+
type: string
116+
replicas:
117+
description: |-
118+
Replicas is the current number of replicas for the target service.
119+
This is synced from the DGD's service replicas and is required for the scale subresource.
120+
format: int32
121+
type: integer
122+
selector:
123+
description: |-
124+
Selector is a label selector string for the pods managed by this adapter.
125+
Required for HPA compatibility via the scale subresource.
126+
type: string
127+
type: object
128+
type: object
129+
served: true
130+
storage: true
131+
subresources:
132+
scale:
133+
labelSelectorPath: .status.selector
134+
specReplicasPath: .spec.replicas
135+
statusReplicasPath: .status.replicas
136+
status: {}

deploy/cloud/helm/platform/components/operator/templates/manager-rbac.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,7 @@ rules:
369369
- dynamocomponentdeployments
370370
- dynamographdeploymentrequests
371371
- dynamographdeployments
372+
- dynamographdeploymentscalingadapters
372373
- dynamomodels
373374
verbs:
374375
- create
@@ -393,6 +394,7 @@ rules:
393394
- dynamocomponentdeployments/status
394395
- dynamographdeploymentrequests/status
395396
- dynamographdeployments/status
397+
- dynamographdeploymentscalingadapters/status
396398
- dynamomodels/status
397399
verbs:
398400
- get

deploy/cloud/operator/api/v1alpha1/common.go

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,20 @@ type VolumeMount struct {
5353
UseAsCompilationCache bool `json:"useAsCompilationCache,omitempty"`
5454
}
5555

56+
// Deprecated: Autoscaling is deprecated and ignored. Use DynamoGraphDeploymentScalingAdapter
57+
// with HPA, KEDA, or Planner for autoscaling instead. See docs/kubernetes/autoscaling.md
58+
// for migration guidance. This field will be removed in a future API version.
5659
type Autoscaling struct {
57-
Enabled bool `json:"enabled,omitempty"`
58-
MinReplicas int `json:"minReplicas,omitempty"`
59-
MaxReplicas int `json:"maxReplicas,omitempty"`
60-
Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"`
61-
Metrics []autoscalingv2.MetricSpec `json:"metrics,omitempty"`
60+
// Deprecated: This field is ignored.
61+
Enabled bool `json:"enabled,omitempty"`
62+
// Deprecated: This field is ignored.
63+
MinReplicas int `json:"minReplicas,omitempty"`
64+
// Deprecated: This field is ignored.
65+
MaxReplicas int `json:"maxReplicas,omitempty"`
66+
// Deprecated: This field is ignored.
67+
Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"`
68+
// Deprecated: This field is ignored.
69+
Metrics []autoscalingv2.MetricSpec `json:"metrics,omitempty"`
6270
}
6371

6472
type SharedMemorySpec struct {
@@ -115,3 +123,15 @@ type ExtraPodSpec struct {
115123
*corev1.PodSpec `json:",inline"`
116124
MainContainer *corev1.Container `json:"mainContainer,omitempty"`
117125
}
126+
127+
// ScalingAdapter configures whether a service uses the DynamoGraphDeploymentScalingAdapter
128+
// for replica management. When enabled (default), the DGDSA owns the replicas field and
129+
// external autoscalers (HPA, KEDA, Planner) can control scaling via the Scale subresource.
130+
type ScalingAdapter struct {
131+
// Disable indicates whether the ScalingAdapter should be disabled for this service.
132+
// When false (default), a DGDSA is created and owns the replicas field.
133+
// When true, no DGDSA is created and replicas can be modified directly in the DGD.
134+
// +optional
135+
// +kubebuilder:default=false
136+
Disable bool `json:"disable,omitempty"`
137+
}

deploy/cloud/operator/api/v1alpha1/dynamocomponentdeployment_types.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,9 @@ type DynamoComponentDeploymentSharedSpec struct {
7474
// Resources requested and limits for this component, including CPU, memory,
7575
// GPUs/devices, and any runtime-specific resources.
7676
Resources *Resources `json:"resources,omitempty"`
77-
// Autoscaling config for this component (replica range, target utilization, etc.).
77+
// Deprecated: This field is deprecated and ignored. Use DynamoGraphDeploymentScalingAdapter
78+
// with HPA, KEDA, or Planner for autoscaling instead. See docs/kubernetes/autoscaling.md
79+
// for migration guidance. This field will be removed in a future API version.
7880
Autoscaling *Autoscaling `json:"autoscaling,omitempty"`
7981
// Envs defines additional environment variables to inject into the component containers.
8082
Envs []corev1.EnvVar `json:"envs,omitempty"`
@@ -108,10 +110,18 @@ type DynamoComponentDeploymentSharedSpec struct {
108110
LivenessProbe *corev1.Probe `json:"livenessProbe,omitempty"`
109111
// ReadinessProbe to signal when the container is ready to receive traffic.
110112
ReadinessProbe *corev1.Probe `json:"readinessProbe,omitempty"`
111-
// Replicas is the desired number of Pods for this component when autoscaling is not used.
113+
// Replicas is the desired number of Pods for this component.
114+
// When scalingAdapter is enabled (default), this field is managed by the
115+
// DynamoGraphDeploymentScalingAdapter and should not be modified directly.
116+
// +kubebuilder:validation:Minimum=0
112117
Replicas *int32 `json:"replicas,omitempty"`
113118
// Multinode is the configuration for multinode components.
114119
Multinode *MultinodeSpec `json:"multinode,omitempty"`
120+
// ScalingAdapter configures whether this service uses the DynamoGraphDeploymentScalingAdapter.
121+
// When enabled (default), replicas are managed via DGDSA and external autoscalers can scale
122+
// the service using the Scale subresource. When disabled, replicas can be modified directly.
123+
// +optional
124+
ScalingAdapter *ScalingAdapter `json:"scalingAdapter,omitempty"`
115125
}
116126

117127
type MultinodeSpec struct {

0 commit comments

Comments
 (0)