diff --git a/deploy/cloud/helm/crds/templates/nvidia.com_dynamocomponentdeployments.yaml b/deploy/cloud/helm/crds/templates/nvidia.com_dynamocomponentdeployments.yaml index 558a5b973d..c90e3bdfe7 100644 --- a/deploy/cloud/helm/crds/templates/nvidia.com_dynamocomponentdeployments.yaml +++ b/deploy/cloud/helm/crds/templates/nvidia.com_dynamocomponentdeployments.yaml @@ -77,12 +77,13 @@ spec: (such as Pod, Service, and Ingress when applicable). type: object autoscaling: - description: Autoscaling config for this component (replica range, target utilization, etc.). + description: |- + Deprecated: This field is deprecated and ignored. Use DynamoGraphDeploymentScalingAdapter + with HPA, KEDA, or Planner for autoscaling instead. See docs/kubernetes/autoscaling.md + for migration guidance. This field will be removed in a future API version. properties: behavior: - description: |- - HorizontalPodAutoscalerBehavior configures the scaling behavior of the target - in both Up and Down directions (scaleUp and scaleDown fields respectively). + description: 'Deprecated: This field is ignored.' properties: scaleDown: description: |- @@ -231,10 +232,13 @@ spec: type: object type: object enabled: + description: 'Deprecated: This field is ignored.' type: boolean maxReplicas: + description: 'Deprecated: This field is ignored.' type: integer metrics: + description: 'Deprecated: This field is ignored.' items: description: |- MetricSpec specifies how to scale based on a single metric @@ -665,6 +669,7 @@ spec: type: object type: array minReplicas: + description: 'Deprecated: This field is ignored.' type: integer type: object backendFramework: @@ -10184,8 +10189,12 @@ spec: type: integer type: object replicas: - description: Replicas is the desired number of Pods for this component when autoscaling is not used. + description: |- + Replicas is the desired number of Pods for this component. + When scalingAdapter is enabled (default), this field is managed by the + DynamoGraphDeploymentScalingAdapter and should not be modified directly. format: int32 + minimum: 0 type: integer resources: description: |- @@ -10264,6 +10273,20 @@ spec: type: string type: object type: object + scalingAdapter: + description: |- + ScalingAdapter configures whether this service uses the DynamoGraphDeploymentScalingAdapter. + When enabled (default), replicas are managed via DGDSA and external autoscalers can scale + the service using the Scale subresource. When disabled, replicas can be modified directly. + properties: + disable: + default: false + description: |- + Disable indicates whether the ScalingAdapter should be disabled for this service. + When false (default), a DGDSA is created and owns the replicas field. + When true, no DGDSA is created and replicas can be modified directly in the DGD. + type: boolean + type: object serviceName: description: The name of the component type: string diff --git a/deploy/cloud/helm/crds/templates/nvidia.com_dynamographdeployments.yaml b/deploy/cloud/helm/crds/templates/nvidia.com_dynamographdeployments.yaml index ba2b19fef9..4db1e902b8 100644 --- a/deploy/cloud/helm/crds/templates/nvidia.com_dynamographdeployments.yaml +++ b/deploy/cloud/helm/crds/templates/nvidia.com_dynamographdeployments.yaml @@ -219,12 +219,13 @@ spec: (such as Pod, Service, and Ingress when applicable). type: object autoscaling: - description: Autoscaling config for this component (replica range, target utilization, etc.). + description: |- + Deprecated: This field is deprecated and ignored. 
Use DynamoGraphDeploymentScalingAdapter + with HPA, KEDA, or Planner for autoscaling instead. See docs/kubernetes/autoscaling.md + for migration guidance. This field will be removed in a future API version. properties: behavior: - description: |- - HorizontalPodAutoscalerBehavior configures the scaling behavior of the target - in both Up and Down directions (scaleUp and scaleDown fields respectively). + description: 'Deprecated: This field is ignored.' properties: scaleDown: description: |- @@ -373,10 +374,13 @@ spec: type: object type: object enabled: + description: 'Deprecated: This field is ignored.' type: boolean maxReplicas: + description: 'Deprecated: This field is ignored.' type: integer metrics: + description: 'Deprecated: This field is ignored.' items: description: |- MetricSpec specifies how to scale based on a single metric @@ -807,6 +811,7 @@ spec: type: object type: array minReplicas: + description: 'Deprecated: This field is ignored.' type: integer type: object componentType: @@ -10319,8 +10324,12 @@ spec: type: integer type: object replicas: - description: Replicas is the desired number of Pods for this component when autoscaling is not used. + description: |- + Replicas is the desired number of Pods for this component. + When scalingAdapter is enabled (default), this field is managed by the + DynamoGraphDeploymentScalingAdapter and should not be modified directly. format: int32 + minimum: 0 type: integer resources: description: |- @@ -10399,6 +10408,20 @@ spec: type: string type: object type: object + scalingAdapter: + description: |- + ScalingAdapter configures whether this service uses the DynamoGraphDeploymentScalingAdapter. + When enabled (default), replicas are managed via DGDSA and external autoscalers can scale + the service using the Scale subresource. When disabled, replicas can be modified directly. + properties: + disable: + default: false + description: |- + Disable indicates whether the ScalingAdapter should be disabled for this service. + When false (default), a DGDSA is created and owns the replicas field. + When true, no DGDSA is created and replicas can be modified directly in the DGD. + type: boolean + type: object serviceName: description: The name of the component type: string diff --git a/deploy/cloud/helm/crds/templates/nvidia.com_dynamographdeploymentscalingadapters.yaml b/deploy/cloud/helm/crds/templates/nvidia.com_dynamographdeploymentscalingadapters.yaml new file mode 100644 index 0000000000..f822bb91db --- /dev/null +++ b/deploy/cloud/helm/crds/templates/nvidia.com_dynamographdeploymentscalingadapters.yaml @@ -0,0 +1,136 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
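+#
+# Illustrative usage once this CRD is installed (the "dgdsa" short name is
+# declared below; the adapter name shown is hypothetical):
+#   kubectl get dgdsa -n my-namespace
+#   kubectl scale dgdsa my-dgd-decode --replicas=3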
+ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.4 + helm.sh/resource-policy: keep + name: dynamographdeploymentscalingadapters.nvidia.com +spec: + group: nvidia.com + names: + kind: DynamoGraphDeploymentScalingAdapter + listKind: DynamoGraphDeploymentScalingAdapterList + plural: dynamographdeploymentscalingadapters + shortNames: + - dgdsa + singular: dynamographdeploymentscalingadapter + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: DynamoGraphDeployment name + jsonPath: .spec.dgdRef.name + name: DGD + type: string + - description: Service name + jsonPath: .spec.dgdRef.serviceName + name: SERVICE + type: string + - description: Current replicas + jsonPath: .status.replicas + name: REPLICAS + type: integer + - jsonPath: .metadata.creationTimestamp + name: AGE + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + DynamoGraphDeploymentScalingAdapter provides a scaling interface for individual services + within a DynamoGraphDeployment. It implements the Kubernetes scale + subresource, enabling integration with HPA, KEDA, and custom autoscalers. + + The adapter acts as an intermediary between autoscalers and the DGD, + ensuring that only the adapter controller modifies the DGD's service replicas. + This prevents conflicts when multiple autoscaling mechanisms are in play. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: DynamoGraphDeploymentScalingAdapterSpec defines the desired state of DynamoGraphDeploymentScalingAdapter + properties: + dgdRef: + description: DGDRef references the DynamoGraphDeployment and the specific service to scale. + properties: + name: + description: Name of the DynamoGraphDeployment + minLength: 1 + type: string + serviceName: + description: ServiceName is the key name of the service within the DGD's spec.services map to scale + minLength: 1 + type: string + required: + - name + - serviceName + type: object + replicas: + description: |- + Replicas is the desired number of replicas for the target service. + This field is modified by external autoscalers (HPA/KEDA/Planner) or manually by users. + format: int32 + minimum: 0 + type: integer + required: + - dgdRef + - replicas + type: object + status: + description: DynamoGraphDeploymentScalingAdapterStatus defines the observed state of DynamoGraphDeploymentScalingAdapter + properties: + lastScaleTime: + description: LastScaleTime is the last time the adapter scaled the target service. + format: date-time + type: string + replicas: + description: |- + Replicas is the current number of replicas for the target service. + This is synced from the DGD's service replicas and is required for the scale subresource. 
+ format: int32 + type: integer + selector: + description: |- + Selector is a label selector string for the pods managed by this adapter. + Required for HPA compatibility via the scale subresource. + type: string + type: object + type: object + served: true + storage: true + subresources: + scale: + labelSelectorPath: .status.selector + specReplicasPath: .spec.replicas + statusReplicasPath: .status.replicas + status: {} diff --git a/deploy/cloud/helm/platform/components/operator/templates/manager-rbac.yaml b/deploy/cloud/helm/platform/components/operator/templates/manager-rbac.yaml index 8ab42c0988..7ae1eb6c5d 100644 --- a/deploy/cloud/helm/platform/components/operator/templates/manager-rbac.yaml +++ b/deploy/cloud/helm/platform/components/operator/templates/manager-rbac.yaml @@ -369,6 +369,7 @@ rules: - dynamocomponentdeployments - dynamographdeploymentrequests - dynamographdeployments + - dynamographdeploymentscalingadapters - dynamomodels verbs: - create @@ -393,6 +394,7 @@ rules: - dynamocomponentdeployments/status - dynamographdeploymentrequests/status - dynamographdeployments/status + - dynamographdeploymentscalingadapters/status - dynamomodels/status verbs: - get diff --git a/deploy/cloud/operator/api/v1alpha1/common.go b/deploy/cloud/operator/api/v1alpha1/common.go index 5673fd5cfd..b68dd818c0 100644 --- a/deploy/cloud/operator/api/v1alpha1/common.go +++ b/deploy/cloud/operator/api/v1alpha1/common.go @@ -53,12 +53,20 @@ type VolumeMount struct { UseAsCompilationCache bool `json:"useAsCompilationCache,omitempty"` } +// Deprecated: This field is deprecated and ignored. Use DynamoGraphDeploymentScalingAdapter +// with HPA, KEDA, or Planner for autoscaling instead. See docs/kubernetes/autoscaling.md +// for migration guidance. This field will be removed in a future API version. type Autoscaling struct { - Enabled bool `json:"enabled,omitempty"` - MinReplicas int `json:"minReplicas,omitempty"` - MaxReplicas int `json:"maxReplicas,omitempty"` - Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"` - Metrics []autoscalingv2.MetricSpec `json:"metrics,omitempty"` + // Deprecated: This field is ignored. + Enabled bool `json:"enabled,omitempty"` + // Deprecated: This field is ignored. + MinReplicas int `json:"minReplicas,omitempty"` + // Deprecated: This field is ignored. + MaxReplicas int `json:"maxReplicas,omitempty"` + // Deprecated: This field is ignored. + Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"` + // Deprecated: This field is ignored. + Metrics []autoscalingv2.MetricSpec `json:"metrics,omitempty"` } type SharedMemorySpec struct { @@ -115,3 +123,15 @@ type ExtraPodSpec struct { *corev1.PodSpec `json:",inline"` MainContainer *corev1.Container `json:"mainContainer,omitempty"` } + +// ScalingAdapter configures whether a service uses the DynamoGraphDeploymentScalingAdapter +// for replica management. When enabled (default), the DGDSA owns the replicas field and +// external autoscalers (HPA, KEDA, Planner) can control scaling via the Scale subresource. +type ScalingAdapter struct { + // Disable indicates whether the ScalingAdapter should be disabled for this service. + // When false (default), a DGDSA is created and owns the replicas field. + // When true, no DGDSA is created and replicas can be modified directly in the DGD. 
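+	//
+	// Minimal sketch of opting a service out of adapter-managed scaling in a
+	// DGD manifest (the service key "decode" is illustrative):
+	//
+	//	services:
+	//	  decode:
+	//	    replicas: 3
+	//	    scalingAdapter:
+	//	      disable: true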
+ // +optional + // +kubebuilder:default=false + Disable bool `json:"disable,omitempty"` +} diff --git a/deploy/cloud/operator/api/v1alpha1/dynamocomponentdeployment_types.go b/deploy/cloud/operator/api/v1alpha1/dynamocomponentdeployment_types.go index 8f484057ab..8a2abb78f2 100644 --- a/deploy/cloud/operator/api/v1alpha1/dynamocomponentdeployment_types.go +++ b/deploy/cloud/operator/api/v1alpha1/dynamocomponentdeployment_types.go @@ -74,7 +74,9 @@ type DynamoComponentDeploymentSharedSpec struct { // Resources requested and limits for this component, including CPU, memory, // GPUs/devices, and any runtime-specific resources. Resources *Resources `json:"resources,omitempty"` - // Autoscaling config for this component (replica range, target utilization, etc.). + // Deprecated: This field is deprecated and ignored. Use DynamoGraphDeploymentScalingAdapter + // with HPA, KEDA, or Planner for autoscaling instead. See docs/kubernetes/autoscaling.md + // for migration guidance. This field will be removed in a future API version. Autoscaling *Autoscaling `json:"autoscaling,omitempty"` // Envs defines additional environment variables to inject into the component containers. Envs []corev1.EnvVar `json:"envs,omitempty"` @@ -108,10 +110,18 @@ type DynamoComponentDeploymentSharedSpec struct { LivenessProbe *corev1.Probe `json:"livenessProbe,omitempty"` // ReadinessProbe to signal when the container is ready to receive traffic. ReadinessProbe *corev1.Probe `json:"readinessProbe,omitempty"` - // Replicas is the desired number of Pods for this component when autoscaling is not used. + // Replicas is the desired number of Pods for this component. + // When scalingAdapter is enabled (default), this field is managed by the + // DynamoGraphDeploymentScalingAdapter and should not be modified directly. + // +kubebuilder:validation:Minimum=0 Replicas *int32 `json:"replicas,omitempty"` // Multinode is the configuration for multinode components. Multinode *MultinodeSpec `json:"multinode,omitempty"` + // ScalingAdapter configures whether this service uses the DynamoGraphDeploymentScalingAdapter. + // When enabled (default), replicas are managed via DGDSA and external autoscalers can scale + // the service using the Scale subresource. When disabled, replicas can be modified directly. + // +optional + ScalingAdapter *ScalingAdapter `json:"scalingAdapter,omitempty"` } type MultinodeSpec struct { diff --git a/deploy/cloud/operator/api/v1alpha1/dynamographdeploymentscalingadapter_types.go b/deploy/cloud/operator/api/v1alpha1/dynamographdeploymentscalingadapter_types.go new file mode 100644 index 0000000000..d4da1a0ccf --- /dev/null +++ b/deploy/cloud/operator/api/v1alpha1/dynamographdeploymentscalingadapter_types.go @@ -0,0 +1,102 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// DynamoGraphDeploymentScalingAdapterSpec defines the desired state of DynamoGraphDeploymentScalingAdapter +type DynamoGraphDeploymentScalingAdapterSpec struct { + // Replicas is the desired number of replicas for the target service. + // This field is modified by external autoscalers (HPA/KEDA/Planner) or manually by users. + // +kubebuilder:validation:Required + // +kubebuilder:validation:Minimum=0 + Replicas int32 `json:"replicas"` + + // DGDRef references the DynamoGraphDeployment and the specific service to scale. + // +kubebuilder:validation:Required + DGDRef DynamoGraphDeploymentServiceRef `json:"dgdRef"` +} + +// DynamoGraphDeploymentServiceRef identifies a specific service within a DynamoGraphDeployment +type DynamoGraphDeploymentServiceRef struct { + // Name of the DynamoGraphDeployment + // +kubebuilder:validation:Required + // +kubebuilder:validation:MinLength=1 + Name string `json:"name"` + + // ServiceName is the key name of the service within the DGD's spec.services map to scale + // +kubebuilder:validation:Required + // +kubebuilder:validation:MinLength=1 + ServiceName string `json:"serviceName"` +} + +// DynamoGraphDeploymentScalingAdapterStatus defines the observed state of DynamoGraphDeploymentScalingAdapter +type DynamoGraphDeploymentScalingAdapterStatus struct { + // Replicas is the current number of replicas for the target service. + // This is synced from the DGD's service replicas and is required for the scale subresource. + // +optional + Replicas int32 `json:"replicas,omitempty"` + + // Selector is a label selector string for the pods managed by this adapter. + // Required for HPA compatibility via the scale subresource. + // +optional + Selector string `json:"selector,omitempty"` + + // LastScaleTime is the last time the adapter scaled the target service. + // +optional + LastScaleTime *metav1.Time `json:"lastScaleTime,omitempty"` +} + +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +kubebuilder:subresource:scale:specpath=.spec.replicas,statuspath=.status.replicas,selectorpath=.status.selector +// +kubebuilder:printcolumn:name="DGD",type="string",JSONPath=".spec.dgdRef.name",description="DynamoGraphDeployment name" +// +kubebuilder:printcolumn:name="SERVICE",type="string",JSONPath=".spec.dgdRef.serviceName",description="Service name" +// +kubebuilder:printcolumn:name="REPLICAS",type="integer",JSONPath=".status.replicas",description="Current replicas" +// +kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp" +// +kubebuilder:resource:shortName={dgdsa} + +// DynamoGraphDeploymentScalingAdapter provides a scaling interface for individual services +// within a DynamoGraphDeployment. It implements the Kubernetes scale +// subresource, enabling integration with HPA, KEDA, and custom autoscalers. +// +// The adapter acts as an intermediary between autoscalers and the DGD, +// ensuring that only the adapter controller modifies the DGD's service replicas. +// This prevents conflicts when multiple autoscaling mechanisms are in play. 
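+//
+// Hedged sketch (names illustrative): because the adapter exposes the scale
+// subresource, a standard HPA can target it directly:
+//
+//	apiVersion: autoscaling/v2
+//	kind: HorizontalPodAutoscaler
+//	metadata:
+//	  name: decode-hpa
+//	spec:
+//	  scaleTargetRef:
+//	    apiVersion: nvidia.com/v1alpha1
+//	    kind: DynamoGraphDeploymentScalingAdapter
+//	    name: my-dgd-decode
+//	  minReplicas: 1
+//	  maxReplicas: 8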
+type DynamoGraphDeploymentScalingAdapter struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec DynamoGraphDeploymentScalingAdapterSpec `json:"spec,omitempty"` + Status DynamoGraphDeploymentScalingAdapterStatus `json:"status,omitempty"` +} + +// +kubebuilder:object:root=true + +// DynamoGraphDeploymentScalingAdapterList contains a list of DynamoGraphDeploymentScalingAdapter +type DynamoGraphDeploymentScalingAdapterList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []DynamoGraphDeploymentScalingAdapter `json:"items"` +} + +func init() { + SchemeBuilder.Register(&DynamoGraphDeploymentScalingAdapter{}, &DynamoGraphDeploymentScalingAdapterList{}) +} diff --git a/deploy/cloud/operator/api/v1alpha1/zz_generated.deepcopy.go b/deploy/cloud/operator/api/v1alpha1/zz_generated.deepcopy.go index 56d33cd498..d3ecbb44ec 100644 --- a/deploy/cloud/operator/api/v1alpha1/zz_generated.deepcopy.go +++ b/deploy/cloud/operator/api/v1alpha1/zz_generated.deepcopy.go @@ -371,6 +371,11 @@ func (in *DynamoComponentDeploymentSharedSpec) DeepCopyInto(out *DynamoComponent *out = new(MultinodeSpec) **out = **in } + if in.ScalingAdapter != nil { + in, out := &in.ScalingAdapter, &out.ScalingAdapter + *out = new(ScalingAdapter) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DynamoComponentDeploymentSharedSpec. @@ -599,6 +604,115 @@ func (in *DynamoGraphDeploymentRequestStatus) DeepCopy() *DynamoGraphDeploymentR return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DynamoGraphDeploymentScalingAdapter) DeepCopyInto(out *DynamoGraphDeploymentScalingAdapter) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + out.Spec = in.Spec + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DynamoGraphDeploymentScalingAdapter. +func (in *DynamoGraphDeploymentScalingAdapter) DeepCopy() *DynamoGraphDeploymentScalingAdapter { + if in == nil { + return nil + } + out := new(DynamoGraphDeploymentScalingAdapter) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *DynamoGraphDeploymentScalingAdapter) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DynamoGraphDeploymentScalingAdapterList) DeepCopyInto(out *DynamoGraphDeploymentScalingAdapterList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]DynamoGraphDeploymentScalingAdapter, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DynamoGraphDeploymentScalingAdapterList. +func (in *DynamoGraphDeploymentScalingAdapterList) DeepCopy() *DynamoGraphDeploymentScalingAdapterList { + if in == nil { + return nil + } + out := new(DynamoGraphDeploymentScalingAdapterList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
+func (in *DynamoGraphDeploymentScalingAdapterList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DynamoGraphDeploymentScalingAdapterSpec) DeepCopyInto(out *DynamoGraphDeploymentScalingAdapterSpec) { + *out = *in + out.DGDRef = in.DGDRef +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DynamoGraphDeploymentScalingAdapterSpec. +func (in *DynamoGraphDeploymentScalingAdapterSpec) DeepCopy() *DynamoGraphDeploymentScalingAdapterSpec { + if in == nil { + return nil + } + out := new(DynamoGraphDeploymentScalingAdapterSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DynamoGraphDeploymentScalingAdapterStatus) DeepCopyInto(out *DynamoGraphDeploymentScalingAdapterStatus) { + *out = *in + if in.LastScaleTime != nil { + in, out := &in.LastScaleTime, &out.LastScaleTime + *out = (*in).DeepCopy() + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DynamoGraphDeploymentScalingAdapterStatus. +func (in *DynamoGraphDeploymentScalingAdapterStatus) DeepCopy() *DynamoGraphDeploymentScalingAdapterStatus { + if in == nil { + return nil + } + out := new(DynamoGraphDeploymentScalingAdapterStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DynamoGraphDeploymentServiceRef) DeepCopyInto(out *DynamoGraphDeploymentServiceRef) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DynamoGraphDeploymentServiceRef. +func (in *DynamoGraphDeploymentServiceRef) DeepCopy() *DynamoGraphDeploymentServiceRef { + if in == nil { + return nil + } + out := new(DynamoGraphDeploymentServiceRef) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *DynamoGraphDeploymentSpec) DeepCopyInto(out *DynamoGraphDeploymentSpec) { *out = *in @@ -1085,6 +1199,21 @@ func (in *Resources) DeepCopy() *Resources { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ScalingAdapter) DeepCopyInto(out *ScalingAdapter) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ScalingAdapter. +func (in *ScalingAdapter) DeepCopy() *ScalingAdapter { + if in == nil { + return nil + } + out := new(ScalingAdapter) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *SharedMemorySpec) DeepCopyInto(out *SharedMemorySpec) { *out = *in diff --git a/deploy/cloud/operator/cmd/main.go b/deploy/cloud/operator/cmd/main.go index 4d79cfe3f0..dc1a33b262 100644 --- a/deploy/cloud/operator/cmd/main.go +++ b/deploy/cloud/operator/cmd/main.go @@ -578,6 +578,16 @@ func main() { os.Exit(1) } + if err = (&controller.DynamoGraphDeploymentScalingAdapterReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorderFor("dgdscalingadapter"), + Config: ctrlConfig, + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "DGDScalingAdapter") + os.Exit(1) + } + if err = (&controller.DynamoGraphDeploymentRequestReconciler{ Client: mgr.GetClient(), Recorder: mgr.GetEventRecorderFor("dynamographdeploymentrequest"), diff --git a/deploy/cloud/operator/config/crd/bases/nvidia.com_dynamocomponentdeployments.yaml b/deploy/cloud/operator/config/crd/bases/nvidia.com_dynamocomponentdeployments.yaml index 558a5b973d..c90e3bdfe7 100644 --- a/deploy/cloud/operator/config/crd/bases/nvidia.com_dynamocomponentdeployments.yaml +++ b/deploy/cloud/operator/config/crd/bases/nvidia.com_dynamocomponentdeployments.yaml @@ -77,12 +77,13 @@ spec: (such as Pod, Service, and Ingress when applicable). type: object autoscaling: - description: Autoscaling config for this component (replica range, target utilization, etc.). + description: |- + Deprecated: This field is deprecated and ignored. Use DynamoGraphDeploymentScalingAdapter + with HPA, KEDA, or Planner for autoscaling instead. See docs/kubernetes/autoscaling.md + for migration guidance. This field will be removed in a future API version. properties: behavior: - description: |- - HorizontalPodAutoscalerBehavior configures the scaling behavior of the target - in both Up and Down directions (scaleUp and scaleDown fields respectively). + description: 'Deprecated: This field is ignored.' properties: scaleDown: description: |- @@ -231,10 +232,13 @@ spec: type: object type: object enabled: + description: 'Deprecated: This field is ignored.' type: boolean maxReplicas: + description: 'Deprecated: This field is ignored.' type: integer metrics: + description: 'Deprecated: This field is ignored.' items: description: |- MetricSpec specifies how to scale based on a single metric @@ -665,6 +669,7 @@ spec: type: object type: array minReplicas: + description: 'Deprecated: This field is ignored.' type: integer type: object backendFramework: @@ -10184,8 +10189,12 @@ spec: type: integer type: object replicas: - description: Replicas is the desired number of Pods for this component when autoscaling is not used. + description: |- + Replicas is the desired number of Pods for this component. + When scalingAdapter is enabled (default), this field is managed by the + DynamoGraphDeploymentScalingAdapter and should not be modified directly. format: int32 + minimum: 0 type: integer resources: description: |- @@ -10264,6 +10273,20 @@ spec: type: string type: object type: object + scalingAdapter: + description: |- + ScalingAdapter configures whether this service uses the DynamoGraphDeploymentScalingAdapter. + When enabled (default), replicas are managed via DGDSA and external autoscalers can scale + the service using the Scale subresource. When disabled, replicas can be modified directly. + properties: + disable: + default: false + description: |- + Disable indicates whether the ScalingAdapter should be disabled for this service. 
+ When false (default), a DGDSA is created and owns the replicas field. + When true, no DGDSA is created and replicas can be modified directly in the DGD. + type: boolean + type: object serviceName: description: The name of the component type: string diff --git a/deploy/cloud/operator/config/crd/bases/nvidia.com_dynamographdeployments.yaml b/deploy/cloud/operator/config/crd/bases/nvidia.com_dynamographdeployments.yaml index ba2b19fef9..4db1e902b8 100644 --- a/deploy/cloud/operator/config/crd/bases/nvidia.com_dynamographdeployments.yaml +++ b/deploy/cloud/operator/config/crd/bases/nvidia.com_dynamographdeployments.yaml @@ -219,12 +219,13 @@ spec: (such as Pod, Service, and Ingress when applicable). type: object autoscaling: - description: Autoscaling config for this component (replica range, target utilization, etc.). + description: |- + Deprecated: This field is deprecated and ignored. Use DynamoGraphDeploymentScalingAdapter + with HPA, KEDA, or Planner for autoscaling instead. See docs/kubernetes/autoscaling.md + for migration guidance. This field will be removed in a future API version. properties: behavior: - description: |- - HorizontalPodAutoscalerBehavior configures the scaling behavior of the target - in both Up and Down directions (scaleUp and scaleDown fields respectively). + description: 'Deprecated: This field is ignored.' properties: scaleDown: description: |- @@ -373,10 +374,13 @@ spec: type: object type: object enabled: + description: 'Deprecated: This field is ignored.' type: boolean maxReplicas: + description: 'Deprecated: This field is ignored.' type: integer metrics: + description: 'Deprecated: This field is ignored.' items: description: |- MetricSpec specifies how to scale based on a single metric @@ -807,6 +811,7 @@ spec: type: object type: array minReplicas: + description: 'Deprecated: This field is ignored.' type: integer type: object componentType: @@ -10319,8 +10324,12 @@ spec: type: integer type: object replicas: - description: Replicas is the desired number of Pods for this component when autoscaling is not used. + description: |- + Replicas is the desired number of Pods for this component. + When scalingAdapter is enabled (default), this field is managed by the + DynamoGraphDeploymentScalingAdapter and should not be modified directly. format: int32 + minimum: 0 type: integer resources: description: |- @@ -10399,6 +10408,20 @@ spec: type: string type: object type: object + scalingAdapter: + description: |- + ScalingAdapter configures whether this service uses the DynamoGraphDeploymentScalingAdapter. + When enabled (default), replicas are managed via DGDSA and external autoscalers can scale + the service using the Scale subresource. When disabled, replicas can be modified directly. + properties: + disable: + default: false + description: |- + Disable indicates whether the ScalingAdapter should be disabled for this service. + When false (default), a DGDSA is created and owns the replicas field. + When true, no DGDSA is created and replicas can be modified directly in the DGD. 
+ type: boolean + type: object serviceName: description: The name of the component type: string diff --git a/deploy/cloud/operator/config/crd/bases/nvidia.com_dynamographdeploymentscalingadapters.yaml b/deploy/cloud/operator/config/crd/bases/nvidia.com_dynamographdeploymentscalingadapters.yaml new file mode 100644 index 0000000000..f822bb91db --- /dev/null +++ b/deploy/cloud/operator/config/crd/bases/nvidia.com_dynamographdeploymentscalingadapters.yaml @@ -0,0 +1,136 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.4 + helm.sh/resource-policy: keep + name: dynamographdeploymentscalingadapters.nvidia.com +spec: + group: nvidia.com + names: + kind: DynamoGraphDeploymentScalingAdapter + listKind: DynamoGraphDeploymentScalingAdapterList + plural: dynamographdeploymentscalingadapters + shortNames: + - dgdsa + singular: dynamographdeploymentscalingadapter + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: DynamoGraphDeployment name + jsonPath: .spec.dgdRef.name + name: DGD + type: string + - description: Service name + jsonPath: .spec.dgdRef.serviceName + name: SERVICE + type: string + - description: Current replicas + jsonPath: .status.replicas + name: REPLICAS + type: integer + - jsonPath: .metadata.creationTimestamp + name: AGE + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + DynamoGraphDeploymentScalingAdapter provides a scaling interface for individual services + within a DynamoGraphDeployment. It implements the Kubernetes scale + subresource, enabling integration with HPA, KEDA, and custom autoscalers. + + The adapter acts as an intermediary between autoscalers and the DGD, + ensuring that only the adapter controller modifies the DGD's service replicas. + This prevents conflicts when multiple autoscaling mechanisms are in play. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: DynamoGraphDeploymentScalingAdapterSpec defines the desired state of DynamoGraphDeploymentScalingAdapter + properties: + dgdRef: + description: DGDRef references the DynamoGraphDeployment and the specific service to scale. + properties: + name: + description: Name of the DynamoGraphDeployment + minLength: 1 + type: string + serviceName: + description: ServiceName is the key name of the service within the DGD's spec.services map to scale + minLength: 1 + type: string + required: + - name + - serviceName + type: object + replicas: + description: |- + Replicas is the desired number of replicas for the target service. + This field is modified by external autoscalers (HPA/KEDA/Planner) or manually by users. + format: int32 + minimum: 0 + type: integer + required: + - dgdRef + - replicas + type: object + status: + description: DynamoGraphDeploymentScalingAdapterStatus defines the observed state of DynamoGraphDeploymentScalingAdapter + properties: + lastScaleTime: + description: LastScaleTime is the last time the adapter scaled the target service. + format: date-time + type: string + replicas: + description: |- + Replicas is the current number of replicas for the target service. + This is synced from the DGD's service replicas and is required for the scale subresource. + format: int32 + type: integer + selector: + description: |- + Selector is a label selector string for the pods managed by this adapter. + Required for HPA compatibility via the scale subresource. + type: string + type: object + type: object + served: true + storage: true + subresources: + scale: + labelSelectorPath: .status.selector + specReplicasPath: .spec.replicas + statusReplicasPath: .status.replicas + status: {} diff --git a/deploy/cloud/operator/config/rbac/role.yaml b/deploy/cloud/operator/config/rbac/role.yaml index b473aa1ad7..2a3a00c6f8 100644 --- a/deploy/cloud/operator/config/rbac/role.yaml +++ b/deploy/cloud/operator/config/rbac/role.yaml @@ -182,6 +182,7 @@ rules: - dynamocomponentdeployments - dynamographdeploymentrequests - dynamographdeployments + - dynamographdeploymentscalingadapters - dynamomodels verbs: - create @@ -206,6 +207,7 @@ rules: - dynamocomponentdeployments/status - dynamographdeploymentrequests/status - dynamographdeployments/status + - dynamographdeploymentscalingadapters/status - dynamomodels/status verbs: - get diff --git a/deploy/cloud/operator/internal/consts/consts.go b/deploy/cloud/operator/internal/consts/consts.go index 882f9f18d9..6dd3bc0712 100644 --- a/deploy/cloud/operator/internal/consts/consts.go +++ b/deploy/cloud/operator/internal/consts/consts.go @@ -7,8 +7,6 @@ import ( ) const ( - HPACPUDefaultAverageUtilization = 80 - DefaultUserId = "default" DefaultOrgId = "default" diff --git a/deploy/cloud/operator/internal/controller/common.go b/deploy/cloud/operator/internal/controller/common.go index 70a70fdead..e41cbe1deb 100644 --- a/deploy/cloud/operator/internal/controller/common.go +++ b/deploy/cloud/operator/internal/controller/common.go @@ -53,3 +53,43 @@ type dockerSecretRetriever interface { // returns a list of secret names associated with the docker registry GetSecrets(namespace, registry string) ([]string, error) } + +// getServiceKeys returns the keys of the services map for logging purposes +func getServiceKeys(services map[string]*v1alpha1.DynamoComponentDeploymentSharedSpec) []string { + keys := 
make([]string, 0, len(services)) + for k := range services { + keys = append(keys, k) + } + return keys +} + +// servicesEqual compares two services maps to detect changes in replica counts +func servicesEqual(old, new map[string]*v1alpha1.DynamoComponentDeploymentSharedSpec) bool { + if len(old) != len(new) { + return false + } + + for key, oldSvc := range old { + newSvc, exists := new[key] + if !exists { + return false + } + + // Compare replicas + oldReplicas := int32(1) + if oldSvc.Replicas != nil { + oldReplicas = *oldSvc.Replicas + } + + newReplicas := int32(1) + if newSvc.Replicas != nil { + newReplicas = *newSvc.Replicas + } + + if oldReplicas != newReplicas { + return false + } + } + + return true +} diff --git a/deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller.go b/deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller.go index 307bf7ac05..88d92e2f42 100644 --- a/deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller.go +++ b/deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller.go @@ -338,21 +338,6 @@ func (r *DynamoComponentDeploymentReconciler) Reconcile(ctx context.Context, req } deployment = obj - - // create or update api-server hpa - modified_, _, err = commonController.SyncResource(ctx, r, dynamoComponentDeployment, func(ctx context.Context) (*autoscalingv2.HorizontalPodAutoscaler, bool, error) { - return r.generateHPA(generateResourceOption{ - dynamoComponentDeployment: dynamoComponentDeployment, - }) - }) - if err != nil { - return ctrl.Result{}, err - } - - if modified_ { - modified = true - } - } // create or update api-server service @@ -1114,63 +1099,6 @@ type generateResourceOption struct { instanceID *int } -func (r *DynamoComponentDeploymentReconciler) generateHPA(opt generateResourceOption) (*autoscalingv2.HorizontalPodAutoscaler, bool, error) { - labels := r.getKubeLabels(opt.dynamoComponentDeployment) - - annotations := r.getKubeAnnotations(opt.dynamoComponentDeployment) - - kubeName := r.getKubeName(opt.dynamoComponentDeployment, false) - - kubeNs := opt.dynamoComponentDeployment.Namespace - - hpaConf := opt.dynamoComponentDeployment.Spec.Autoscaling - - kubeHpa := &autoscalingv2.HorizontalPodAutoscaler{ - ObjectMeta: metav1.ObjectMeta{ - Name: kubeName, - Namespace: kubeNs, - Labels: labels, - Annotations: annotations, - }, - } - - if hpaConf == nil || !hpaConf.Enabled { - // if hpa is not enabled, we need to delete the hpa - return kubeHpa, true, nil - } - - minReplica := int32(hpaConf.MinReplicas) - - kubeHpa.Spec = autoscalingv2.HorizontalPodAutoscalerSpec{ - MinReplicas: &minReplica, - MaxReplicas: int32(hpaConf.MaxReplicas), - ScaleTargetRef: autoscalingv2.CrossVersionObjectReference{ - APIVersion: "apps/v1", - Kind: "Deployment", - Name: kubeName, - }, - Metrics: hpaConf.Metrics, - } - - if len(kubeHpa.Spec.Metrics) == 0 { - averageUtilization := int32(commonconsts.HPACPUDefaultAverageUtilization) - kubeHpa.Spec.Metrics = []autoscalingv2.MetricSpec{ - { - Type: autoscalingv2.ResourceMetricSourceType, - Resource: &autoscalingv2.ResourceMetricSource{ - Name: corev1.ResourceCPU, - Target: autoscalingv2.MetricTarget{ - Type: autoscalingv2.UtilizationMetricType, - AverageUtilization: &averageUtilization, - }, - }, - }, - } - } - - return kubeHpa, false, nil -} - //nolint:gocyclo,nakedret func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx context.Context, opt generateResourceOption, role dynamo.Role) (podTemplateSpec 
*corev1.PodTemplateSpec, err error) { podLabels := r.getKubeLabels(opt.dynamoComponentDeployment) diff --git a/deploy/cloud/operator/internal/controller/dynamographdeployment_controller.go b/deploy/cloud/operator/internal/controller/dynamographdeployment_controller.go index 22dcdb5490..823818ac1e 100644 --- a/deploy/cloud/operator/internal/controller/dynamographdeployment_controller.go +++ b/deploy/cloud/operator/internal/controller/dynamographdeployment_controller.go @@ -86,6 +86,7 @@ type DynamoGraphDeploymentReconciler struct { // +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeployments,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeployments/status,verbs=get;update;patch // +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeployments/finalizers,verbs=update +// +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeploymentscalingadapters,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=grove.io,resources=podcliquesets,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=grove.io,resources=podcliques/scale,verbs=get;update;patch // +kubebuilder:rbac:groups=grove.io,resources=podcliquescalinggroups/scale,verbs=get;update;patch @@ -225,6 +226,13 @@ func (r *DynamoGraphDeploymentReconciler) reconcileResources(ctx context.Context return "", "", "", fmt.Errorf("failed to reconcile top-level PVCs: %w", err) } + // Reconcile DynamoGraphDeploymentScalingAdapters for each service + err = r.reconcileScalingAdapters(ctx, dynamoDeployment) + if err != nil { + logger.Error(err, "Failed to reconcile scaling adapters") + return "", "", "", fmt.Errorf("failed to reconcile scaling adapters: %w", err) + } + // Reconcile the SA, Role and RoleBinding if k8s discovery is enabled err = r.reconcileK8sDiscoveryResources(ctx, dynamoDeployment) if err != nil { @@ -607,6 +615,89 @@ func (r *DynamoGraphDeploymentReconciler) reconcilePVCs(ctx context.Context, dyn return nil } +// reconcileScalingAdapters ensures a DynamoGraphDeploymentScalingAdapter exists for each service in the DGD +// that has scaling adapter enabled (default). Services with scalingAdapter.disable=true will not have a DGDSA. +// This enables pluggable autoscaling via HPA, KEDA, or Planner. 
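+//
+// For example (names illustrative), a DGD named "my-dgd" with services
+// "Frontend" and "decode" yields adapters "my-dgd-frontend" and "my-dgd-decode";
+// see generateAdapterName below for the lowercasing rule.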
+func (r *DynamoGraphDeploymentReconciler) reconcileScalingAdapters(ctx context.Context, dynamoDeployment *nvidiacomv1alpha1.DynamoGraphDeployment) error { + logger := log.FromContext(ctx) + + // Process each service - SyncResource handles create, update, and delete via toDelete flag + for serviceName, component := range dynamoDeployment.Spec.Services { + // Check if scaling adapter is disabled for this service + scalingAdapterDisabled := component.ScalingAdapter != nil && component.ScalingAdapter.Disable + + // Get current replicas (default to 1 if not set) + currentReplicas := int32(1) + if component.Replicas != nil { + currentReplicas = *component.Replicas + } + + // Use SyncResource to handle creation/updates/deletion + // When toDelete=true, SyncResource will delete the existing resource if it exists + _, _, err := commonController.SyncResource(ctx, r, dynamoDeployment, func(ctx context.Context) (*nvidiacomv1alpha1.DynamoGraphDeploymentScalingAdapter, bool, error) { + adapterName := generateAdapterName(dynamoDeployment.Name, serviceName) + adapter := &nvidiacomv1alpha1.DynamoGraphDeploymentScalingAdapter{ + ObjectMeta: metav1.ObjectMeta{ + Name: adapterName, + Namespace: dynamoDeployment.Namespace, + Labels: map[string]string{ + consts.KubeLabelDynamoGraphDeploymentName: dynamoDeployment.Name, + consts.KubeLabelDynamoComponent: serviceName, + }, + }, + Spec: nvidiacomv1alpha1.DynamoGraphDeploymentScalingAdapterSpec{ + Replicas: currentReplicas, + DGDRef: nvidiacomv1alpha1.DynamoGraphDeploymentServiceRef{ + Name: dynamoDeployment.Name, + ServiceName: serviceName, + }, + }, + } + // Return toDelete=true if scaling adapter is disabled + return adapter, scalingAdapterDisabled, nil + }) + + if err != nil { + logger.Error(err, "Failed to sync DynamoGraphDeploymentScalingAdapter", "service", serviceName) + return err + } + } + + // Clean up adapters for services that were removed from DGD entirely + adapterList := &nvidiacomv1alpha1.DynamoGraphDeploymentScalingAdapterList{} + if err := r.List(ctx, adapterList, + client.InNamespace(dynamoDeployment.Namespace), + client.MatchingLabels{consts.KubeLabelDynamoGraphDeploymentName: dynamoDeployment.Name}, + ); err != nil { + logger.Error(err, "Failed to list DynamoGraphDeploymentScalingAdapters") + return err + } + + for i := range adapterList.Items { + adapter := &adapterList.Items[i] + serviceName := adapter.Spec.DGDRef.ServiceName + + // Delete adapter if service no longer exists in DGD + if _, exists := dynamoDeployment.Spec.Services[serviceName]; !exists { + logger.Info("Deleting orphaned DynamoGraphDeploymentScalingAdapter", "adapter", adapter.Name, "service", serviceName) + if err := r.Delete(ctx, adapter); err != nil && !errors.IsNotFound(err) { + logger.Error(err, "Failed to delete orphaned adapter", "adapter", adapter.Name) + return err + } + r.Recorder.Eventf(dynamoDeployment, corev1.EventTypeNormal, "AdapterDeleted", + "Deleted orphaned scaling adapter %s for removed service %s", adapter.Name, serviceName) + } + } + + return nil +} + +// generateAdapterName creates a consistent name for a DynamoGraphDeploymentScalingAdapter +// Service names are lowercased to comply with Kubernetes DNS subdomain naming requirements +func generateAdapterName(dgdName, serviceName string) string { + return fmt.Sprintf("%s-%s", dgdName, strings.ToLower(serviceName)) +} + func (r *DynamoGraphDeploymentReconciler) FinalizeResource(ctx context.Context, dynamoDeployment *nvidiacomv1alpha1.DynamoGraphDeployment) error { // for now doing nothing return nil @@ 
-626,6 +717,13 @@ func (r *DynamoGraphDeploymentReconciler) SetupWithManager(mgr ctrl.Manager) err UpdateFunc: func(de event.UpdateEvent) bool { return true }, GenericFunc: func(ge event.GenericEvent) bool { return true }, })). + Owns(&nvidiacomv1alpha1.DynamoGraphDeploymentScalingAdapter{}, builder.WithPredicates(predicate.Funcs{ + // ignore creation cause we don't want to be called again after we create the adapter + CreateFunc: func(ce event.CreateEvent) bool { return false }, + DeleteFunc: func(de event.DeleteEvent) bool { return true }, + UpdateFunc: func(de event.UpdateEvent) bool { return false }, // Adapter updates are handled by adapter controller + GenericFunc: func(ge event.GenericEvent) bool { return false }, + })). Owns(&corev1.PersistentVolumeClaim{}, builder.WithPredicates(predicate.Funcs{ // ignore creation cause we don't want to be called again after we create the PVC CreateFunc: func(ce event.CreateEvent) bool { return false }, diff --git a/deploy/cloud/operator/internal/controller/dynamographdeployment_controller_test.go b/deploy/cloud/operator/internal/controller/dynamographdeployment_controller_test.go new file mode 100644 index 0000000000..a217fd403c --- /dev/null +++ b/deploy/cloud/operator/internal/controller/dynamographdeployment_controller_test.go @@ -0,0 +1,321 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package controller + +import ( + "context" + "testing" + + "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1" + "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/tools/record" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func TestDynamoGraphDeploymentReconciler_reconcileScalingAdapters(t *testing.T) { + // Register custom types with the scheme + if err := v1alpha1.AddToScheme(scheme.Scheme); err != nil { + t.Fatalf("Failed to add v1alpha1 to scheme: %v", err) + } + + tests := []struct { + name string + dgd *v1alpha1.DynamoGraphDeployment + existingAdapters []v1alpha1.DynamoGraphDeploymentScalingAdapter + expectedAdapterCount int + expectedAdapters map[string]int32 // map of adapter name to expected replicas + expectDeleted []string // adapter names that should be deleted + }{ + { + name: "creates adapters for all services", + dgd: &v1alpha1.DynamoGraphDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dgd", + Namespace: "default", + }, + Spec: v1alpha1.DynamoGraphDeploymentSpec{ + Services: map[string]*v1alpha1.DynamoComponentDeploymentSharedSpec{ + "Frontend": { + Replicas: ptr.To(int32(2)), + }, + "decode": { + Replicas: ptr.To(int32(3)), + }, + }, + }, + }, + expectedAdapterCount: 2, + expectedAdapters: map[string]int32{ + "test-dgd-frontend": 2, + "test-dgd-decode": 3, + }, + }, + { + name: "uses default replicas when not specified", + dgd: &v1alpha1.DynamoGraphDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dgd", + Namespace: "default", + }, + Spec: v1alpha1.DynamoGraphDeploymentSpec{ + Services: map[string]*v1alpha1.DynamoComponentDeploymentSharedSpec{ + "worker": {}, + }, + }, + }, + expectedAdapterCount: 1, + expectedAdapters: map[string]int32{ + "test-dgd-worker": 1, // default replicas + }, + }, + { + name: "skips adapter creation when disabled", + dgd: &v1alpha1.DynamoGraphDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dgd", + Namespace: "default", + }, + Spec: v1alpha1.DynamoGraphDeploymentSpec{ + Services: map[string]*v1alpha1.DynamoComponentDeploymentSharedSpec{ + "Frontend": { + Replicas: ptr.To(int32(2)), + }, + "decode": { + Replicas: ptr.To(int32(3)), + ScalingAdapter: &v1alpha1.ScalingAdapter{ + Disable: true, + }, + }, + }, + }, + }, + expectedAdapterCount: 1, + expectedAdapters: map[string]int32{ + "test-dgd-frontend": 2, + }, + }, + { + name: "deletes adapter when service is removed", + dgd: &v1alpha1.DynamoGraphDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dgd", + Namespace: "default", + UID: "test-uid", + }, + Spec: v1alpha1.DynamoGraphDeploymentSpec{ + Services: map[string]*v1alpha1.DynamoComponentDeploymentSharedSpec{ + "Frontend": { + Replicas: ptr.To(int32(2)), + }, + }, + }, + }, + existingAdapters: []v1alpha1.DynamoGraphDeploymentScalingAdapter{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dgd-frontend", + Namespace: "default", + Labels: map[string]string{ + consts.KubeLabelDynamoGraphDeploymentName: "test-dgd", + }, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "nvidia.com/v1alpha1", + Kind: "DynamoGraphDeployment", + Name: "test-dgd", + UID: "test-uid", + }, + }, + }, + Spec: v1alpha1.DynamoGraphDeploymentScalingAdapterSpec{ + Replicas: 2, + DGDRef: v1alpha1.DynamoGraphDeploymentServiceRef{ + Name: "test-dgd", + ServiceName: "Frontend", + 
}, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dgd-removed", + Namespace: "default", + Labels: map[string]string{ + consts.KubeLabelDynamoGraphDeploymentName: "test-dgd", + }, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "nvidia.com/v1alpha1", + Kind: "DynamoGraphDeployment", + Name: "test-dgd", + UID: "test-uid", + }, + }, + }, + Spec: v1alpha1.DynamoGraphDeploymentScalingAdapterSpec{ + Replicas: 1, + DGDRef: v1alpha1.DynamoGraphDeploymentServiceRef{ + Name: "test-dgd", + ServiceName: "removed", + }, + }, + }, + }, + expectedAdapterCount: 1, + expectedAdapters: map[string]int32{ + "test-dgd-frontend": 2, + }, + expectDeleted: []string{"test-dgd-removed"}, + }, + { + name: "deletes adapter when scalingAdapter.disable is set to true", + dgd: &v1alpha1.DynamoGraphDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dgd", + Namespace: "default", + UID: "test-uid", + }, + Spec: v1alpha1.DynamoGraphDeploymentSpec{ + Services: map[string]*v1alpha1.DynamoComponentDeploymentSharedSpec{ + "Frontend": { + Replicas: ptr.To(int32(2)), + ScalingAdapter: &v1alpha1.ScalingAdapter{ + Disable: true, + }, + }, + }, + }, + }, + existingAdapters: []v1alpha1.DynamoGraphDeploymentScalingAdapter{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dgd-frontend", + Namespace: "default", + Labels: map[string]string{ + consts.KubeLabelDynamoGraphDeploymentName: "test-dgd", + }, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "nvidia.com/v1alpha1", + Kind: "DynamoGraphDeployment", + Name: "test-dgd", + UID: "test-uid", + }, + }, + }, + Spec: v1alpha1.DynamoGraphDeploymentScalingAdapterSpec{ + Replicas: 2, + DGDRef: v1alpha1.DynamoGraphDeploymentServiceRef{ + Name: "test-dgd", + ServiceName: "Frontend", + }, + }, + }, + }, + expectedAdapterCount: 0, + expectedAdapters: map[string]int32{}, + expectDeleted: []string{"test-dgd-frontend"}, + }, + { + name: "adapter name uses lowercase service name", + dgd: &v1alpha1.DynamoGraphDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-dgd", + Namespace: "default", + }, + Spec: v1alpha1.DynamoGraphDeploymentSpec{ + Services: map[string]*v1alpha1.DynamoComponentDeploymentSharedSpec{ + "MyService": { + Replicas: ptr.To(int32(1)), + }, + }, + }, + }, + expectedAdapterCount: 1, + expectedAdapters: map[string]int32{ + "my-dgd-myservice": 1, // lowercase + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Build initial objects + var initObjs []client.Object + initObjs = append(initObjs, tt.dgd) + for i := range tt.existingAdapters { + initObjs = append(initObjs, &tt.existingAdapters[i]) + } + + // Create fake client + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme.Scheme). + WithObjects(initObjs...). 
+ Build() + + // Create reconciler + r := &DynamoGraphDeploymentReconciler{ + Client: fakeClient, + Recorder: record.NewFakeRecorder(10), + } + + // Run reconcileScalingAdapters + ctx := context.Background() + err := r.reconcileScalingAdapters(ctx, tt.dgd) + if err != nil { + t.Fatalf("reconcileScalingAdapters() error = %v", err) + } + + // Verify adapters + adapterList := &v1alpha1.DynamoGraphDeploymentScalingAdapterList{} + if err := fakeClient.List(ctx, adapterList, client.InNamespace("default")); err != nil { + t.Fatalf("Failed to list adapters: %v", err) + } + + if len(adapterList.Items) != tt.expectedAdapterCount { + t.Errorf("Expected %d adapters, got %d", tt.expectedAdapterCount, len(adapterList.Items)) + } + + // Check expected adapters exist with correct replicas + for name, expectedReplicas := range tt.expectedAdapters { + adapter := &v1alpha1.DynamoGraphDeploymentScalingAdapter{} + err := fakeClient.Get(ctx, types.NamespacedName{Name: name, Namespace: "default"}, adapter) + if err != nil { + t.Errorf("Expected adapter %s to exist, but got error: %v", name, err) + continue + } + if adapter.Spec.Replicas != expectedReplicas { + t.Errorf("Adapter %s has replicas=%d, expected %d", name, adapter.Spec.Replicas, expectedReplicas) + } + } + + // Check that deleted adapters don't exist + for _, name := range tt.expectDeleted { + adapter := &v1alpha1.DynamoGraphDeploymentScalingAdapter{} + err := fakeClient.Get(ctx, types.NamespacedName{Name: name, Namespace: "default"}, adapter) + if err == nil { + t.Errorf("Expected adapter %s to be deleted, but it still exists", name) + } + } + }) + } +} diff --git a/deploy/cloud/operator/internal/controller/dynamographdeploymentscalingadapter_controller.go b/deploy/cloud/operator/internal/controller/dynamographdeploymentscalingadapter_controller.go new file mode 100644 index 0000000000..edaa4323ae --- /dev/null +++ b/deploy/cloud/operator/internal/controller/dynamographdeploymentscalingadapter_controller.go @@ -0,0 +1,213 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package controller + +import ( + "context" + "fmt" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/record" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + nvidiacomv1alpha1 "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1" + "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts" + commonController "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common" +) + +// DynamoGraphDeploymentScalingAdapterReconciler reconciles a DynamoGraphDeploymentScalingAdapter object +type DynamoGraphDeploymentScalingAdapterReconciler struct { + client.Client + Scheme *runtime.Scheme + Recorder record.EventRecorder + Config commonController.Config +} + +// +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeploymentscalingadapters,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeploymentscalingadapters/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeployments,verbs=get;list;watch;update;patch + +// Reconcile implements the reconciliation loop for DynamoGraphDeploymentScalingAdapter +func (r *DynamoGraphDeploymentScalingAdapterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + logger := log.FromContext(ctx) + + // 1. Fetch the DynamoGraphDeploymentScalingAdapter + adapter := &nvidiacomv1alpha1.DynamoGraphDeploymentScalingAdapter{} + if err := r.Get(ctx, req.NamespacedName, adapter); err != nil { + return ctrl.Result{}, client.IgnoreNotFound(err) + } + + // Skip reconciliation if being deleted + if !adapter.GetDeletionTimestamp().IsZero() { + logger.V(1).Info("Adapter is being deleted, skipping reconciliation") + return ctrl.Result{}, nil + } + + // 2. Fetch the referenced DGD + dgd := &nvidiacomv1alpha1.DynamoGraphDeployment{} + dgdKey := types.NamespacedName{ + Name: adapter.Spec.DGDRef.Name, + Namespace: adapter.Namespace, + } + if err := r.Get(ctx, dgdKey, dgd); err != nil { + if errors.IsNotFound(err) { + logger.Error(err, "Referenced DGD not found", "dgd", dgdKey) + // DGD doesn't exist, can't proceed + return ctrl.Result{}, err + } + return ctrl.Result{}, err + } + + // 3. Find the target service in DGD's spec.services map + component, exists := dgd.Spec.Services[adapter.Spec.DGDRef.ServiceName] + if !exists || component == nil { + logger.Error(nil, "Service not found in DGD", + "service", adapter.Spec.DGDRef.ServiceName, + "dgd", dgd.Name, + "availableServices", getServiceKeys(dgd.Spec.Services)) + return ctrl.Result{}, fmt.Errorf("service %s not found in DGD", adapter.Spec.DGDRef.ServiceName) + } + + // Get current replicas from DGD (default to 1 if not set) + currentReplicas := int32(1) + if component.Replicas != nil { + currentReplicas = *component.Replicas + } + + // 4. 
Update DGD if replicas changed (DGDSA is the source of truth) + if currentReplicas != adapter.Spec.Replicas { + // Update the service's replicas in DGD + component.Replicas = &adapter.Spec.Replicas + dgd.Spec.Services[adapter.Spec.DGDRef.ServiceName] = component + + if err := r.Update(ctx, dgd); err != nil { + logger.Error(err, "Failed to update DGD") + r.Recorder.Eventf(adapter, corev1.EventTypeWarning, "UpdateFailed", + "Failed to update DGD %s: %v", dgd.Name, err) + return ctrl.Result{}, err + } + + logger.Info("Scaled service", + "dgd", dgd.Name, + "service", adapter.Spec.DGDRef.ServiceName, + "from", currentReplicas, + "to", adapter.Spec.Replicas) + + r.Recorder.Eventf(adapter, corev1.EventTypeNormal, "Scaled", + "Scaled service %s from %d to %d replicas", adapter.Spec.DGDRef.ServiceName, currentReplicas, adapter.Spec.Replicas) + + // Record scaling event + now := metav1.Now() + adapter.Status.LastScaleTime = &now + } + + // 5. Update adapter status + adapter.Status.Replicas = adapter.Spec.Replicas + adapter.Status.Selector = r.buildPodSelector(dgd, adapter.Spec.DGDRef.ServiceName) + + if err := r.Status().Update(ctx, adapter); err != nil { + logger.Error(err, "Failed to update adapter status") + return ctrl.Result{}, err + } + + return ctrl.Result{}, nil +} + +// buildPodSelector constructs a label selector for the pods managed by this service +func (r *DynamoGraphDeploymentScalingAdapterReconciler) buildPodSelector(dgd *nvidiacomv1alpha1.DynamoGraphDeployment, serviceName string) string { + // Pods are labeled with: + // - nvidia.com/dynamo-graph-deployment-name = dgd.Name + // - nvidia.com/dynamo-component = serviceName (the key from spec.services map) + return fmt.Sprintf("%s=%s,%s=%s", + consts.KubeLabelDynamoGraphDeploymentName, dgd.Name, + consts.KubeLabelDynamoComponent, serviceName) +} + +// SetupWithManager sets up the controller with the Manager +func (r *DynamoGraphDeploymentScalingAdapterReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&nvidiacomv1alpha1.DynamoGraphDeploymentScalingAdapter{}, builder.WithPredicates( + predicate.GenerationChangedPredicate{}, + )). + Named("dgdscalingadapter"). + // Watch DGDs to sync status when DGD service replicas change + Watches( + &nvidiacomv1alpha1.DynamoGraphDeployment{}, + handler.EnqueueRequestsFromMapFunc(r.findAdaptersForDGD), + builder.WithPredicates(predicate.Funcs{ + CreateFunc: func(ce event.CreateEvent) bool { return false }, + DeleteFunc: func(de event.DeleteEvent) bool { return true }, + UpdateFunc: func(ue event.UpdateEvent) bool { + // Only trigger on spec changes (not status) + oldDGD, okOld := ue.ObjectOld.(*nvidiacomv1alpha1.DynamoGraphDeployment) + newDGD, okNew := ue.ObjectNew.(*nvidiacomv1alpha1.DynamoGraphDeployment) + if !okOld || !okNew { + return false + } + // Trigger if services map changed + return !servicesEqual(oldDGD.Spec.Services, newDGD.Spec.Services) + }, + GenericFunc: func(ge event.GenericEvent) bool { return false }, + }), + ). + WithEventFilter(commonController.EphemeralDeploymentEventFilter(r.Config)). 
+ Complete(r) +} + +// findAdaptersForDGD maps DGD changes to adapter reconcile requests +// Uses label selector to efficiently query only adapters for this specific DGD +func (r *DynamoGraphDeploymentScalingAdapterReconciler) findAdaptersForDGD(ctx context.Context, obj client.Object) []reconcile.Request { + dgd, ok := obj.(*nvidiacomv1alpha1.DynamoGraphDeployment) + if !ok { + return nil + } + + // Use label selector to filter at API level (more efficient than in-memory filtering) + adapterList := &nvidiacomv1alpha1.DynamoGraphDeploymentScalingAdapterList{} + if err := r.List(ctx, adapterList, + client.InNamespace(dgd.Namespace), + client.MatchingLabels{consts.KubeLabelDynamoGraphDeploymentName: dgd.Name}, + ); err != nil { + log.FromContext(ctx).Error(err, "Failed to list adapters for DGD", "dgd", dgd.Name) + return nil + } + + // All returned adapters are guaranteed to belong to this DGD + requests := make([]reconcile.Request, 0, len(adapterList.Items)) + for i := range adapterList.Items { + requests = append(requests, reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: adapterList.Items[i].Name, + Namespace: adapterList.Items[i].Namespace, + }, + }) + } + + return requests +} diff --git a/deploy/cloud/operator/internal/controller/dynamographdeploymentscalingadapter_controller_test.go b/deploy/cloud/operator/internal/controller/dynamographdeploymentscalingadapter_controller_test.go new file mode 100644 index 0000000000..33c6b9f5e8 --- /dev/null +++ b/deploy/cloud/operator/internal/controller/dynamographdeploymentscalingadapter_controller_test.go @@ -0,0 +1,512 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package controller + +import ( + "context" + "testing" + + "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1" + "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/tools/record" + "k8s.io/utils/ptr" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func TestDynamoGraphDeploymentScalingAdapterReconciler_Reconcile(t *testing.T) { + // Register custom types with the scheme + if err := v1alpha1.AddToScheme(scheme.Scheme); err != nil { + t.Fatalf("Failed to add v1alpha1 to scheme: %v", err) + } + + tests := []struct { + name string + adapter *v1alpha1.DynamoGraphDeploymentScalingAdapter + dgd *v1alpha1.DynamoGraphDeployment + expectedDGDReplicas int32 + expectedStatusReplicas int32 + expectError bool + expectRequeue bool + }{ + { + name: "updates DGD replicas when DGDSA spec differs", + adapter: &v1alpha1.DynamoGraphDeploymentScalingAdapter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dgd-frontend", + Namespace: "default", + }, + Spec: v1alpha1.DynamoGraphDeploymentScalingAdapterSpec{ + Replicas: 5, + DGDRef: v1alpha1.DynamoGraphDeploymentServiceRef{ + Name: "test-dgd", + ServiceName: "Frontend", + }, + }, + }, + dgd: &v1alpha1.DynamoGraphDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dgd", + Namespace: "default", + }, + Spec: v1alpha1.DynamoGraphDeploymentSpec{ + Services: map[string]*v1alpha1.DynamoComponentDeploymentSharedSpec{ + "Frontend": { + Replicas: ptr.To(int32(2)), + }, + }, + }, + }, + expectedDGDReplicas: 5, + expectedStatusReplicas: 5, + expectError: false, + }, + { + name: "no update when replicas already match", + adapter: &v1alpha1.DynamoGraphDeploymentScalingAdapter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dgd-frontend", + Namespace: "default", + }, + Spec: v1alpha1.DynamoGraphDeploymentScalingAdapterSpec{ + Replicas: 3, + DGDRef: v1alpha1.DynamoGraphDeploymentServiceRef{ + Name: "test-dgd", + ServiceName: "Frontend", + }, + }, + }, + dgd: &v1alpha1.DynamoGraphDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dgd", + Namespace: "default", + }, + Spec: v1alpha1.DynamoGraphDeploymentSpec{ + Services: map[string]*v1alpha1.DynamoComponentDeploymentSharedSpec{ + "Frontend": { + Replicas: ptr.To(int32(3)), + }, + }, + }, + }, + expectedDGDReplicas: 3, + expectedStatusReplicas: 3, + expectError: false, + }, + { + name: "uses default replicas (1) when DGD service has no replicas set", + adapter: &v1alpha1.DynamoGraphDeploymentScalingAdapter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dgd-worker", + Namespace: "default", + }, + Spec: v1alpha1.DynamoGraphDeploymentScalingAdapterSpec{ + Replicas: 4, + DGDRef: v1alpha1.DynamoGraphDeploymentServiceRef{ + Name: "test-dgd", + ServiceName: "worker", + }, + }, + }, + dgd: &v1alpha1.DynamoGraphDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dgd", + Namespace: "default", + }, + Spec: v1alpha1.DynamoGraphDeploymentSpec{ + Services: map[string]*v1alpha1.DynamoComponentDeploymentSharedSpec{ + "worker": {}, // no replicas set + }, + }, + }, + expectedDGDReplicas: 4, + expectedStatusReplicas: 4, + expectError: false, + }, + { + name: "error when service not found in DGD", + adapter: &v1alpha1.DynamoGraphDeploymentScalingAdapter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dgd-missing", + Namespace: "default", + }, + Spec: 
v1alpha1.DynamoGraphDeploymentScalingAdapterSpec{ + Replicas: 2, + DGDRef: v1alpha1.DynamoGraphDeploymentServiceRef{ + Name: "test-dgd", + ServiceName: "nonexistent", + }, + }, + }, + dgd: &v1alpha1.DynamoGraphDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dgd", + Namespace: "default", + }, + Spec: v1alpha1.DynamoGraphDeploymentSpec{ + Services: map[string]*v1alpha1.DynamoComponentDeploymentSharedSpec{ + "Frontend": { + Replicas: ptr.To(int32(1)), + }, + }, + }, + }, + expectError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Build initial objects + var initObjs []client.Object + initObjs = append(initObjs, tt.adapter, tt.dgd) + + // Create fake client with status subresource support + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme.Scheme). + WithObjects(initObjs...). + WithStatusSubresource(&v1alpha1.DynamoGraphDeploymentScalingAdapter{}). + Build() + + // Create reconciler + r := &DynamoGraphDeploymentScalingAdapterReconciler{ + Client: fakeClient, + Scheme: scheme.Scheme, + Recorder: record.NewFakeRecorder(10), + } + + // Run Reconcile + ctx := context.Background() + req := ctrl.Request{ + NamespacedName: types.NamespacedName{ + Name: tt.adapter.Name, + Namespace: tt.adapter.Namespace, + }, + } + + result, err := r.Reconcile(ctx, req) + + // Check error expectation + if tt.expectError && err == nil { + t.Errorf("Expected error, but got none") + } + if !tt.expectError && err != nil { + t.Errorf("Unexpected error: %v", err) + } + + // Skip further checks if error was expected + if tt.expectError { + return + } + + // Check requeue + if tt.expectRequeue && result.RequeueAfter == 0 { + t.Errorf("Expected requeue, but got none") + } + + // Verify DGD replicas were updated + updatedDGD := &v1alpha1.DynamoGraphDeployment{} + if err := fakeClient.Get(ctx, types.NamespacedName{Name: tt.dgd.Name, Namespace: tt.dgd.Namespace}, updatedDGD); err != nil { + t.Fatalf("Failed to get updated DGD: %v", err) + } + + service, exists := updatedDGD.Spec.Services[tt.adapter.Spec.DGDRef.ServiceName] + if !exists { + t.Fatalf("Service %s not found in updated DGD", tt.adapter.Spec.DGDRef.ServiceName) + } + + actualReplicas := int32(1) + if service.Replicas != nil { + actualReplicas = *service.Replicas + } + + if actualReplicas != tt.expectedDGDReplicas { + t.Errorf("DGD service replicas = %d, expected %d", actualReplicas, tt.expectedDGDReplicas) + } + + // Verify adapter status was updated + updatedAdapter := &v1alpha1.DynamoGraphDeploymentScalingAdapter{} + if err := fakeClient.Get(ctx, types.NamespacedName{Name: tt.adapter.Name, Namespace: tt.adapter.Namespace}, updatedAdapter); err != nil { + t.Fatalf("Failed to get updated adapter: %v", err) + } + + if updatedAdapter.Status.Replicas != tt.expectedStatusReplicas { + t.Errorf("Adapter status.replicas = %d, expected %d", updatedAdapter.Status.Replicas, tt.expectedStatusReplicas) + } + + // Verify selector is set + if updatedAdapter.Status.Selector == "" { + t.Errorf("Adapter status.selector is empty, expected non-empty") + } + }) + } +} + +func TestDynamoGraphDeploymentScalingAdapterReconciler_Reconcile_NotFound(t *testing.T) { + // Register custom types with the scheme + if err := v1alpha1.AddToScheme(scheme.Scheme); err != nil { + t.Fatalf("Failed to add v1alpha1 to scheme: %v", err) + } + + // Create fake client with no objects + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme.Scheme). 
+ Build() + + r := &DynamoGraphDeploymentScalingAdapterReconciler{ + Client: fakeClient, + Scheme: scheme.Scheme, + Recorder: record.NewFakeRecorder(10), + } + + ctx := context.Background() + req := ctrl.Request{ + NamespacedName: types.NamespacedName{ + Name: "nonexistent", + Namespace: "default", + }, + } + + // Should return no error when adapter not found (client.IgnoreNotFound) + result, err := r.Reconcile(ctx, req) + if err != nil { + t.Errorf("Expected no error for not found adapter, got: %v", err) + } + if result.RequeueAfter != 0 { + t.Errorf("Expected no requeueAfter for not found adapter, got: %v", result.RequeueAfter) + } +} + +func TestDynamoGraphDeploymentScalingAdapterReconciler_Reconcile_DGDNotFound(t *testing.T) { + // Register custom types with the scheme + if err := v1alpha1.AddToScheme(scheme.Scheme); err != nil { + t.Fatalf("Failed to add v1alpha1 to scheme: %v", err) + } + + adapter := &v1alpha1.DynamoGraphDeploymentScalingAdapter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dgd-frontend", + Namespace: "default", + }, + Spec: v1alpha1.DynamoGraphDeploymentScalingAdapterSpec{ + Replicas: 5, + DGDRef: v1alpha1.DynamoGraphDeploymentServiceRef{ + Name: "nonexistent-dgd", + ServiceName: "Frontend", + }, + }, + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme.Scheme). + WithObjects(adapter). + Build() + + r := &DynamoGraphDeploymentScalingAdapterReconciler{ + Client: fakeClient, + Scheme: scheme.Scheme, + Recorder: record.NewFakeRecorder(10), + } + + ctx := context.Background() + req := ctrl.Request{ + NamespacedName: types.NamespacedName{ + Name: adapter.Name, + Namespace: adapter.Namespace, + }, + } + + // Should return error when DGD not found + _, err := r.Reconcile(ctx, req) + if err == nil { + t.Errorf("Expected error when DGD not found, got none") + } +} + +func TestDynamoGraphDeploymentScalingAdapterReconciler_Reconcile_BeingDeleted(t *testing.T) { + // Register custom types with the scheme + if err := v1alpha1.AddToScheme(scheme.Scheme); err != nil { + t.Fatalf("Failed to add v1alpha1 to scheme: %v", err) + } + + now := metav1.Now() + adapter := &v1alpha1.DynamoGraphDeploymentScalingAdapter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dgd-frontend", + Namespace: "default", + DeletionTimestamp: &now, + Finalizers: []string{"test-finalizer"}, // Required for deletion timestamp to be set + }, + Spec: v1alpha1.DynamoGraphDeploymentScalingAdapterSpec{ + Replicas: 5, + DGDRef: v1alpha1.DynamoGraphDeploymentServiceRef{ + Name: "test-dgd", + ServiceName: "Frontend", + }, + }, + } + + dgd := &v1alpha1.DynamoGraphDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dgd", + Namespace: "default", + }, + Spec: v1alpha1.DynamoGraphDeploymentSpec{ + Services: map[string]*v1alpha1.DynamoComponentDeploymentSharedSpec{ + "Frontend": { + Replicas: ptr.To(int32(2)), + }, + }, + }, + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme.Scheme). + WithObjects(adapter, dgd). 
+ Build() + + r := &DynamoGraphDeploymentScalingAdapterReconciler{ + Client: fakeClient, + Scheme: scheme.Scheme, + Recorder: record.NewFakeRecorder(10), + } + + ctx := context.Background() + req := ctrl.Request{ + NamespacedName: types.NamespacedName{ + Name: adapter.Name, + Namespace: adapter.Namespace, + }, + } + + // Should return no error and skip reconciliation + result, err := r.Reconcile(ctx, req) + if err != nil { + t.Errorf("Expected no error for deleting adapter, got: %v", err) + } + if result.RequeueAfter != 0 { + t.Errorf("Expected no requeueAfter for deleting adapter, got: %v", result.RequeueAfter) + } + + // DGD replicas should NOT be updated (still 2) + updatedDGD := &v1alpha1.DynamoGraphDeployment{} + if err := fakeClient.Get(ctx, types.NamespacedName{Name: dgd.Name, Namespace: dgd.Namespace}, updatedDGD); err != nil { + t.Fatalf("Failed to get DGD: %v", err) + } + + if *updatedDGD.Spec.Services["Frontend"].Replicas != 2 { + t.Errorf("DGD replicas should remain unchanged, got %d", *updatedDGD.Spec.Services["Frontend"].Replicas) + } +} + +func TestDynamoGraphDeploymentScalingAdapterReconciler_findAdaptersForDGD(t *testing.T) { + // Register custom types with the scheme + if err := v1alpha1.AddToScheme(scheme.Scheme); err != nil { + t.Fatalf("Failed to add v1alpha1 to scheme: %v", err) + } + + dgd := &v1alpha1.DynamoGraphDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dgd", + Namespace: "default", + }, + } + + // Adapters belonging to test-dgd + adapter1 := &v1alpha1.DynamoGraphDeploymentScalingAdapter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dgd-frontend", + Namespace: "default", + Labels: map[string]string{ + consts.KubeLabelDynamoGraphDeploymentName: "test-dgd", + }, + }, + Spec: v1alpha1.DynamoGraphDeploymentScalingAdapterSpec{ + DGDRef: v1alpha1.DynamoGraphDeploymentServiceRef{ + Name: "test-dgd", + ServiceName: "Frontend", + }, + }, + } + + adapter2 := &v1alpha1.DynamoGraphDeploymentScalingAdapter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-dgd-decode", + Namespace: "default", + Labels: map[string]string{ + consts.KubeLabelDynamoGraphDeploymentName: "test-dgd", + }, + }, + Spec: v1alpha1.DynamoGraphDeploymentScalingAdapterSpec{ + DGDRef: v1alpha1.DynamoGraphDeploymentServiceRef{ + Name: "test-dgd", + ServiceName: "decode", + }, + }, + } + + // Adapter belonging to different DGD + adapterOther := &v1alpha1.DynamoGraphDeploymentScalingAdapter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "other-dgd-frontend", + Namespace: "default", + Labels: map[string]string{ + consts.KubeLabelDynamoGraphDeploymentName: "other-dgd", + }, + }, + Spec: v1alpha1.DynamoGraphDeploymentScalingAdapterSpec{ + DGDRef: v1alpha1.DynamoGraphDeploymentServiceRef{ + Name: "other-dgd", + ServiceName: "Frontend", + }, + }, + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme.Scheme). + WithObjects(adapter1, adapter2, adapterOther). 
+ Build() + + r := &DynamoGraphDeploymentScalingAdapterReconciler{ + Client: fakeClient, + } + + ctx := context.Background() + requests := r.findAdaptersForDGD(ctx, dgd) + + // Should return 2 requests (for test-dgd adapters only) + if len(requests) != 2 { + t.Errorf("findAdaptersForDGD() returned %d requests, expected 2", len(requests)) + } + + // Verify correct adapters are returned + expectedNames := map[string]bool{ + "test-dgd-frontend": true, + "test-dgd-decode": true, + } + + for _, req := range requests { + if !expectedNames[req.Name] { + t.Errorf("Unexpected adapter in results: %s", req.Name) + } + } +} diff --git a/deploy/cloud/operator/internal/dynamo/graph.go b/deploy/cloud/operator/internal/dynamo/graph.go index 706dcec234..e644e5e881 100644 --- a/deploy/cloud/operator/internal/dynamo/graph.go +++ b/deploy/cloud/operator/internal/dynamo/graph.go @@ -1034,7 +1034,7 @@ func GenerateGrovePodCliqueSet( PodSpec: *podSpec, }, } - labels, err := generateLabels(component, dynamoDeployment, r.Name) + labels, err := generateLabels(component, dynamoDeployment, serviceName) if err != nil { return nil, fmt.Errorf("failed to generate labels: %w", err) } @@ -1075,6 +1075,7 @@ func generateLabels(component *v1alpha1.DynamoComponentDeploymentSharedSpec, dyn labels := make(map[string]string) labels[commonconsts.KubeLabelDynamoSelector] = GetDynamoComponentName(dynamoDeployment, componentName) labels[commonconsts.KubeLabelDynamoGraphDeploymentName] = dynamoDeployment.Name + labels[commonconsts.KubeLabelDynamoComponent] = componentName if component.DynamoNamespace != nil { labels[commonconsts.KubeLabelDynamoNamespace] = *component.DynamoNamespace } diff --git a/deploy/cloud/operator/internal/dynamo/graph_test.go b/deploy/cloud/operator/internal/dynamo/graph_test.go index d93a60459b..6a126cf445 100644 --- a/deploy/cloud/operator/internal/dynamo/graph_test.go +++ b/deploy/cloud/operator/internal/dynamo/graph_test.go @@ -121,7 +121,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) { commonconsts.KubeLabelDynamoNamespace: "default-test-dynamographdeployment", commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamographdeployment", }, - Autoscaling: nil, }, }, }, @@ -153,7 +152,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) { Custom: map[string]string{}, }, }, - Autoscaling: nil, }, }, }, @@ -229,7 +227,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) { commonconsts.KubeLabelDynamoNamespace: "default-test-dynamographdeployment", commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamographdeployment", }, - Autoscaling: nil, }, }, }, @@ -261,7 +258,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) { Custom: map[string]string{}, }, }, - Autoscaling: nil, }, }, }, @@ -341,7 +337,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) { commonconsts.KubeLabelDynamoNamespace: "default-test-dynamographdeployment", commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamographdeployment", }, - Autoscaling: nil, Ingress: &v1alpha1.IngressSpec{ Enabled: true, Host: "test-dynamographdeployment", @@ -377,7 +372,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) { Custom: map[string]string{}, }, }, - Autoscaling: nil, }, }, }, @@ -465,7 +459,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) { commonconsts.KubeLabelDynamoNamespace: "default-test-dynamographdeployment", commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamographdeployment", }, - Autoscaling: nil, Envs: []corev1.EnvVar{ { Name: 
"DYN_DEPLOYMENT_CONFIG", @@ -503,7 +496,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) { Custom: map[string]string{}, }, }, - Autoscaling: nil, Envs: []corev1.EnvVar{ { Name: "DYN_DEPLOYMENT_CONFIG", @@ -599,7 +591,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) { commonconsts.KubeLabelDynamoNamespace: "default-test-dynamographdeployment", commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamographdeployment", }, - Autoscaling: nil, ExtraPodSpec: &v1alpha1.ExtraPodSpec{ MainContainer: &corev1.Container{ Command: []string{"sh", "-c"}, @@ -644,7 +635,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) { Custom: map[string]string{}, }, }, - Autoscaling: nil, Envs: []corev1.EnvVar{ { Name: "TEST_ENV", @@ -1307,6 +1297,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) { Name: "frontend", Labels: map[string]string{ commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-frontend", + commonconsts.KubeLabelDynamoComponent: "Frontend", commonconsts.KubeLabelMetricsEnabled: commonconsts.KubeLabelValueTrue, commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypeFrontend, commonconsts.KubeLabelDynamoSubComponentType: "test-sub-component", @@ -1483,6 +1474,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) { Labels: map[string]string{ commonconsts.KubeLabelMetricsEnabled: commonconsts.KubeLabelValueTrue, commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-planner", + commonconsts.KubeLabelDynamoComponent: "Planner", commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamo-graph-deployment", commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypePlanner, commonconsts.KubeLabelDynamoNamespace: "test-namespace-test-dynamo-graph-deployment", @@ -1884,8 +1876,9 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) { commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypeWorker, commonconsts.KubeLabelDynamoSubComponentType: "test-sub-component", commonconsts.KubeLabelMetricsEnabled: commonconsts.KubeLabelValueTrue, - commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-worker-ldr", + commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-worker", commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamo-graph-deployment", + commonconsts.KubeLabelDynamoComponent: "worker", commonconsts.KubeLabelDynamoNamespace: "test-namespace-test-dynamo-graph-deployment", "nvidia.com/label1": "label1", "nvidia.com/label2": "label2", @@ -2059,8 +2052,9 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) { commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypeWorker, commonconsts.KubeLabelDynamoSubComponentType: "test-sub-component", commonconsts.KubeLabelMetricsEnabled: commonconsts.KubeLabelValueTrue, - commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-worker-wkr", + commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-worker", commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamo-graph-deployment", + commonconsts.KubeLabelDynamoComponent: "worker", commonconsts.KubeLabelDynamoNamespace: "test-namespace-test-dynamo-graph-deployment", "nvidia.com/label1": "label1", "nvidia.com/label2": "label2", @@ -2200,6 +2194,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) { commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-frontend", commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypeFrontend, commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamo-graph-deployment", + 
commonconsts.KubeLabelDynamoComponent: "Frontend", commonconsts.KubeLabelDynamoNamespace: "test-namespace-test-dynamo-graph-deployment", }, Annotations: map[string]string{}, @@ -2358,6 +2353,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) { Name: "planner", Labels: map[string]string{ commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-planner", + commonconsts.KubeLabelDynamoComponent: "Planner", commonconsts.KubeLabelMetricsEnabled: commonconsts.KubeLabelValueTrue, commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamo-graph-deployment", commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypePlanner, @@ -2779,7 +2775,8 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) { { Name: "worker-ldr", Labels: map[string]string{ - commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-worker-ldr", + commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-worker", + commonconsts.KubeLabelDynamoComponent: "worker", commonconsts.KubeLabelMetricsEnabled: commonconsts.KubeLabelValueTrue, commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypeWorker, commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamo-graph-deployment", @@ -2943,7 +2940,8 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) { Labels: map[string]string{ commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypeWorker, commonconsts.KubeLabelMetricsEnabled: commonconsts.KubeLabelValueTrue, - commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-worker-wkr", + commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-worker", + commonconsts.KubeLabelDynamoComponent: "worker", commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamo-graph-deployment", commonconsts.KubeLabelDynamoNamespace: "test-namespace-test-dynamo-graph-deployment", "nvidia.com/label1": "label1", @@ -3084,6 +3082,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) { commonconsts.KubeLabelMetricsEnabled: commonconsts.KubeLabelValueTrue, commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-frontend", commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamo-graph-deployment", + commonconsts.KubeLabelDynamoComponent: "Frontend", commonconsts.KubeLabelDynamoNamespace: "test-namespace-test-dynamo-graph-deployment", }, Annotations: map[string]string{}, @@ -3243,6 +3242,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) { Labels: map[string]string{ commonconsts.KubeLabelMetricsEnabled: commonconsts.KubeLabelValueTrue, commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-planner", + commonconsts.KubeLabelDynamoComponent: "Planner", commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamo-graph-deployment", commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypePlanner, commonconsts.KubeLabelDynamoNamespace: "test-namespace-test-dynamo-graph-deployment", diff --git a/deploy/cloud/operator/internal/webhook/common.go b/deploy/cloud/operator/internal/webhook/common.go index 6333738739..c18edd98f4 100644 --- a/deploy/cloud/operator/internal/webhook/common.go +++ b/deploy/cloud/operator/internal/webhook/common.go @@ -19,7 +19,9 @@ package webhook import ( "context" + "strings" + authenticationv1 "k8s.io/api/authentication/v1" "k8s.io/apimachinery/pkg/runtime" "sigs.k8s.io/controller-runtime/pkg/client" logf "sigs.k8s.io/controller-runtime/pkg/log" @@ -118,3 +120,54 @@ func (v *LeaseAwareValidator) shouldSkipValidation(obj runtime.Object) bool { return false } + +// DGDReplicasModifierSuffixes defines 
suffixes for service accounts that are authorized
+// to modify DGD replicas when scaling adapter is enabled.
+// Service accounts matching any of these suffixes are allowed regardless of namespace.
+var DGDReplicasModifierSuffixes = []string{
+	// Dynamo operator controller manager (handles DGDSA reconciliation)
+	// Example: "dynamo-platform-dynamo-operator-controller-manager"
+	"-dynamo-operator-controller-manager",
+
+	// Planner service account (manages DGD replicas for autoscaling)
+	// Example: "planner-serviceaccount"
+	"planner-serviceaccount",
+}
+
+// CanModifyDGDReplicas checks if the request comes from a service account authorized
+// to modify DGD replicas when scaling adapter is enabled.
+// Service accounts are identified by username format: system:serviceaccount:<namespace>:<name>
+//
+// Authorized service accounts (by suffix):
+// - *-dynamo-operator-controller-manager (for DGDSA reconciliation)
+// - *planner-serviceaccount (for Planner autoscaling)
+func CanModifyDGDReplicas(userInfo authenticationv1.UserInfo) bool {
+	username := userInfo.Username
+
+	// Service accounts have username format: system:serviceaccount:<namespace>:<name>
+	if !strings.HasPrefix(username, "system:serviceaccount:") {
+		return false
+	}
+
+	// Parse: system:serviceaccount:<namespace>:<name>
+	parts := strings.Split(username, ":")
+	if len(parts) != 4 {
+		return false
+	}
+
+	namespace := parts[2]
+	saName := parts[3]
+
+	// Check against authorized suffixes
+	for _, suffix := range DGDReplicasModifierSuffixes {
+		if strings.HasSuffix(saName, suffix) {
+			webhookCommonLog.V(1).Info("allowing DGD replicas modification",
+				"serviceAccount", saName,
+				"namespace", namespace,
+				"matchedSuffix", suffix)
+			return true
+		}
+	}
+
+	return false
+}
diff --git a/deploy/cloud/operator/internal/webhook/validation/dynamocomponentdeployment.go b/deploy/cloud/operator/internal/webhook/validation/dynamocomponentdeployment.go
index c77303fde2..c0e0628834 100644
--- a/deploy/cloud/operator/internal/webhook/validation/dynamocomponentdeployment.go
+++ b/deploy/cloud/operator/internal/webhook/validation/dynamocomponentdeployment.go
@@ -42,13 +42,10 @@ func NewDynamoComponentDeploymentValidator(deployment *nvidiacomv1alpha1.DynamoC
 func (v *DynamoComponentDeploymentValidator) Validate() (admission.Warnings, error) {
 	// Validate shared spec fields using SharedSpecValidator
 	sharedValidator := NewSharedSpecValidator(&v.deployment.Spec.DynamoComponentDeploymentSharedSpec, "spec")
-	if err := sharedValidator.Validate(); err != nil {
-		return nil, err
-	}
 
 	// DCD-specific validation would go here (currently none)
 
-	return nil, nil
+	return sharedValidator.Validate()
 }
 
 // ValidateUpdate performs stateful validation comparing old and new DynamoComponentDeployment.
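A note on the identity gate in common.go above: CanModifyDGDReplicas admits only usernames of the form system:serviceaccount:<namespace>:<name> (exactly four colon-separated segments) whose name segment ends in an allow-listed suffix. A minimal sketch of that behavior under illustrative identities — this test and its service-account names are not part of the patch:

```go
package webhook

import (
	"testing"

	authenticationv1 "k8s.io/api/authentication/v1"
)

// Illustrative cases only; the service-account names are hypothetical.
func TestCanModifyDGDReplicas_Examples(t *testing.T) {
	cases := map[string]bool{
		// Operator controller manager in any namespace: allowed by suffix match.
		"system:serviceaccount:dynamo:dp-dynamo-operator-controller-manager": true,
		// Planner service account: allowed by suffix match.
		"system:serviceaccount:dynamo:planner-serviceaccount": true,
		// Non-serviceaccount identity (no system:serviceaccount: prefix): rejected.
		"kubernetes-admin": false,
		// Malformed service-account username (missing the name segment): rejected.
		"system:serviceaccount:dynamo": false,
	}
	for username, want := range cases {
		got := CanModifyDGDReplicas(authenticationv1.UserInfo{Username: username})
		if got != want {
			t.Errorf("CanModifyDGDReplicas(%q) = %v, want %v", username, got, want)
		}
	}
}
```

The suffix match is deliberately namespace-agnostic, so the webhook trusts any namespace's operator or planner service account; tightening that would mean matching the full username rather than only the name segment.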
diff --git a/deploy/cloud/operator/internal/webhook/validation/dynamocomponentdeployment_test.go b/deploy/cloud/operator/internal/webhook/validation/dynamocomponentdeployment_test.go index 0324856dfd..f38240c8ee 100644 --- a/deploy/cloud/operator/internal/webhook/validation/dynamocomponentdeployment_test.go +++ b/deploy/cloud/operator/internal/webhook/validation/dynamocomponentdeployment_test.go @@ -47,11 +47,6 @@ func TestDynamoComponentDeploymentValidator_Validate(t *testing.T) { Spec: nvidiacomv1alpha1.DynamoComponentDeploymentSpec{ DynamoComponentDeploymentSharedSpec: nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{ Replicas: &validReplicas, - Autoscaling: &nvidiacomv1alpha1.Autoscaling{ - Enabled: true, - MinReplicas: 1, - MaxReplicas: 10, - }, }, BackendFramework: "sglang", }, @@ -74,26 +69,6 @@ func TestDynamoComponentDeploymentValidator_Validate(t *testing.T) { wantErr: true, errMsg: "spec.replicas must be non-negative", }, - { - name: "invalid autoscaling", - deployment: &nvidiacomv1alpha1.DynamoComponentDeployment{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-deployment", - Namespace: "default", - }, - Spec: nvidiacomv1alpha1.DynamoComponentDeploymentSpec{ - DynamoComponentDeploymentSharedSpec: nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{ - Autoscaling: &nvidiacomv1alpha1.Autoscaling{ - Enabled: true, - MinReplicas: 5, - MaxReplicas: 3, - }, - }, - }, - }, - wantErr: true, - errMsg: "spec.autoscaling.maxReplicas must be > minReplicas", - }, { name: "invalid ingress", deployment: &nvidiacomv1alpha1.DynamoComponentDeployment{ diff --git a/deploy/cloud/operator/internal/webhook/validation/dynamographdeployment.go b/deploy/cloud/operator/internal/webhook/validation/dynamographdeployment.go index e6bf9e3893..00a1668806 100644 --- a/deploy/cloud/operator/internal/webhook/validation/dynamographdeployment.go +++ b/deploy/cloud/operator/internal/webhook/validation/dynamographdeployment.go @@ -22,6 +22,8 @@ import ( "fmt" nvidiacomv1alpha1 "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1" + internalwebhook "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/webhook" + authenticationv1 "k8s.io/api/authentication/v1" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" ) @@ -51,30 +53,106 @@ func (v *DynamoGraphDeploymentValidator) Validate() (admission.Warnings, error) return nil, err } + var allWarnings admission.Warnings + // Validate each service for serviceName, service := range v.deployment.Spec.Services { - if err := v.validateService(serviceName, service); err != nil { + warnings, err := v.validateService(serviceName, service) + if err != nil { return nil, err } + allWarnings = append(allWarnings, warnings...) } - return nil, nil + return allWarnings, nil } // ValidateUpdate performs stateful validation comparing old and new DynamoGraphDeployment. +// userInfo is used for identity-based validation (replica protection). +// If userInfo is nil, replica changes for DGDSA-enabled services are rejected (fail closed). // Returns warnings and error. 
-func (v *DynamoGraphDeploymentValidator) ValidateUpdate(old *nvidiacomv1alpha1.DynamoGraphDeployment) (admission.Warnings, error) { - // Validate that BackendFramework is not changed (immutable) +func (v *DynamoGraphDeploymentValidator) ValidateUpdate(old *nvidiacomv1alpha1.DynamoGraphDeployment, userInfo *authenticationv1.UserInfo) (admission.Warnings, error) { + var warnings admission.Warnings + + // Validate immutable fields + if err := v.validateImmutableFields(old, &warnings); err != nil { + return warnings, err + } + + // Validate replicas changes for services with scaling adapter enabled + // Pass userInfo (may be nil - will fail closed for DGDSA-enabled services) + if err := v.validateReplicasChanges(old, userInfo); err != nil { + return warnings, err + } + + return warnings, nil +} + +// validateImmutableFields checks that immutable fields have not been changed. +// Appends warnings to the provided slice. +func (v *DynamoGraphDeploymentValidator) validateImmutableFields(old *nvidiacomv1alpha1.DynamoGraphDeployment, warnings *admission.Warnings) error { if v.deployment.Spec.BackendFramework != old.Spec.BackendFramework { - warning := "Changing spec.backendFramework may cause unexpected behavior" - return admission.Warnings{warning}, fmt.Errorf("spec.backendFramework is immutable and cannot be changed after creation") + *warnings = append(*warnings, "Changing spec.backendFramework may cause unexpected behavior") + return fmt.Errorf("spec.backendFramework is immutable and cannot be changed after creation") } + return nil +} - return nil, nil +// validateReplicasChanges checks if replicas were changed for services with scaling adapter enabled. +// Only authorized service accounts (operator controller, planner) can modify these fields. +// If userInfo is nil, all replica changes for DGDSA-enabled services are rejected (fail closed). +func (v *DynamoGraphDeploymentValidator) validateReplicasChanges(old *nvidiacomv1alpha1.DynamoGraphDeployment, userInfo *authenticationv1.UserInfo) error { + // If the request comes from an authorized service account, allow the change + if userInfo != nil && internalwebhook.CanModifyDGDReplicas(*userInfo) { + return nil + } + + var errs []error + + for serviceName, newService := range v.deployment.Spec.Services { + // Check if scaling adapter is enabled for this service (enabled by default) + scalingAdapterEnabled := true + if newService.ScalingAdapter != nil && newService.ScalingAdapter.Disable { + scalingAdapterEnabled = false + } + + if !scalingAdapterEnabled { + // Scaling adapter is disabled, users can modify replicas directly + continue + } + + // Get old service (if exists) + oldService, exists := old.Spec.Services[serviceName] + if !exists { + // New service, no comparison needed + continue + } + + // Check if replicas changed + oldReplicas := int32(1) // default + if oldService.Replicas != nil { + oldReplicas = *oldService.Replicas + } + + newReplicas := int32(1) // default + if newService.Replicas != nil { + newReplicas = *newService.Replicas + } + + if oldReplicas != newReplicas { + errs = append(errs, fmt.Errorf( + "spec.services[%s].replicas cannot be modified directly when scaling adapter is enabled; "+ + "scale or update the related DynamoGraphDeploymentScalingAdapter instead", + serviceName)) + } + } + + return errors.Join(errs...) } // validateService validates a single service configuration using SharedSpecValidator. 
-func (v *DynamoGraphDeploymentValidator) validateService(serviceName string, service *nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec) error { +// Returns warnings and error. +func (v *DynamoGraphDeploymentValidator) validateService(serviceName string, service *nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec) (admission.Warnings, error) { // Use SharedSpecValidator to validate service spec (which is a DynamoComponentDeploymentSharedSpec) fieldPath := fmt.Sprintf("spec.services[%s]", serviceName) sharedValidator := NewSharedSpecValidator(service, fieldPath) diff --git a/deploy/cloud/operator/internal/webhook/validation/dynamographdeployment_handler.go b/deploy/cloud/operator/internal/webhook/validation/dynamographdeployment_handler.go index 074a4c5cc2..e98bd03442 100644 --- a/deploy/cloud/operator/internal/webhook/validation/dynamographdeployment_handler.go +++ b/deploy/cloud/operator/internal/webhook/validation/dynamographdeployment_handler.go @@ -23,6 +23,7 @@ import ( nvidiacomv1alpha1 "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1" internalwebhook "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/webhook" + authenticationv1 "k8s.io/api/authentication/v1" "k8s.io/apimachinery/pkg/runtime" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/manager" @@ -91,9 +92,24 @@ func (h *DynamoGraphDeploymentHandler) ValidateUpdate(ctx context.Context, oldOb return warnings, err } - // Validate stateful rules (immutability) - updateWarnings, err := validator.ValidateUpdate(oldDeployment) + // Get user info from admission request context for identity-based validation + var userInfo *authenticationv1.UserInfo + req, err := admission.RequestFromContext(ctx) if err != nil { + logger.Error(err, "failed to get admission request from context, replica changes for DGDSA-enabled services will be rejected") + // userInfo remains nil - validateReplicasChanges will fail closed + } else { + userInfo = &req.UserInfo + } + + // Validate stateful rules (immutability + replicas protection) + updateWarnings, err := validator.ValidateUpdate(oldDeployment, userInfo) + if err != nil { + username := "" + if userInfo != nil { + username = userInfo.Username + } + logger.Info("validation failed", "error", err.Error(), "user", username) return updateWarnings, err } diff --git a/deploy/cloud/operator/internal/webhook/validation/dynamographdeployment_test.go b/deploy/cloud/operator/internal/webhook/validation/dynamographdeployment_test.go index 75c18dd33f..71228327b6 100644 --- a/deploy/cloud/operator/internal/webhook/validation/dynamographdeployment_test.go +++ b/deploy/cloud/operator/internal/webhook/validation/dynamographdeployment_test.go @@ -93,28 +93,6 @@ func TestDynamoGraphDeploymentValidator_Validate(t *testing.T) { wantErr: true, errMsg: "spec.services[main].replicas must be non-negative", }, - { - name: "service with invalid autoscaling", - deployment: &nvidiacomv1alpha1.DynamoGraphDeployment{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-graph", - Namespace: "default", - }, - Spec: nvidiacomv1alpha1.DynamoGraphDeploymentSpec{ - Services: map[string]*nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{ - "prefill": { - Autoscaling: &nvidiacomv1alpha1.Autoscaling{ - Enabled: true, - MinReplicas: 10, - MaxReplicas: 5, - }, - }, - }, - }, - }, - wantErr: true, - errMsg: "spec.services[prefill].autoscaling.maxReplicas must be > minReplicas", - }, { name: "service with invalid ingress", deployment: &nvidiacomv1alpha1.DynamoGraphDeployment{ @@ -441,7 
+419,8 @@ func TestDynamoGraphDeploymentValidator_ValidateUpdate(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { validator := NewDynamoGraphDeploymentValidator(tt.newDeployment) - warnings, err := validator.ValidateUpdate(tt.oldDeployment) + // Pass nil userInfo - these tests don't modify replicas, so it's safe + warnings, err := validator.ValidateUpdate(tt.oldDeployment, nil) if (err != nil) != tt.wantErr { t.Errorf("DynamoGraphDeploymentValidator.ValidateUpdate() error = %v, wantErr %v", err, tt.wantErr) diff --git a/deploy/cloud/operator/internal/webhook/validation/shared.go b/deploy/cloud/operator/internal/webhook/validation/shared.go index 5348193f3f..30edb0500d 100644 --- a/deploy/cloud/operator/internal/webhook/validation/shared.go +++ b/deploy/cloud/operator/internal/webhook/validation/shared.go @@ -21,6 +21,7 @@ import ( "fmt" nvidiacomv1alpha1 "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1" + "sigs.k8s.io/controller-runtime/pkg/webhook/admission" ) // SharedSpecValidator validates DynamoComponentDeploymentSharedSpec fields. @@ -41,61 +42,45 @@ func NewSharedSpecValidator(spec *nvidiacomv1alpha1.DynamoComponentDeploymentSha } // Validate performs validation on the shared spec fields. -// Returns an error if validation fails. -func (v *SharedSpecValidator) Validate() error { +// Returns warnings (e.g., deprecation notices) and error if validation fails. +func (v *SharedSpecValidator) Validate() (admission.Warnings, error) { // Validate replicas if specified if v.spec.Replicas != nil && *v.spec.Replicas < 0 { - return fmt.Errorf("%s.replicas must be non-negative", v.fieldPath) - } - - // Validate autoscaling configuration if specified - if v.spec.Autoscaling != nil { - if err := v.validateAutoscaling(); err != nil { - return err - } + return nil, fmt.Errorf("%s.replicas must be non-negative", v.fieldPath) } // Validate ingress configuration if enabled if v.spec.Ingress != nil && v.spec.Ingress.Enabled { if err := v.validateIngress(); err != nil { - return err + return nil, err } } // Validate volume mounts if err := v.validateVolumeMounts(); err != nil { - return err + return nil, err } // Validate shared memory if v.spec.SharedMemory != nil { if err := v.validateSharedMemory(); err != nil { - return err + return nil, err } } - return nil -} - -// validateAutoscaling validates the autoscaling configuration. -func (v *SharedSpecValidator) validateAutoscaling() error { - autoscaling := v.spec.Autoscaling - - if !autoscaling.Enabled { - return nil - } - - // Validate minReplicas - if autoscaling.MinReplicas < 1 { - return fmt.Errorf("%s.autoscaling.minReplicas must be >= 1", v.fieldPath) - } + // Collect warnings (e.g., deprecation notices) + var warnings admission.Warnings - // Validate maxReplicas - if autoscaling.MaxReplicas <= autoscaling.MinReplicas { - return fmt.Errorf("%s.autoscaling.maxReplicas must be > minReplicas", v.fieldPath) + // Check for deprecated autoscaling field + //nolint:staticcheck // SA1019: Intentionally checking deprecated field to warn users + if v.spec.Autoscaling != nil { + warnings = append(warnings, fmt.Sprintf( + "%s.autoscaling is deprecated and ignored. Use DynamoGraphDeploymentScalingAdapter "+ + "with HPA, KEDA, or Planner for autoscaling instead. See docs/kubernetes/autoscaling.md", + v.fieldPath)) } - return nil + return warnings, nil } // validateIngress validates the ingress configuration. 
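Because the deprecated autoscaling block now only yields an admission warning, the migration target is the DGDSA Scale subresource. A minimal sketch of that replacement wiring — a stock autoscaling/v2 HPA pointed at an adapter named with the <dgd-name>-<lowercased-service> convention exercised in the controller tests earlier in this diff; the manifest is illustrative and not shipped in this patch:

```yaml
# Hypothetical HPA scaling the "Frontend" service of DGD "test-dgd" through
# its scaling adapter. The adapter owns the DGD service's replicas, so the
# HPA drives scale via the adapter's Scale subresource instead of the DGD.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: frontend-hpa
spec:
  scaleTargetRef:
    apiVersion: nvidia.com/v1alpha1
    kind: DynamoGraphDeploymentScalingAdapter
    name: test-dgd-frontend   # <dgd-name>-<lowercased-service>
  minReplicas: 1
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 80
```

The adapter's status.selector (built by buildPodSelector from the graph-deployment-name and component labels) is what lets the HPA resolve the pods behind the Scale subresource.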
diff --git a/deploy/cloud/operator/internal/webhook/validation/shared_test.go b/deploy/cloud/operator/internal/webhook/validation/shared_test.go index 472bb7d990..b7a2687cbd 100644 --- a/deploy/cloud/operator/internal/webhook/validation/shared_test.go +++ b/deploy/cloud/operator/internal/webhook/validation/shared_test.go @@ -41,11 +41,6 @@ func TestSharedSpecValidator_Validate(t *testing.T) { name: "valid spec with all fields", spec: &nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{ Replicas: &validReplicas, - Autoscaling: &nvidiacomv1alpha1.Autoscaling{ - Enabled: true, - MinReplicas: 1, - MaxReplicas: 10, - }, Ingress: &nvidiacomv1alpha1.IngressSpec{ Enabled: true, Host: "example.com", @@ -77,44 +72,6 @@ func TestSharedSpecValidator_Validate(t *testing.T) { wantErr: true, errMsg: "spec.replicas must be non-negative", }, - { - name: "autoscaling minReplicas too low", - spec: &nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{ - Autoscaling: &nvidiacomv1alpha1.Autoscaling{ - Enabled: true, - MinReplicas: 0, - MaxReplicas: 10, - }, - }, - fieldPath: "spec", - wantErr: true, - errMsg: "spec.autoscaling.minReplicas must be >= 1", - }, - { - name: "autoscaling maxReplicas less than minReplicas", - spec: &nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{ - Autoscaling: &nvidiacomv1alpha1.Autoscaling{ - Enabled: true, - MinReplicas: 5, - MaxReplicas: 3, - }, - }, - fieldPath: "spec", - wantErr: true, - errMsg: "spec.autoscaling.maxReplicas must be > minReplicas", - }, - { - name: "autoscaling disabled - no validation", - spec: &nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{ - Autoscaling: &nvidiacomv1alpha1.Autoscaling{ - Enabled: false, - MinReplicas: 0, - MaxReplicas: 0, - }, - }, - fieldPath: "spec", - wantErr: false, - }, { name: "ingress enabled without host", spec: &nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{ @@ -227,7 +184,7 @@ func TestSharedSpecValidator_Validate(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { validator := NewSharedSpecValidator(tt.spec, tt.fieldPath) - err := validator.Validate() + _, err := validator.Validate() if (err != nil) != tt.wantErr { t.Errorf("SharedSpecValidator.Validate() error = %v, wantErr %v", err, tt.wantErr) @@ -240,3 +197,53 @@ func TestSharedSpecValidator_Validate(t *testing.T) { }) } } + +func TestSharedSpecValidator_Validate_Warnings(t *testing.T) { + validReplicas := int32(3) + + tests := []struct { + name string + spec *nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec + fieldPath string + wantWarnings int + }{ + { + name: "no warnings for spec without autoscaling", + spec: &nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{ + Replicas: &validReplicas, + }, + fieldPath: "spec", + wantWarnings: 0, + }, + { + name: "warning for deprecated autoscaling field enabled", + spec: &nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{ + Replicas: &validReplicas, + //nolint:staticcheck // SA1019: Intentionally testing deprecated field + Autoscaling: &nvidiacomv1alpha1.Autoscaling{ + Enabled: true, + MinReplicas: 1, + MaxReplicas: 10, + }, + }, + fieldPath: "spec", + wantWarnings: 1, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + validator := NewSharedSpecValidator(tt.spec, tt.fieldPath) + warnings, err := validator.Validate() + + if err != nil { + t.Errorf("SharedSpecValidator.Validate() unexpected error = %v", err) + return + } + + if len(warnings) != tt.wantWarnings { + t.Errorf("SharedSpecValidator.Validate() warnings count = %d, want %d", 
len(warnings), tt.wantWarnings) + } + }) + } +} diff --git a/docs/_sections/k8s_deployment.rst b/docs/_sections/k8s_deployment.rst index 81d06513cb..cdd7d2029a 100644 --- a/docs/_sections/k8s_deployment.rst +++ b/docs/_sections/k8s_deployment.rst @@ -10,3 +10,4 @@ Deployment Guide Webhooks <../kubernetes/webhooks> Minikube Setup <../kubernetes/deployment/minikube> Managing Models with DynamoModel <../kubernetes/deployment/dynamomodel-guide> + Autoscaling <../kubernetes/autoscaling> diff --git a/docs/kubernetes/api_reference.md b/docs/kubernetes/api_reference.md index 09e7415769..4ae3246155 100644 --- a/docs/kubernetes/api_reference.md +++ b/docs/kubernetes/api_reference.md @@ -37,6 +37,7 @@ Package v1alpha1 contains API Schema definitions for the nvidia.com v1alpha1 API - [DynamoComponentDeployment](#dynamocomponentdeployment) - [DynamoGraphDeployment](#dynamographdeployment) - [DynamoGraphDeploymentRequest](#dynamographdeploymentrequest) +- [DynamoGraphDeploymentScalingAdapter](#dynamographdeploymentscalingadapter) - [DynamoModel](#dynamomodel) @@ -45,7 +46,9 @@ Package v1alpha1 contains API Schema definitions for the nvidia.com v1alpha1 API - +Deprecated: This field is deprecated and ignored. Use DynamoGraphDeploymentScalingAdapter +with HPA, KEDA, or Planner for autoscaling instead. See docs/kubernetes/autoscaling.md +for migration guidance. This field will be removed in a future API version. @@ -55,11 +58,11 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | -| `enabled` _boolean_ | | | | -| `minReplicas` _integer_ | | | | -| `maxReplicas` _integer_ | | | | -| `behavior` _[HorizontalPodAutoscalerBehavior](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#horizontalpodautoscalerbehavior-v2-autoscaling)_ | | | | -| `metrics` _[MetricSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#metricspec-v2-autoscaling) array_ | | | | +| `enabled` _boolean_ | Deprecated: This field is ignored. | | | +| `minReplicas` _integer_ | Deprecated: This field is ignored. | | | +| `maxReplicas` _integer_ | Deprecated: This field is ignored. | | | +| `behavior` _[HorizontalPodAutoscalerBehavior](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#horizontalpodautoscalerbehavior-v2-autoscaling)_ | Deprecated: This field is ignored. | | | +| `metrics` _[MetricSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#metricspec-v2-autoscaling) array_ | Deprecated: This field is ignored. | | | @@ -165,7 +168,7 @@ _Appears in:_ | `dynamoNamespace` _string_ | DynamoNamespace is deprecated and will be removed in a future version.
The DGD Kubernetes namespace and DynamoGraphDeployment name are used to construct the Dynamo namespace for each component | | Optional: \{\}
| | `globalDynamoNamespace` _boolean_ | GlobalDynamoNamespace indicates that the Component will be placed in the global Dynamo namespace | | | | `resources` _[Resources](#resources)_ | Resources requested and limits for this component, including CPU, memory,
GPUs/devices, and any runtime-specific resources. | | | -| `autoscaling` _[Autoscaling](#autoscaling)_ | Autoscaling config for this component (replica range, target utilization, etc.). | | | +| `autoscaling` _[Autoscaling](#autoscaling)_ | Deprecated: This field is deprecated and ignored. Use DynamoGraphDeploymentScalingAdapter
with HPA, KEDA, or Planner for autoscaling instead. See docs/kubernetes/autoscaling.md
for migration guidance. This field will be removed in a future API version. | | | | `envs` _[EnvVar](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#envvar-v1-core) array_ | Envs defines additional environment variables to inject into the component containers. | | | | `envFromSecret` _string_ | EnvFromSecret references a Secret whose key/value pairs will be exposed as
environment variables in the component containers. | | | | `volumeMounts` _[VolumeMount](#volumemount) array_ | VolumeMounts references PVCs defined at the top level for volumes to be mounted by the component. | | | @@ -176,8 +179,9 @@ _Appears in:_ | `extraPodSpec` _[ExtraPodSpec](#extrapodspec)_ | ExtraPodSpec allows to override the main pod spec configuration.
It is a k8s standard PodSpec. It also contains a MainContainer (standard k8s Container) field
that allows overriding the main container configuration. | | | | `livenessProbe` _[Probe](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#probe-v1-core)_ | LivenessProbe to detect and restart unhealthy containers. | | | | `readinessProbe` _[Probe](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#probe-v1-core)_ | ReadinessProbe to signal when the container is ready to receive traffic. | | | -| `replicas` _integer_ | Replicas is the desired number of Pods for this component when autoscaling is not used. | | | +| `replicas` _integer_ | Replicas is the desired number of Pods for this component.
When scalingAdapter is enabled (default), this field is managed by the
DynamoGraphDeploymentScalingAdapter and should not be modified directly. | | Minimum: 0
| | `multinode` _[MultinodeSpec](#multinodespec)_ | Multinode is the configuration for multinode components. | | | +| `scalingAdapter` _[ScalingAdapter](#scalingadapter)_ | ScalingAdapter configures whether this service uses the DynamoGraphDeploymentScalingAdapter.
When enabled (default), replicas are managed via DGDSA and external autoscalers can scale
the service using the Scale subresource. When disabled, replicas can be modified directly. | | | #### DynamoComponentDeploymentSpec @@ -202,7 +206,7 @@ _Appears in:_ | `dynamoNamespace` _string_ | DynamoNamespace is deprecated and will be removed in a future version.
The DGD Kubernetes namespace and DynamoGraphDeployment name are used to construct the Dynamo namespace for each component | | Optional: \{\}
| | `globalDynamoNamespace` _boolean_ | GlobalDynamoNamespace indicates that the Component will be placed in the global Dynamo namespace | | | | `resources` _[Resources](#resources)_ | Resources requested and limits for this component, including CPU, memory,
GPUs/devices, and any runtime-specific resources. | | | -| `autoscaling` _[Autoscaling](#autoscaling)_ | Autoscaling config for this component (replica range, target utilization, etc.). | | | +| `autoscaling` _[Autoscaling](#autoscaling)_ | Deprecated: This field is deprecated and ignored. Use DynamoGraphDeploymentScalingAdapter
with HPA, KEDA, or Planner for autoscaling instead. See docs/kubernetes/autoscaling.md
for migration guidance. This field will be removed in a future API version. | | | | `envs` _[EnvVar](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#envvar-v1-core) array_ | Envs defines additional environment variables to inject into the component containers. | | | | `envFromSecret` _string_ | EnvFromSecret references a Secret whose key/value pairs will be exposed as
environment variables in the component containers. | | | | `volumeMounts` _[VolumeMount](#volumemount) array_ | VolumeMounts references PVCs defined at the top level for volumes to be mounted by the component. | | | @@ -213,8 +217,9 @@ _Appears in:_ | `extraPodSpec` _[ExtraPodSpec](#extrapodspec)_ | ExtraPodSpec allows to override the main pod spec configuration.
It is a k8s standard PodSpec. It also contains a MainContainer (standard k8s Container) field
that allows overriding the main container configuration. | | | | `livenessProbe` _[Probe](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#probe-v1-core)_ | LivenessProbe to detect and restart unhealthy containers. | | | | `readinessProbe` _[Probe](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#probe-v1-core)_ | ReadinessProbe to signal when the container is ready to receive traffic. | | | -| `replicas` _integer_ | Replicas is the desired number of Pods for this component when autoscaling is not used. | | | +| `replicas` _integer_ | Replicas is the desired number of Pods for this component.
When scalingAdapter is enabled (default), this field is managed by the
DynamoGraphDeploymentScalingAdapter and should not be modified directly. | | Minimum: 0
| | `multinode` _[MultinodeSpec](#multinodespec)_ | Multinode is the configuration for multinode components. | | | +| `scalingAdapter` _[ScalingAdapter](#scalingadapter)_ | ScalingAdapter configures whether this service uses the DynamoGraphDeploymentScalingAdapter.
When enabled (default), replicas are managed via DGDSA and external autoscalers can scale
the service using the Scale subresource. When disabled, replicas can be modified directly. | | | #### DynamoGraphDeployment @@ -314,6 +319,83 @@ _Appears in:_ | `deployment` _[DeploymentStatus](#deploymentstatus)_ | Deployment tracks the auto-created DGD when AutoApply is true.
Contains name, namespace, state, and creation status of the managed DGD. | | Optional: \{\}
| +#### DynamoGraphDeploymentScalingAdapter + + + +DynamoGraphDeploymentScalingAdapter provides a scaling interface for individual services +within a DynamoGraphDeployment. It implements the Kubernetes scale +subresource, enabling integration with HPA, KEDA, and custom autoscalers. + +The adapter acts as an intermediary between autoscalers and the DGD, +ensuring that only the adapter controller modifies the DGD's service replicas. +This prevents conflicts when multiple autoscaling mechanisms are in play. + + + + + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `apiVersion` _string_ | `nvidia.com/v1alpha1` | | | +| `kind` _string_ | `DynamoGraphDeploymentScalingAdapter` | | | +| `metadata` _[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#objectmeta-v1-meta)_ | Refer to Kubernetes API documentation for fields of `metadata`. | | | +| `spec` _[DynamoGraphDeploymentScalingAdapterSpec](#dynamographdeploymentscalingadapterspec)_ | | | | +| `status` _[DynamoGraphDeploymentScalingAdapterStatus](#dynamographdeploymentscalingadapterstatus)_ | | | | + + +#### DynamoGraphDeploymentScalingAdapterSpec + + + +DynamoGraphDeploymentScalingAdapterSpec defines the desired state of DynamoGraphDeploymentScalingAdapter + + + +_Appears in:_ +- [DynamoGraphDeploymentScalingAdapter](#dynamographdeploymentscalingadapter) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `replicas` _integer_ | Replicas is the desired number of replicas for the target service.
This field is modified by external autoscalers (HPA/KEDA/Planner) or manually by users. | | Minimum: 0
Required: \{\}
| +| `dgdRef` _[DynamoGraphDeploymentServiceRef](#dynamographdeploymentserviceref)_ | DGDRef references the DynamoGraphDeployment and the specific service to scale. | | Required: \{\}
| + + +#### DynamoGraphDeploymentScalingAdapterStatus + + + +DynamoGraphDeploymentScalingAdapterStatus defines the observed state of DynamoGraphDeploymentScalingAdapter + + + +_Appears in:_ +- [DynamoGraphDeploymentScalingAdapter](#dynamographdeploymentscalingadapter) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `replicas` _integer_ | Replicas is the current number of replicas for the target service.
This is synced from the DGD's service replicas and is required for the scale subresource. | | | +| `selector` _string_ | Selector is a label selector string for the pods managed by this adapter.
Required for HPA compatibility via the scale subresource. | | | +| `lastScaleTime` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#time-v1-meta)_ | LastScaleTime is the last time the adapter scaled the target service. | | | + + +#### DynamoGraphDeploymentServiceRef + + + +DynamoGraphDeploymentServiceRef identifies a specific service within a DynamoGraphDeployment + + + +_Appears in:_ +- [DynamoGraphDeploymentScalingAdapterSpec](#dynamographdeploymentscalingadapterspec) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `name` _string_ | Name of the DynamoGraphDeployment | | MinLength: 1
Required: \{\}
| +| `serviceName` _string_ | ServiceName is the key name of the service within the DGD's spec.services map to scale | | MinLength: 1
Required: \{\}
| + + #### DynamoGraphDeploymentSpec @@ -638,6 +720,25 @@ _Appears in:_ | `claims` _[ResourceClaim](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#resourceclaim-v1-core) array_ | Claims specifies resource claims for dynamic resource allocation | | | +#### ScalingAdapter + + + +ScalingAdapter configures whether a service uses the DynamoGraphDeploymentScalingAdapter +for replica management. When enabled (default), the DGDSA owns the replicas field and +external autoscalers (HPA, KEDA, Planner) can control scaling via the Scale subresource. + + + +_Appears in:_ +- [DynamoComponentDeploymentSharedSpec](#dynamocomponentdeploymentsharedspec) +- [DynamoComponentDeploymentSpec](#dynamocomponentdeploymentspec) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `disable` _boolean_ | Disable indicates whether the ScalingAdapter should be disabled for this service.
When false (default), a DGDSA is created and owns the replicas field.
When true, no DGDSA is created and replicas can be modified directly in the DGD. | false | | + + #### SharedMemorySpec diff --git a/docs/kubernetes/autoscaling.md b/docs/kubernetes/autoscaling.md new file mode 100644 index 0000000000..8adaf09107 --- /dev/null +++ b/docs/kubernetes/autoscaling.md @@ -0,0 +1,733 @@ +# Autoscaling + +This guide explains how to configure autoscaling for DynamoGraphDeployment (DGD) services using the `sglang-agg` example from `examples/backends/sglang/deploy/agg.yaml`. + +## Example DGD + +All examples in this guide use the following DGD: + +```yaml +# examples/backends/sglang/deploy/agg.yaml +apiVersion: nvidia.com/v1alpha1 +kind: DynamoGraphDeployment +metadata: + name: sglang-agg + namespace: default +spec: + services: + Frontend: + dynamoNamespace: sglang-agg + componentType: frontend + replicas: 1 + + decode: + dynamoNamespace: sglang-agg + componentType: worker + replicas: 1 + resources: + limits: + gpu: "1" +``` + +**Key identifiers:** +- **DGD name**: `sglang-agg` +- **Namespace**: `default` +- **Services**: `Frontend`, `decode` +- **dynamo_namespace label**: `default-sglang-agg` (used for metric filtering) + +## Overview + +Dynamo provides flexible autoscaling through the `DynamoGraphDeploymentScalingAdapter` (DGDSA) resource. When you deploy a DGD, the operator automatically creates one adapter per service (unless explicitly disabled). These adapters implement the Kubernetes [Scale subresource](https://kubernetes.io/docs/tasks/extend-kubernetes/custom-resources/custom-resource-definitions/#scale-subresource), enabling integration with: + +| Autoscaler | Description | Best For | +|------------|-------------|----------| +| **KEDA** | Event-driven autoscaling (recommended) | Most use cases | +| **Kubernetes HPA** | Native horizontal pod autoscaling | Simple CPU/memory-based scaling | +| **Dynamo Planner** | LLM-aware autoscaling with SLA optimization | Production LLM workloads | +| **Custom Controllers** | Any scale-subresource-compatible controller | Custom requirements | + +> **⚠️ Deprecation Notice**: The `spec.services[X].autoscaling` field in DGD is **deprecated and ignored**. Use DGDSA with HPA, KEDA, or Planner instead. If you have existing DGDs with `autoscaling` configured, you'll see a warning. Remove the field to silence the warning. + +## Architecture + +``` +┌──────────────────────────────────┐ ┌─────────────────────────────────────┐ +│ DynamoGraphDeployment │ │ Scaling Adapters (auto-created) │ +│ "sglang-agg" │ │ (one per service) │ +├──────────────────────────────────┤ ├─────────────────────────────────────┤ +│ │ │ │ +│ spec.services: │ │ ┌─────────────────────────────┐ │ ┌──────────────────┐ +│ │ │ │ sglang-agg-frontend │◄───┼──────│ Autoscalers │ +│ ┌────────────────────────┐◄───┼──────────┼──│ spec.replicas: 1 │ │ │ │ +│ │ Frontend: 1 replica │ │ │ └─────────────────────────────┘ │ │ • KEDA │ +│ └────────────────────────┘ │ │ │ │ • HPA │ +│ │ │ ┌─────────────────────────────┐ │ │ • Planner │ +│ ┌────────────────────────┐◄───┼──────────┼──│ sglang-agg-decode │◄───┼──────│ • Custom │ +│ │ decode: 1 replica │ │ │ │ spec.replicas: 1 │ │ │ │ +│ └────────────────────────┘ │ │ └─────────────────────────────┘ │ └──────────────────┘ +│ │ │ │ +└──────────────────────────────────┘ └─────────────────────────────────────┘ +``` + +**How it works:** + +1. You deploy a DGD with services (Frontend, decode) +2. The operator auto-creates one DGDSA per service +3. Autoscalers (KEDA, HPA, Planner) target the adapters via `/scale` subresource +4. 
Adapter controller syncs replica changes to the DGD +5. DGD controller reconciles the underlying pods + +## Viewing Scaling Adapters + +After deploying the `sglang-agg` DGD, verify the auto-created adapters: + +```bash +kubectl get dgdsa -n default + +# Example output: +# NAME DGD SERVICE REPLICAS AGE +# sglang-agg-frontend sglang-agg Frontend 1 5m +# sglang-agg-decode sglang-agg decode 1 5m +``` + +## Replica Ownership Model + +When DGDSA is enabled (the default), it becomes the **source of truth** for replica counts. This follows the same pattern as Kubernetes Deployments owning ReplicaSets. + +### How It Works + +1. **DGDSA owns replicas**: Autoscalers (HPA, KEDA, Planner) update the DGDSA's `spec.replicas` +2. **DGDSA syncs to DGD**: The DGDSA controller writes the replica count to the DGD's service +3. **Direct DGD edits blocked**: A validating webhook prevents users from directly editing `spec.services[X].replicas` in the DGD +4. **Controllers allowed**: Only authorized controllers (operator, Planner) can modify DGD replicas + +### Manual Scaling with DGDSA Enabled + +When DGDSA is enabled, use `kubectl scale` on the adapter (not the DGD): + +```bash +# ✅ Correct - scale via DGDSA +kubectl scale dgdsa sglang-agg-decode --replicas=3 + +# ❌ Blocked - direct DGD edit rejected by webhook +kubectl patch dgd sglang-agg --type=merge -p '{"spec":{"services":{"decode":{"replicas":3}}}}' +# Error: spec.services[decode].replicas cannot be modified directly when scaling adapter is enabled; +# use 'kubectl scale dgdsa/sglang-agg-decode --replicas=3' or update the DynamoGraphDeploymentScalingAdapter instead +``` + +## Disabling DGDSA for a Service + +If you want to manage replicas directly in the DGD (without autoscaling), you can disable the scaling adapter per service: + +```yaml +apiVersion: nvidia.com/v1alpha1 +kind: DynamoGraphDeployment +metadata: + name: sglang-agg +spec: + services: + Frontend: + replicas: 2 + scalingAdapter: + disable: true # ← No DGDSA created, direct edits allowed + + decode: + replicas: 1 # ← DGDSA created by default, managed via adapter +``` + +**When to disable DGDSA:** +- You want simple, manual replica management +- You don't need autoscaling for that service +- You prefer direct DGD edits over adapter-based scaling + +**When to keep DGDSA enabled (default):** +- You want to use HPA, KEDA, or Planner for autoscaling +- You want a clear separation between "desired scale" (adapter) and "deployment config" (DGD) +- You want protection against accidental direct replica edits + +## Autoscaling with Dynamo Planner + +The Dynamo Planner is an LLM-aware autoscaler that optimizes scaling decisions based on inference-specific metrics like Time To First Token (TTFT), Inter-Token Latency (ITL), and KV cache utilization. + +**When to use Planner:** +- You want LLM-optimized autoscaling out of the box +- You need coordinated scaling across prefill/decode services +- You want SLA-driven scaling (e.g., target TTFT < 500ms) + +**How Planner works:** + +Planner is deployed as a service component within your DGD. It: +1. Queries Prometheus for frontend metrics (request rate, latency, etc.) +2. Uses profiling data to predict optimal replica counts +3. Scales prefill/decode workers to meet SLA targets + +**Deployment:** + +The recommended way to deploy Planner is via `DynamoGraphDeploymentRequest` (DGDR). See the [SLA Planner Quick Start](../planner/sla_planner_quickstart.md) for complete instructions. 
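+
+Once Planner is running, it drives the same scaling adapters as any other autoscaler, so you can observe its decisions as replica counts change on the adapters (a quick check, using the `sglang-agg` adapters shown earlier):
+
+```bash
+# Watch Planner-driven replica changes on the scaling adapters
+kubectl get dgdsa -n default -w
+```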
+ +Example configurations with Planner: +- `examples/backends/vllm/deploy/disagg_planner.yaml` +- `examples/backends/sglang/deploy/disagg_planner.yaml` +- `examples/backends/trtllm/deploy/disagg_planner.yaml` + +For more details, see the [SLA Planner documentation](../planner/sla_planner.md). + +## Autoscaling with Kubernetes HPA + +The Horizontal Pod Autoscaler (HPA) is Kubernetes' native autoscaling solution. + +**When to use HPA:** +- You have simple, predictable scaling requirements +- You want to use standard Kubernetes tooling +- You need CPU or memory-based scaling + +> **Note**: For custom metrics (like TTFT or queue depth), consider using [KEDA](#autoscaling-with-keda-recommended) instead - it's simpler to configure. + +### Basic HPA (CPU-based) + +```yaml +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: sglang-agg-frontend-hpa + namespace: default +spec: + scaleTargetRef: + apiVersion: nvidia.com/v1alpha1 + kind: DynamoGraphDeploymentScalingAdapter + name: sglang-agg-frontend + minReplicas: 1 + maxReplicas: 10 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 70 + behavior: + scaleDown: + stabilizationWindowSeconds: 300 + scaleUp: + stabilizationWindowSeconds: 0 +``` + +### HPA with Dynamo Metrics + +Dynamo exports several metrics useful for autoscaling. These are available at the `/metrics` endpoint on each frontend pod. + +> **See also**: For a complete list of all Dynamo metrics, see the [Metrics Reference](../observability/metrics.md). For Prometheus and Grafana setup, see the [Prometheus and Grafana Setup Guide](../observability/prometheus-grafana.md). + +#### Available Dynamo Metrics + +| Metric | Type | Description | Good for scaling | +|--------|------|-------------|------------------| +| `dynamo_frontend_queued_requests` | Gauge | Requests waiting in HTTP queue | ✅ Workers | +| `dynamo_frontend_inflight_requests` | Gauge | Concurrent requests to engine | ✅ All services | +| `dynamo_frontend_time_to_first_token_seconds` | Histogram | TTFT latency | ✅ Workers | +| `dynamo_frontend_inter_token_latency_seconds` | Histogram | ITL latency | ✅ Decode | +| `dynamo_frontend_request_duration_seconds` | Histogram | Total request duration | ⚠️ General | +| `kvstats_gpu_cache_usage_percent` | Gauge | GPU KV cache usage (0-1) | ✅ Decode | + +#### Metric Labels + +Dynamo metrics include these labels for filtering: + +| Label | Description | Example | +|-------|-------------|---------| +| `dynamo_namespace` | Unique DGD identifier (`{k8s-namespace}-{dynamoNamespace}`) | `default-sglang-agg` | +| `model` | Model being served | `Qwen/Qwen3-0.6B` | + +> **Note**: When you have multiple DGDs in the same namespace, use `dynamo_namespace` to filter metrics for a specific DGD. + +#### Example: Scale Decode Service Based on TTFT + +Using HPA with Prometheus Adapter requires configuring external metrics. 
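+
+Before configuring the Prometheus Adapter, you can confirm the TTFT series actually exists in Prometheus (a quick sanity check, assuming the kube-prometheus service name used throughout this guide):
+
+```bash
+kubectl port-forward -n monitoring svc/prometheus-kube-prometheus-prometheus 9090:9090
+# In another terminal, query for the raw histogram series:
+curl -s 'http://localhost:9090/api/v1/query' \
+  --data-urlencode 'query=dynamo_frontend_time_to_first_token_seconds_bucket{dynamo_namespace="default-sglang-agg"}'
+```
+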
+ +**Step 1: Configure Prometheus Adapter** + +Add this to your Helm values file (e.g., `prometheus-adapter-values.yaml`): + +```yaml +# prometheus-adapter-values.yaml +prometheus: + url: http://prometheus-kube-prometheus-prometheus.monitoring.svc + port: 9090 + +rules: + external: + # TTFT p95 from frontend - used to scale decode + - seriesQuery: 'dynamo_frontend_time_to_first_token_seconds_bucket{namespace!=""}' + resources: + overrides: + namespace: {resource: "namespace"} + name: + as: "dynamo_ttft_p95_seconds" + metricsQuery: | + histogram_quantile(0.95, + sum(rate(dynamo_frontend_time_to_first_token_seconds_bucket{<<.LabelMatchers>>}[5m])) + by (le, namespace, dynamo_namespace) + ) +``` + +**Step 2: Install Prometheus Adapter** + +```bash +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update + +helm upgrade --install prometheus-adapter prometheus-community/prometheus-adapter \ + -n monitoring --create-namespace \ + -f prometheus-adapter-values.yaml +``` + +**Step 3: Verify the metric is available** + +```bash +kubectl get --raw "/apis/external.metrics.k8s.io/v1beta1/namespaces//dynamo_ttft_p95_seconds" | jq +``` + +**Step 4: Create the HPA** + +```yaml +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: sglang-agg-decode-hpa +spec: + scaleTargetRef: + apiVersion: nvidia.com/v1alpha1 + kind: DynamoGraphDeploymentScalingAdapter + name: sglang-agg-decode # ← DGD name + service name (lowercase) + minReplicas: 1 + maxReplicas: 10 + metrics: + - type: External + external: + metric: + name: dynamo_ttft_p95_seconds + selector: + matchLabels: + dynamo_namespace: "default-sglang-agg" # ← {namespace}-{dynamoNamespace} + target: + type: Value + value: "500m" # Scale up when TTFT p95 > 500ms + behavior: + scaleDown: + stabilizationWindowSeconds: 60 # Wait 1 min before scaling down + policies: + - type: Pods + value: 1 + periodSeconds: 30 + scaleUp: + stabilizationWindowSeconds: 0 # Scale up immediately + policies: + - type: Pods + value: 2 + periodSeconds: 30 +``` + +**How it works:** +1. Frontend pods export `dynamo_frontend_time_to_first_token_seconds` histogram +2. Prometheus Adapter calculates p95 TTFT per `dynamo_namespace` +3. HPA monitors this metric filtered by `dynamo_namespace: "default-sglang-agg"` +4. When TTFT p95 > 500ms, HPA scales up the `sglang-agg-decode` adapter +5. Adapter controller syncs the replica count to the DGD's `decode` service +6. 
More decode workers are created, reducing TTFT + +#### Example: Scale Based on Queue Depth + +Add this rule to your `prometheus-adapter-values.yaml` (alongside the TTFT rule): + +```yaml +# Add to rules.external in prometheus-adapter-values.yaml +- seriesQuery: 'dynamo_frontend_queued_requests{namespace!=""}' + resources: + overrides: + namespace: {resource: "namespace"} + name: + as: "dynamo_queued_requests" + metricsQuery: | + sum(<<.Series>>{<<.LabelMatchers>>}) by (namespace, dynamo_namespace) +``` + +Then create the HPA: + +```yaml +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: sglang-agg-decode-queue-hpa + namespace: default +spec: + scaleTargetRef: + apiVersion: nvidia.com/v1alpha1 + kind: DynamoGraphDeploymentScalingAdapter + name: sglang-agg-decode + minReplicas: 1 + maxReplicas: 10 + metrics: + - type: External + external: + metric: + name: dynamo_queued_requests + selector: + matchLabels: + dynamo_namespace: "default-sglang-agg" + target: + type: Value + value: "10" # Scale up when queue > 10 requests +``` + +## Autoscaling with KEDA (Recommended) + +KEDA (Kubernetes Event-driven Autoscaling) extends Kubernetes with event-driven autoscaling, supporting 50+ scalers including Prometheus. + +**Advantages over HPA + Prometheus Adapter:** +- No Prometheus Adapter configuration needed +- PromQL queries are defined in the ScaledObject itself (declarative, per-deployment) +- Easy to update - just `kubectl apply` the ScaledObject +- Can scale to zero when idle +- Supports multiple triggers per object + +**When to use KEDA:** +- You want simpler configuration (no Prometheus Adapter to manage) +- You need event-driven scaling (e.g., queue depth, Kafka, etc.) +- You want to scale to zero when idle + +### Installing KEDA + +```bash +# Add KEDA Helm repo +helm repo add kedacore https://kedacore.github.io/charts +helm repo update + +# Install KEDA +helm install keda kedacore/keda \ + --namespace keda \ + --create-namespace + +# Verify installation +kubectl get pods -n keda +``` + +> **Note**: If you have Prometheus Adapter installed, either uninstall it first (`helm uninstall prometheus-adapter -n monitoring`) or install KEDA with `--set metricsServer.enabled=false` to avoid API conflicts. 
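+
+You can confirm which backend currently serves the external metrics API, since only one apiservice can own it at a time (with the Helm install above, the service typically belongs to KEDA):
+
+```bash
+kubectl get apiservice v1beta1.external.metrics.k8s.io
+# The SERVICE column should point at KEDA's metrics apiserver (e.g., keda/keda-operator-metrics-apiserver)
+```
+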
+ +### Example: Scale Decode Based on TTFT + +Using the `sglang-agg` DGD from `examples/backends/sglang/deploy/agg.yaml`: + +```yaml +apiVersion: keda.sh/v1alpha1 +kind: ScaledObject +metadata: + name: sglang-agg-decode-scaler + namespace: default +spec: + scaleTargetRef: + apiVersion: nvidia.com/v1alpha1 + kind: DynamoGraphDeploymentScalingAdapter + name: sglang-agg-decode + minReplicaCount: 1 + maxReplicaCount: 10 + pollingInterval: 15 # Check metrics every 15 seconds + cooldownPeriod: 60 # Wait 60s before scaling down + triggers: + - type: prometheus + metadata: + # Update this URL to match your Prometheus service + serverAddress: http://prometheus-kube-prometheus-prometheus.monitoring.svc:9090 + metricName: dynamo_ttft_p95 + query: | + histogram_quantile(0.95, + sum(rate(dynamo_frontend_time_to_first_token_seconds_bucket{dynamo_namespace="default-sglang-agg"}[5m])) + by (le) + ) + threshold: "0.5" # Scale up when TTFT p95 > 500ms (0.5 seconds) + activationThreshold: "0.1" # Start scaling when TTFT > 100ms +``` + +Apply it: + +```bash +kubectl apply -f sglang-agg-decode-scaler.yaml +``` + +### Verify KEDA Scaling + +```bash +# Check ScaledObject status +kubectl get scaledobject -n default + +# KEDA creates an HPA under the hood - you can see it +kubectl get hpa -n default + +# Example output: +# NAME REFERENCE TARGETS MINPODS MAXPODS REPLICAS +# keda-hpa-sglang-agg-decode-scaler DynamoGraphDeploymentScalingAdapter/sglang-agg-decode 45m/500m 1 10 1 + +# Get detailed status +kubectl describe scaledobject sglang-agg-decode-scaler -n default +``` + +### Example: Scale Based on Queue Depth + +```yaml +apiVersion: keda.sh/v1alpha1 +kind: ScaledObject +metadata: + name: sglang-agg-decode-queue-scaler + namespace: default +spec: + scaleTargetRef: + apiVersion: nvidia.com/v1alpha1 + kind: DynamoGraphDeploymentScalingAdapter + name: sglang-agg-decode + minReplicaCount: 1 + maxReplicaCount: 10 + pollingInterval: 15 + cooldownPeriod: 60 + triggers: + - type: prometheus + metadata: + serverAddress: http://prometheus-kube-prometheus-prometheus.monitoring.svc:9090 + metricName: dynamo_queued_requests + query: | + sum(dynamo_frontend_queued_requests{dynamo_namespace="default-sglang-agg"}) + threshold: "10" # Scale up when queue > 10 requests +``` + +### How KEDA Works + +KEDA creates and manages an HPA under the hood: + +``` +┌──────────────────────────────────────────────────────────────────────┐ +│ You create: ScaledObject │ +│ - scaleTargetRef: sglang-agg-decode │ +│ - triggers: prometheus query │ +└──────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌──────────────────────────────────────────────────────────────────────┐ +│ KEDA Operator automatically creates: HPA │ +│ - name: keda-hpa-sglang-agg-decode-scaler │ +│ - scaleTargetRef: sglang-agg-decode │ +│ - metrics: External (from KEDA metrics server) │ +└──────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌──────────────────────────────────────────────────────────────────────┐ +│ DynamoGraphDeploymentScalingAdapter: sglang-agg-decode │ +│ - spec.replicas: updated by HPA │ +└──────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌──────────────────────────────────────────────────────────────────────┐ +│ DynamoGraphDeployment: sglang-agg │ +│ - spec.services.decode.replicas: synced from adapter │ +└──────────────────────────────────────────────────────────────────────┘ +``` + +## Mixed Autoscaling + +For disaggregated deployments (prefill + decode), you can use 
different autoscaling strategies for different services:
+
+```yaml
+---
+# HPA for Frontend (CPU-based)
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: sglang-agg-frontend-hpa
+  namespace: default
+spec:
+  scaleTargetRef:
+    apiVersion: nvidia.com/v1alpha1
+    kind: DynamoGraphDeploymentScalingAdapter
+    name: sglang-agg-frontend
+  minReplicas: 1
+  maxReplicas: 5
+  metrics:
+  - type: Resource
+    resource:
+      name: cpu
+      target:
+        type: Utilization
+        averageUtilization: 70
+
+---
+# KEDA for Decode (TTFT-based)
+apiVersion: keda.sh/v1alpha1
+kind: ScaledObject
+metadata:
+  name: sglang-agg-decode-scaler
+  namespace: default
+spec:
+  scaleTargetRef:
+    apiVersion: nvidia.com/v1alpha1
+    kind: DynamoGraphDeploymentScalingAdapter
+    name: sglang-agg-decode
+  minReplicaCount: 1
+  maxReplicaCount: 10
+  triggers:
+  - type: prometheus
+    metadata:
+      serverAddress: http://prometheus-kube-prometheus-prometheus.monitoring.svc:9090
+      query: |
+        histogram_quantile(0.95,
+          sum(rate(dynamo_frontend_time_to_first_token_seconds_bucket{dynamo_namespace="default-sglang-agg"}[5m]))
+          by (le)
+        )
+      threshold: "0.5"
+```
+
+## Manual Scaling
+
+### With DGDSA Enabled (Default)
+
+When DGDSA is enabled (the default), scale via the adapter:
+
+```bash
+kubectl scale dgdsa sglang-agg-decode -n default --replicas=3
+```
+
+Verify the scaling:
+
+```bash
+kubectl get dgdsa sglang-agg-decode -n default
+
+# Output:
+# NAME                DGD          SERVICE   REPLICAS   AGE
+# sglang-agg-decode   sglang-agg   decode    3          10m
+```
+
+> **Note**: If an autoscaler (KEDA, HPA, Planner) is managing the adapter, your change will be overwritten on the next evaluation cycle.
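+
+For illustration, the same change can be made by patching the adapter's scale subresource directly, which is what autoscalers do under the hood (an equivalent sketch, not an additional step; requires kubectl v1.24 or newer):
+
+```bash
+# Patch spec.replicas through the /scale subresource of the adapter
+kubectl patch dgdsa sglang-agg-decode -n default \
+  --subresource=scale --type=merge -p '{"spec":{"replicas":3}}'
+```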
+
+### With DGDSA Disabled
+
+If you've disabled the scaling adapter for a service, edit the DGD directly:
+
+```bash
+kubectl patch dgd sglang-agg --type=merge -p '{"spec":{"services":{"decode":{"replicas":3}}}}'
+```
+
+Or edit the YAML:
+
+```yaml
+spec:
+  services:
+    decode:
+      replicas: 3
+      scalingAdapter:
+        disable: true
+```
+
+## Best Practices
+
+### 1. Choose One Autoscaler Per Service
+
+Avoid configuring multiple autoscalers for the same service:
+
+| Configuration | Status |
+|---------------|--------|
+| HPA for frontend, Planner for prefill/decode | ✅ Good |
+| KEDA for all services | ✅ Good |
+| Planner only (default) | ✅ Good |
+| HPA + Planner both targeting decode | ❌ Bad - they will fight |
+
+### 2. Use Appropriate Metrics
+
+| Service Type | Recommended Metrics | Dynamo Metric |
+|--------------|---------------------|---------------|
+| Frontend | CPU utilization, request rate | `dynamo_frontend_requests_total` |
+| Prefill | Queue depth, TTFT | `dynamo_frontend_queued_requests`, `dynamo_frontend_time_to_first_token_seconds` |
+| Decode | KV cache utilization, ITL | `kvstats_gpu_cache_usage_percent`, `dynamo_frontend_inter_token_latency_seconds` |
+
+### 3. Configure Stabilization Windows
+
+Prevent thrashing with appropriate stabilization:
+
+```yaml
+# HPA
+behavior:
+  scaleDown:
+    stabilizationWindowSeconds: 300  # Wait 5 min before scaling down
+  scaleUp:
+    stabilizationWindowSeconds: 0    # Scale up immediately
+
+# KEDA
+spec:
+  cooldownPeriod: 300
+```
+
+### 4. Set Sensible Min/Max Replicas
+
+Always configure minimum and maximum replicas in your HPA/KEDA to prevent:
+- Scaling to zero (unless intentional)
+- Unbounded scaling that exhausts cluster resources
+
+## Troubleshooting
+
+### Adapters Not Created
+
+```bash
+# Check DGD status
+kubectl describe dgd sglang-agg -n default
+
+# Check operator logs
+kubectl logs -n dynamo-system deployment/dynamo-operator
+```
+
+### Scaling Not Working
+
+```bash
+# Check adapter status
+kubectl describe dgdsa sglang-agg-decode -n default
+
+# Check HPA/KEDA status
+kubectl describe hpa sglang-agg-decode-hpa -n default
+kubectl describe scaledobject sglang-agg-decode-scaler -n default
+
+# Verify metrics are available in the Kubernetes metrics API
+kubectl get --raw /apis/external.metrics.k8s.io/v1beta1
+```
+
+### Metrics Not Available
+
+If HPA/KEDA shows `<unknown>` for metrics:
+
+```bash
+# Check if Dynamo metrics are being scraped
+kubectl port-forward -n default svc/sglang-agg-frontend 8000:8000
+curl http://localhost:8000/metrics | grep dynamo_frontend
+
+# Example output:
+# dynamo_frontend_queued_requests{model="Qwen/Qwen3-0.6B"} 2
+# dynamo_frontend_inflight_requests{model="Qwen/Qwen3-0.6B"} 5
+
+# Verify Prometheus is scraping the metrics
+kubectl port-forward -n monitoring svc/prometheus-kube-prometheus-prometheus 9090:9090
+# Then query: dynamo_frontend_time_to_first_token_seconds_bucket
+
+# Check KEDA operator logs
+kubectl logs -n keda deployment/keda-operator
+```
+
+### Rapid Scaling Up and Down
+
+If you see unstable scaling:
+
+1. Check if multiple autoscalers are targeting the same adapter
+2. Increase `cooldownPeriod` in the KEDA ScaledObject
+3. Increase `stabilizationWindowSeconds` in the HPA behavior
+
+## References
+
+- [Kubernetes HPA Documentation](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/)
+- [KEDA Documentation](https://keda.sh/)
+- [Prometheus Adapter](https://github.com/kubernetes-sigs/prometheus-adapter)
+- [Planner Documentation](../planner/sla_planner.md)
+- [Dynamo Metrics Reference](../observability/metrics.md)
+- [Prometheus and Grafana Setup](../observability/prometheus-grafana.md)