Skip to content

Commit fe2d2f3

Browse files
committed
KV config has only quant_cfg meaningful
Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
1 parent a481bd1 commit fe2d2f3

File tree

1 file changed

+5
-8
lines changed

1 file changed

+5
-8
lines changed

modelopt/torch/quantization/config.py

Lines changed: 5 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -516,8 +516,7 @@ class QuantizerCfgEntry(TypedDict, total=False):
516516
},
517517
"enable": True,
518518
},
519-
],
520-
"algorithm": "max",
519+
]
521520
}
522521

523522
FP8_AFFINE_KV_CFG = {
@@ -529,8 +528,7 @@ class QuantizerCfgEntry(TypedDict, total=False):
529528
"bias": {-2: None, -4: None, "type": "static"},
530529
},
531530
},
532-
],
533-
"algorithm": "max",
531+
]
534532
}
535533

536534
_nvfp4_cfg = {
@@ -646,13 +644,13 @@ def _nvfp4_selective_quant_cfg(
646644
},
647645
"enable": True,
648646
},
649-
],
647+
]
650648
}
651649

652650
NVFP4_KV_CFG = {
653651
"quant_cfg": [
654652
{"quantizer_path": "*[kv]_bmm_quantizer", "cfg": _nvfp4_cfg, "enable": True},
655-
],
653+
]
656654
}
657655

658656
# Moved from examples/diffusers/quantization/config.py to here
@@ -714,8 +712,7 @@ def _nvfp4_selective_quant_cfg(
714712
"enable": True,
715713
},
716714
{"quantizer_path": "*v_bmm_quantizer", "cfg": _nvfp4_cfg, "enable": True},
717-
],
718-
"algorithm": "max",
715+
]
719716
}
720717

721718
NVFP4_SVDQUANT_DEFAULT_CFG = _nvfp4_selective_quant_cfg(

0 commit comments

Comments
 (0)