Skip to content

Commit 534bea5

Browse files
authored
flip mx inference scaling setting to RCEIL (#3428)
* Update [ghstack-poisoned] * Update [ghstack-poisoned] * Update [ghstack-poisoned]
1 parent ca2132e commit 534bea5

File tree

3 files changed

+5
-1
lines changed

3 files changed

+5
-1
lines changed

torchao/prototype/mx_formats/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ Note: the accuracy results below are WIP and are not optimized yet.
230230
| recipe | wikitext word_perplexity | winogrande |
231231
| ------ | -------- | ---------- |
232232
| bfloat16 (baseline) | 7.5472105433748435 | 0.7426992896606156 |
- | mxfp8 | 7.609070006132819 | 0.7292817679558011 |
+ | mxfp8 | 7.605192917647689 | 0.7355958958168903 |
234234
| nvfp4 | 8.44478255417328 | 0.7182320441988951 |
235235

236236
To reproduce:

torchao/prototype/mx_formats/inference_workflow.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ def _mx_inference_linear_transform(
8585
block_size=config.block_size,
8686
kernel_preference=config.kernel_preference,
8787
is_swizzled_scales=True,
+ scaling_mode=ScaleCalculationMode.RCEIL,
8889
)
8990

9091
# Convert weight to MX Tensor
@@ -95,6 +96,7 @@ def _mx_inference_linear_transform(
9596
kernel_preference=config.kernel_preference,
9697
act_quant_kwargs=act_quant_kwargs,
9798
is_swizzled_scales=True,
+ scaling_mode=ScaleCalculationMode.RCEIL,
98100
)
99101

100102
module.weight = torch.nn.Parameter(quantized_weight, requires_grad=False)

torchao/prototype/mx_formats/mx_tensor.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@
8787
class QuantizeTensorToMXKwargs(QuantizeTensorKwargs):
8888
elem_dtype: Union[torch.dtype, str] = torch.float8_e4m3fn
8989
block_size: int = 32
+ # TODO(future PR): flip the scaling_mode default to RCEIL
9091
scaling_mode: ScaleCalculationMode = ScaleCalculationMode.FLOOR
9192
kernel_preference: KernelPreference = KernelPreference.EMULATED
9293
is_swizzled_scales: bool = False
@@ -533,6 +534,7 @@ def to_mx(
533534
data_hp: torch.Tensor,
534535
elem_dtype: Union[torch.dtype, str],
535536
block_size: int = BLOCK_SIZE_DEFAULT,
+ # TODO(future PR): flip the scaling_mode default to RCEIL
536538
scaling_mode: ScaleCalculationMode = ScaleCalculationMode.FLOOR,
537539
# TODO(future PR): switch default gemm to cublas
538540
kernel_preference: KernelPreference = KernelPreference.EMULATED,

0 commit comments

Comments
 (0)