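Update microbenchmarks: simplify profiler return value and log its error, expand test config recipes and shapes, fix missing "Shape" column in results table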

jainapurva · jainapurva · commit 5ee6b589e6c1 · 2025-04-10T11:46:23.000-07:00
diff --git a/benchmarks/microbenchmarks/benchmark_inference.py b/benchmarks/microbenchmarks/benchmark_inference.py
@@ -94,11 +94,11 @@ def run(config: BenchmarkConfig) -> BenchmarkResult:
         if config.enable_profiler:
             print("Running profiler...")
             try:
-                result.profiler_json_path, result.perfetto_url = generate_model_profile(
+                result.profiler_json_path = generate_model_profile(
                     m_copy, input_data, config.profiler_file_name
                 )
-            except Exception:
-                print(f"Error running profiler for {config.name}")
+            except Exception as e:
+                print(f"Error running profiler for {config.name} with error: {e}")
 
         return result
     except Exception as e:
diff --git a/benchmarks/microbenchmarks/test/benchmark_config.yml b/benchmarks/microbenchmarks/test/benchmark_config.yml
@@ -2,51 +2,27 @@
 benchmark_mode: "inference"
 quantization_config_recipe_names:
   # Will run a baseline inference for model by default, without quantization for comparison
-  # - "int4wo-32"
-  # - "marlin"
   - "int8wo"
+  - "int8dq"
+  - "float8dq"
+  - "float8wo"
 # sparsity_config_recipe_names:
   # Will run a baseline inference for model by default, without sparsity for comparison
   # - "semi-sparse"
   # - "block"
 output_dir: "benchmarks/microbenchmarks/results"
 model_params:
-  # - name: "small_bf16_linear"
-  #   matrix_shapes:
-  #     - name: "custom"
-  #       shapes: [
-  #         [1024, 1024, 1024],  # [m, k, n]
-  #       ]
-  #   high_precision_dtype: "torch.bfloat16"
-  #   use_torch_compile: true
-  #   torch_compile_mode: "max-autotune"
-  #   device: "cuda"
-  #   model_type: "linear"
-  #   enable_profiler: true  # Enable profiling for this model
-
-  - name: "large_bf16_ln_linear"
+  - name: "small_bf16_linear"
     matrix_shapes:
       - name: "custom"
         shapes: [
+          [1024, 1024, 1024],  # [m, k, n]
           [2048, 4096, 1024],
-          # [4096, 4096, 1024]
+          [4096, 4096, 1024]
         ]
     high_precision_dtype: "torch.bfloat16"
     use_torch_compile: true
     torch_compile_mode: "max-autotune"
     device: "cuda"
     model_type: "linear"
     enable_profiler: true  # Enable profiling for this model
-    enable_memory_profile: true  # Enable memory profiling for this model
-
-  # - name: "cpu_fp32_linear"
-  #   matrix_shapes:
-  #     - name: "custom"
-  #       shapes: [
-  #         [4096, 4096, 1024]
-  #       ]
-  #   high_precision_dtype: "torch.float32"
-  #   use_torch_compile: false
-  #   device: "cpu"
-  #   model_type: "linear"
-  #   enable_profiler: true  # Enable profiling for this model
diff --git a/benchmarks/microbenchmarks/utils.py b/benchmarks/microbenchmarks/utils.py
@@ -473,7 +473,7 @@ def print_results(results: List[BenchmarkResult]):
             result.config.name,
             result.config.quantization or "baseline",
             result.config.sparsity or "none",
-            f"{result.config.shape_name} ({result.config.m}, {result.config.k}, {result.config.n})"
+            f"{result.config.shape_name} ({result.config.m}, {result.config.k}, {result.config.n})",
             f"{result.model_inference_time_in_ms:.2f}",
             str(result.config.enable_profiler),
         ]
@@ -485,6 +485,7 @@ def print_results(results: List[BenchmarkResult]):
         "Name",
         "Quantization",
         "Sparsity",
+        "Shape",
         "Inference Time (ms)",
         "Profiler Enabled",
     ]