|
2 | 2 | benchmark_mode: "inference" |
3 | 3 | quantization_config_recipe_names: |
4 | 4 | # By default, a baseline inference run without quantization is also performed for comparison |
5 | | - # - "int4wo-32" |
6 | | - # - "marlin" |
7 | 5 | - "int8wo" |
| 6 | + - "int8dq" |
| 7 | + - "float8dq" |
| 8 | + - "float8wo" |
8 | 9 | # sparsity_config_recipe_names: |
9 | 10 | # By default, a baseline inference run without sparsity is also performed for comparison |
10 | 11 | # - "semi-sparse" |
11 | 12 | # - "block" |
12 | 13 | output_dir: "benchmarks/microbenchmarks/results" |
13 | 14 | model_params: |
14 | | - # - name: "small_bf16_linear" |
15 | | - # matrix_shapes: |
16 | | - # - name: "custom" |
17 | | - # shapes: [ |
18 | | - # [1024, 1024, 1024], # [m, k, n] |
19 | | - # ] |
20 | | - # high_precision_dtype: "torch.bfloat16" |
21 | | - # use_torch_compile: true |
22 | | - # torch_compile_mode: "max-autotune" |
23 | | - # device: "cuda" |
24 | | - # model_type: "linear" |
25 | | - # enable_profiler: true # Enable profiling for this model |
26 | | - |
27 | | - - name: "large_bf16_ln_linear" |
| 15 | + - name: "small_bf16_linear" |
28 | 16 | matrix_shapes: |
29 | 17 | - name: "custom" |
30 | 18 | shapes: [ |
| 19 | + [1024, 1024, 1024], # [m, k, n] |
31 | 20 | [2048, 4096, 1024], |
32 | | - # [4096, 4096, 1024] |
| 21 | + [4096, 4096, 1024] |
33 | 22 | ] |
34 | 23 | high_precision_dtype: "torch.bfloat16" |
35 | 24 | use_torch_compile: true |
36 | 25 | torch_compile_mode: "max-autotune" |
37 | 26 | device: "cuda" |
38 | 27 | model_type: "linear" |
39 | 28 | enable_profiler: true # Enable profiling for this model |
40 | | - enable_memory_profile: true # Enable memory profiling for this model |
41 | | - |
42 | | - # - name: "cpu_fp32_linear" |
43 | | - # matrix_shapes: |
44 | | - # - name: "custom" |
45 | | - # shapes: [ |
46 | | - # [4096, 4096, 1024] |
47 | | - # ] |
48 | | - # high_precision_dtype: "torch.float32" |
49 | | - # use_torch_compile: false |
50 | | - # device: "cpu" |
51 | | - # model_type: "linear" |
52 | | - # enable_profiler: true # Enable profiling for this model |
0 commit comments