3 files changed: +12 −12 lines changed

@@ -25,17 +25,17 @@ def string_to_config(s):
         return None
     elif s == "float8_rowwise":
         return Float8DynamicActivationFloat8WeightConfig(granularity=PerRow())
-    elif s == "int4_weight_float8_rowwise_activation":
+    elif s == "int4_groupwise_weight_float8_rowwise_activation":
         return Float8DynamicActivationInt4WeightConfig()
-    elif s == "int4_weight_only_hqq":
+    elif s == "int4_groupwise_hqq_weight_only":
         return Int4WeightOnlyConfig(
             group_size=32,
             int4_packing_format="tile_packed_to_4d",
             int4_choose_qparams_algorithm="hqq",
         )
-    elif s == "int8_weight_only":
+    elif s == "int8_rowwise_weight_only":
         return Int8WeightOnlyConfig()
-    elif s == "int8":
+    elif s == "int8_rowwise":
         return Int8DynamicActivationInt8WeightConfig()
     else:
         raise AssertionError(f"unsupported {s}")
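
For orientation, a minimal sketch of how a config returned by `string_to_config` is applied, assuming a recent torchao build (the toy model and shapes are illustrative, not part of this PR):

```python
# Minimal sketch, assuming a recent torchao; the toy model is illustrative.
import torch
import torch.nn as nn
from torchao.quantization import Int8DynamicActivationInt8WeightConfig, quantize_

model = nn.Sequential(nn.Linear(4096, 4096)).to(torch.bfloat16)

# "int8_rowwise" maps to dynamic int8 activations + int8 weights
quantize_(model, Int8DynamicActivationInt8WeightConfig())
```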
@@ -21,9 +21,9 @@ time python -u benchmarks/quantization/eval_accuracy_for_readme.py $BASE_ARGS 2>
 
 # quantized recipes
 # note:
-# * `int4_weight_float8_rowwise_activation` doesn't work with dtype_map auto: https://gist.github.com/vkuzo/6b128681b628744d445c553cdeac8a85
-# * `int4_weight_only_hqq` only works on A100
-for quant_recipe in float8_rowwise int4_weight_float8_rowwise_activation int4_weight_only_hqq int8_weight_only int8; do
+# * `int4_groupwise_weight_float8_rowwise_activation` doesn't work with dtype_map auto: https://gist.github.com/vkuzo/6b128681b628744d445c553cdeac8a85
+# * `int4_groupwise_hqq_weight_only` only works on A100
+for quant_recipe in float8_rowwise int4_groupwise_weight_float8_rowwise_activation int4_groupwise_hqq_weight_only int8_rowwise_weight_only int8_rowwise; do
     time python -u benchmarks/quantization/eval_accuracy_for_readme.py $BASE_ARGS --quant_recipe_name $quant_recipe 2>&1 | tee -a "$LOG_FILE"
 done
 
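
For readers who prefer to drive the same sweep from Python, a hedged equivalent of the loop above (plumbing for `$BASE_ARGS` and `$LOG_FILE` is omitted; add those arguments as your setup requires):

```python
# Hedged Python equivalent of the shell loop above; BASE_ARGS/LOG_FILE omitted.
import subprocess

RECIPES = [
    "float8_rowwise",
    "int4_groupwise_weight_float8_rowwise_activation",
    "int4_groupwise_hqq_weight_only",  # A100 only, per the note above
    "int8_rowwise_weight_only",
    "int8_rowwise",
]

for recipe in RECIPES:
    subprocess.run(
        ["python", "-u", "benchmarks/quantization/eval_accuracy_for_readme.py",
         "--quant_recipe_name", recipe],
        check=True,
    )
```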
@@ -5,12 +5,12 @@ Typically quantization algorithms will have different schemes for how the activa
 
 All the following benchmarks are for `meta-llama/Llama-3.1-8B` using `lm-eval`, measured on an H100 GPU.
 
-| Technique | wikitext-perplexity | winogrande | checkpoint size (GB) |
-| --------- | ------------------- | ---------- | -------------------- |
+| weight | activation | wikitext-perplexity | winogrande | checkpoint size (GB) |
+| ------ | ---------- | ------------------- | ---------- | -------------------- |
-| baseline (bfloat16) | 7.3315 | 0.7380 | 16.1 |
-| float8_rowwise weight, float8_rowwise activation | 7.4197 | 0.7388 | 9.1 |
-| int8_weight_only | 7.3451 | 0.7340 | 9.1 |
-| int8 weight, int8 activation | 7.4535 | 0.7285 | 9.1 |
+| bfloat16 | bfloat16 | 7.3315 | 0.7380 | 16.1 |
+| float8_rowwise | float8_rowwise | 7.4197 | 0.7388 | 9.1 |
+| int8_rowwise | bfloat16 | 7.3451 | 0.7340 | 9.1 |
+| int8_rowwise | int8_rowwise | 7.4535 | 0.7285 | 9.1 |
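
The `int8_rowwise` entries use one quantization scale per output row of the weight. A first-principles sketch of that scheme (illustrative shapes; not torchao's actual kernels):

```python
# First-principles sketch of rowwise int8 weight quantization; illustrative only.
import torch

w = torch.randn(4096, 4096, dtype=torch.bfloat16)
# One scale per output row, sized so each row's max magnitude maps to 127.
scale = w.abs().amax(dim=1, keepdim=True).float() / 127.0
w_int8 = torch.clamp(torch.round(w.float() / scale), -128, 127).to(torch.int8)
w_deq = w_int8.float() * scale  # dequantized approximation of w
```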
 
 To reproduce, run the following command:
 