Skip to content

Commit ff30900

Browse files
authored
fix vlm quant bug (qwen25vl merger) (#445)
1 parent d9aa6bc commit ff30900

File tree

13 files changed

+96
-148
lines changed

13 files changed

+96
-148
lines changed

configs/quantization/methods/GPTQ/gptq_w_only_vlm.yml

Lines changed: 32 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -25,22 +25,38 @@ eval:
2525
bs: 1
2626
inference_per_block: False
2727
quant:
28-
method: GPTQ
29-
quant_objects: [vision, language] # default is [language]
30-
weight:
31-
bit: 4
32-
symmetric: False
33-
granularity: per_group
34-
group_size: 128
35-
# calib_algo: mse
36-
# mse_b_num: 2
37-
special:
38-
actorder: True
39-
static_groups: False
40-
percdamp: 0.01
41-
blocksize: 128
42-
true_sequential: True
43-
quant_out: True
28+
vision:
29+
method: GPTQ
30+
weight:
31+
bit: 4
32+
symmetric: False
33+
granularity: per_group
34+
group_size: 128
35+
# calib_algo: mse
36+
# mse_b_num: 2
37+
special:
38+
actorder: True
39+
static_groups: False
40+
percdamp: 0.01
41+
blocksize: 128
42+
true_sequential: True
43+
quant_out: True
44+
language:
45+
method: GPTQ
46+
weight:
47+
bit: 4
48+
symmetric: False
49+
granularity: per_group
50+
group_size: 128
51+
# calib_algo: mse
52+
# mse_b_num: 2
53+
special:
54+
actorder: True
55+
static_groups: False
56+
percdamp: 0.01
57+
blocksize: 128
58+
true_sequential: True
59+
quant_out: True
4460
save:
4561
save_fake: False
4662
save_path: /path/to/save/

configs/quantization/methods/RTN/rtn_w_a_vlm.yml

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,28 @@ eval:
1313
bs: 1
1414
inference_per_block: False
1515
quant:
16-
method: RTN
17-
quant_objects: [vision, language] # default is [language]
18-
weight:
19-
bit: 8
20-
symmetric: True
21-
granularity: per_channel
22-
group_size: -1
23-
act:
24-
bit: 8
25-
symmetric: True
26-
granularity: per_token
16+
vision:
17+
method: RTN
18+
weight:
19+
bit: 8
20+
symmetric: True
21+
granularity: per_channel
22+
group_size: -1
23+
act:
24+
bit: 8
25+
symmetric: True
26+
granularity: per_token
27+
language:
28+
method: RTN
29+
weight:
30+
bit: 8
31+
symmetric: True
32+
granularity: per_channel
33+
group_size: -1
34+
act:
35+
bit: 8
36+
symmetric: True
37+
granularity: per_token
2738
save:
2839
save_fake: False
2940
save_path: /path/to/save/

configs/quantization/methods/SmoothQuant/smoothquant_w_a_vlm.yml

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -25,18 +25,30 @@ eval:
2525
bs: 1
2626
inference_per_block: False
2727
quant:
28-
method: SmoothQuant
29-
quant_objects: [vision, language]
30-
weight:
31-
bit: 8
32-
symmetric: True
33-
granularity: per_channel
34-
act:
35-
bit: 8
36-
symmetric: True
37-
granularity: per_token
38-
special:
39-
alpha: 0.8
28+
vision:
29+
method: SmoothQuant
30+
weight:
31+
bit: 8
32+
symmetric: True
33+
granularity: per_channel
34+
act:
35+
bit: 8
36+
symmetric: True
37+
granularity: per_token
38+
special:
39+
alpha: 0.8
40+
language:
41+
method: SmoothQuant
42+
weight:
43+
bit: 8
44+
symmetric: True
45+
granularity: per_channel
46+
act:
47+
bit: 8
48+
symmetric: True
49+
granularity: per_token
50+
special:
51+
alpha: 0.8
4052
save:
4153
save_trans: False
4254
save_fake: False

llmc/compression/quantization/base_blockwise_quantization.py

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -949,20 +949,11 @@ def deploy(self, quant_format, keep_device=False):
949949
self.set_no_quant_layer()
950950

951951
module = module_mapping[quant_format]
952-
if self.modality == 'vision':
953-
self.model.replace_vision_module_all(
954-
module,
955-
self.get_replacement_params(mode=quant_format, w_only=self.w_only),
956-
keep_device=keep_device,
957-
)
958-
if self.modality == 'language':
959-
self.model.replace_language_module_all(
960-
module,
961-
self.get_replacement_params(mode=quant_format, w_only=self.w_only),
962-
keep_device=keep_device,
963-
)
964-
if self.modality == 'video_gen':
965-
self.model.replace_video_gen_module_all(
952+
953+
self.model.set_modality(self.modality)
954+
logger.info(f'set modality: {self.modality}')
955+
if self.modality in ('vision', 'language', 'video_gen'):
956+
self.model.replace_module_all(
966957
module,
967958
self.get_replacement_params(mode=quant_format, w_only=self.w_only),
968959
keep_device=keep_device,

llmc/compression/quantization/llmint8.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def deploy(self, quant_format):
6666
logger.info(f'-- deploy_{quant_format}_model start --')
6767
logger.info(f'quant_config : {self.quant_config}')
6868

69-
self.model.replace_language_module_all(
69+
self.model.replace_module_all(
7070
FakeQuantLinear,
7171
self.get_replacement_params(
7272
mode='fake_quant', w_only=self.w_only, name=None

llmc/compression/quantization/tesseraq.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import gc
44
import math
55
import os
6-
import pdb
76
import random
87
from contextlib import nullcontext
98
from math import inf
@@ -268,7 +267,6 @@ def tesseraq_train(self, block):
268267

269268
if not math.isfinite(loss.item()):
270269
logger.info('Loss is NAN, stopping training')
271-
pdb.set_trace()
272270

273271
optimizer.zero_grad()
274272

llmc/compression/token_reduction/holitom.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -594,7 +594,6 @@ def prepare_inputs_labels_for_multimodal(
594594
if isinstance(modalities, str):
595595
modalities = [modalities]
596596

597-
# import pdb; pdb.set_trace()
598597
if type(images) is list or images.ndim == 5:
599598
mm_patch_merge_type = getattr(self.config, 'mm_patch_merge_type', 'flat')
600599
image_aspect_ratio = getattr(self.config, 'image_aspect_ratio', 'square')
@@ -733,7 +732,7 @@ def prepare_inputs_labels_for_multimodal(
733732
# currently image_feature is a tensor of shape (4, num_patches, hidden_size)
734733
# we want to first unflatten it to (2, 2, h, w, hidden_size)
735734
# rank0_print("At least we are reaching here")
736-
# import pdb; pdb.set_trace()
735+
737736
if image_idx in video_idx_in_batch: # video operations
738737
# rank0_print("Video")
739738
if mm_newline_position == 'grid':
@@ -1032,7 +1031,6 @@ def prepare_inputs_labels_for_multimodal(
10321031

10331032
cur_new_input_embeds = [x.to(self.device) for x in cur_new_input_embeds]
10341033

1035-
# import pdb; pdb.set_trace()
10361034
cur_new_input_embeds = torch.cat(cur_new_input_embeds)
10371035
cur_new_labels = torch.cat(cur_new_labels)
10381036

@@ -1157,7 +1155,7 @@ def prepare_inputs_labels_for_multimodal(
11571155
right_add = random.randint(left_add, self.config.pos_skipping_range)
11581156
position_ids[:, :split_position] += left_add
11591157
position_ids[:, split_position:] += right_add
1160-
# import pdb; pdb.set_trace()
1158+
11611159
# rank0_print("Finish preparing")
11621160
return (
11631161
None,

llmc/models/base_model.py

Lines changed: 1 addition & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -378,40 +378,7 @@ def get_extra_modules(self, block):
378378
def get_moe_gate(self, block):
379379
return None
380380

381-
def replace_vision_module_all(self, module, params_dict, keep_device=False):
382-
vision_model_linears = self.get_block_linears(self.vision_model)
383-
for name, m in vision_model_linears.items():
384-
M = module.new(m, **params_dict)
385-
386-
name_tmp = name.rsplit('.', 1)
387-
if len(name_tmp) == 2:
388-
parent_name = name_tmp[0]
389-
parent = self.vision_model.get_submodule(parent_name)
390-
child_name = name_tmp[1]
391-
elif len(name_tmp) == 1:
392-
parent = self.vision_model
393-
child_name = name_tmp[0]
394-
395-
setattr(parent, child_name, M)
396-
397-
gc.collect()
398-
torch.cuda.empty_cache()
399-
logger.info(f'The Replaced vision_model: {self.vision_model}')
400-
401-
def replace_language_module_all(self, module, params_dict, keep_device=False):
402-
for block_idx in range(len(self.blocks)):
403-
logger.info(f'Replace block index: {block_idx}/{len(self.blocks)}')
404-
if keep_device:
405-
self.replace_module_block(module, self.blocks[block_idx], block_idx, params_dict)
406-
else:
407-
self.blocks[block_idx].cuda()
408-
self.replace_module_block(module, self.blocks[block_idx], block_idx, params_dict)
409-
self.blocks[block_idx].cpu()
410-
gc.collect()
411-
torch.cuda.empty_cache()
412-
logger.info(f'The Replaced model: {self.model}')
413-
414-
def replace_video_gen_module_all(self, module, params_dict, keep_device=False):
381+
def replace_module_all(self, module, params_dict, keep_device=False):
415382
for block_idx in range(len(self.blocks)):
416383
logger.info(f'Replace block index: {block_idx}/{len(self.blocks)}')
417384
if keep_device:
@@ -422,7 +389,6 @@ def replace_video_gen_module_all(self, module, params_dict, keep_device=False):
422389
self.blocks[block_idx].cpu()
423390
gc.collect()
424391
torch.cuda.empty_cache()
425-
logger.info(f'The Replaced model: {self.model}')
426392

427393
def replace_module_block(self, module, block, block_idx, params_dict):
428394
if module in _LLMC_LN_TYPES_ + _TRANSFORMERS_LN_TYPES_:

llmc/models/internvl3_5.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ def __new__(cls, config, device_map=None, use_cache=False):
157157
if language_part == 'qwen3':
158158
from .qwen3 import Qwen3
159159

160-
class NewClass(InternVL2SharedBehavior, Qwen3):
160+
class NewClass(InternVL3_5SharedBehavior, Qwen3):
161161
def __init__(self, config, device_map=None, use_cache=False):
162162
super().__init__(config, device_map, use_cache)
163163
setattr(
@@ -170,7 +170,7 @@ def __init__(self, config, device_map=None, use_cache=False):
170170
return NewClass(config, device_map, use_cache)
171171

172172

173-
class InternVL2SharedBehavior():
173+
class InternVL3_5SharedBehavior():
174174
def build_model(self):
175175
self.eval_name = 'InternVL3_5Eval'
176176
self.vlm_model_config = AutoConfig.from_pretrained(

llmc/models/qwen2.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ def find_embed_layers(self):
2222

2323
def find_block_name(self):
2424
self.block_name_prefix = 'model.layers'
25-
self.pairs = {'q_proj': 'qkv', 'o_proj': 'out', 'up_proj': 'fc1'}
2625

2726
def get_embed_layers(self):
2827
return [self.embed_tokens]

0 commit comments

Comments (0)