Skip to content

Commit 9b42a09

Browse files
committed
cleanup
Signed-off-by: Kinjal Patel <kinjalpravin@nvidia.com>
1 parent 9e249ee commit 9b42a09

1 file changed

Lines changed: 3 additions & 6 deletions

File tree

examples/vllm_serve/hf_ptq_export.py

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -128,6 +128,7 @@ def is_model_on_gpu(model) -> bool:
128128
"""Returns if the model is fully loaded on GPUs."""
129129
return all("cuda" in str(param.device) for param in model.parameters())
130130

131+
131132
def get_tokenizer(ckpt_path, trust_remote_code=False):
132133
"""Returns the tokenizer from the model ckpt_path."""
133134
print(f"Initializing tokenizer from {ckpt_path}")
@@ -143,6 +144,7 @@ def get_tokenizer(ckpt_path, trust_remote_code=False):
143144

144145
return tokenizer
145146

147+
146148
def quantize_and_export_model(
147149
args: argparse.Namespace,
148150
):
@@ -188,7 +190,7 @@ def quantize_and_export_model(
188190
else:
189191
print("Model is already quantized, Skipping quantization...")
190192
quantized_model = model
191-
193+
192194
mtq.print_quant_summary(quantized_model)
193195
if not model_is_already_quantized:
194196
print("--------")
@@ -199,11 +201,6 @@ def quantize_and_export_model(
199201
print(f"example outputs after ptq: {generated_str_after_ptq}")
200202

201203
export_hf_vllm_fq_checkpoint(quantized_model, args.export_path)
202-
# from modelopt.torch.quantization.utils import get_quantizer_state_dict
203-
# quantized_model.save_pretrained(args.export_path, state_dict=quantized_model.state_dict(), save_modelopt_state=False)
204-
# modelopt_state = mto.modelopt_state(quantized_model)
205-
# modelopt_state["modelopt_state_weights"] = get_quantizer_state_dict(quantized_model)
206-
# torch.save(modelopt_state, f"{args.export_path}/modelopt_state.pth")
207204
tokenizer.save_pretrained(args.export_path)
208205
print(f"Model exported to {args.export_path}")
209206

0 commit comments

Comments
 (0)