fix qwen2vl image process (#1082)

SangChengC · sangchengmeng · web-flow · commit 32911095c81a · 2025-10-15T18:36:24.000+08:00
Co-authored-by: sangchengmeng <sangchengmeng@sensetime.com>
diff --git a/lightllm/models/qwen2_vl/model.py b/lightllm/models/qwen2_vl/model.py
@@ -51,8 +51,9 @@ def init_audioitem_extral_params(
 
     def get_image_token_length(self, img: ImageItem):
         width, height = img.image_w, img.image_h
+        factor = self.patch_size * self.merge_size
         resized_height, resized_width = smart_resize(
-            height=height, width=width, min_pixels=self.min_pixel, max_pixels=self.max_pixel
+            height=height, width=width, factor=factor, min_pixels=self.min_pixel, max_pixels=self.max_pixel
         )
         grid_h, grid_w = resized_height // self.patch_size, resized_width // self.patch_size
         token_num = (grid_h * grid_w) // (self.merge_size ** 2)
diff --git a/lightllm/models/qwen2_vl/qwen2_visual.py b/lightllm/models/qwen2_vl/qwen2_visual.py
@@ -311,7 +311,12 @@ def encode(self, images: List[ImageItem]):
                 uuids.append(img.uuid)
                 image_data = read_shm(get_shm_name_data(img.uuid))
                 image_data = Image.open(BytesIO(image_data))
-                image_data = resize_image(image_data)
+                image_data = resize_image(
+                    image_file=image_data,
+                    factor=self.processor.patch_size * self.processor.merge_size,
+                    min_pixels=self.processor.min_pixels,
+                    max_pixels=self.processor.max_pixels,
+                )
                 pixel_values, image_grid_thw = self.processor.preprocess(image_data)
                 img_tensors.append(pixel_values)
                 img_grids.append(image_grid_thw)
diff --git a/lightllm/models/qwen2_vl/vision_process.py b/lightllm/models/qwen2_vl/vision_process.py
@@ -35,34 +35,36 @@ def smart_resize(
     height: int, width: int, factor: int = IMAGE_FACTOR, min_pixels: int = MIN_PIXELS, max_pixels: int = MAX_PIXELS
 ) -> tuple[int, int]:
 
-    if max(height, width) / min(height, width) > MAX_RATIO:
+    if max(height, width) / min(height, width) > 200:
         raise ValueError(
-            f"absolute aspect ratio must be smaller than {MAX_RATIO}, got {max(height, width) / min(height, width)}"
+            f"absolute aspect ratio must be smaller than 200, got {max(height, width) / min(height, width)}"
         )
-    h_bar = max(factor, round(height / factor) * factor)
-    w_bar = max(factor, round(width / factor) * factor)
+    h_bar = round(height / factor) * factor
+    w_bar = round(width / factor) * factor
     if h_bar * w_bar > max_pixels:
         beta = math.sqrt((height * width) / max_pixels)
-        h_bar = math.floor(height / beta / factor) * factor
-        w_bar = math.floor(width / beta / factor) * factor
+        h_bar = max(factor, math.floor(height / beta / factor) * factor)
+        w_bar = max(factor, math.floor(width / beta / factor) * factor)
     elif h_bar * w_bar < min_pixels:
         beta = math.sqrt(min_pixels / (height * width))
         h_bar = math.ceil(height * beta / factor) * factor
         w_bar = math.ceil(width * beta / factor) * factor
     return h_bar, w_bar
 
 
-def resize_image(image_file: Image.Image, size_factor: int = IMAGE_FACTOR) -> tuple[Image.Image, int, int]:
+def resize_image(
+    image_file: Image.Image, factor: int = IMAGE_FACTOR, min_pixels: int = MIN_PIXELS, max_pixels: int = MAX_PIXELS
+) -> tuple[Image.Image, int, int]:
 
     image = image_file.convert("RGB")
     width, height = image.size
 
     resized_height, resized_width = smart_resize(
         height,
         width,
-        factor=size_factor,
-        min_pixels=MIN_PIXELS,
-        max_pixels=MAX_PIXELS,
+        factor=factor,
+        min_pixels=min_pixels,
+        max_pixels=max_pixels,
     )
     image = image.resize((resized_width, resized_height))