Skip to content

Commit 3291109

Browse files
SangChengC and sangchengmeng authored
fix qwen2vl image process (#1082)
Co-authored-by: sangchengmeng <[email protected]>
1 parent 071161a commit 3291109

File tree

3 files changed

+20
-12
lines changed

3 files changed

+20
-12
lines changed

lightllm/models/qwen2_vl/model.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,8 +51,9 @@ def init_audioitem_extral_params(
5151

5252
def get_image_token_length(self, img: ImageItem):
5353
width, height = img.image_w, img.image_h
54+
factor = self.patch_size * self.merge_size
5455
resized_height, resized_width = smart_resize(
55-
height=height, width=width, min_pixels=self.min_pixel, max_pixels=self.max_pixel
56+
height=height, width=width, factor=factor, min_pixels=self.min_pixel, max_pixels=self.max_pixel
5657
)
5758
grid_h, grid_w = resized_height // self.patch_size, resized_width // self.patch_size
5859
token_num = (grid_h * grid_w) // (self.merge_size ** 2)

lightllm/models/qwen2_vl/qwen2_visual.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -311,7 +311,12 @@ def encode(self, images: List[ImageItem]):
311311
uuids.append(img.uuid)
312312
image_data = read_shm(get_shm_name_data(img.uuid))
313313
image_data = Image.open(BytesIO(image_data))
314-
image_data = resize_image(image_data)
314+
image_data = resize_image(
315+
image_file=image_data,
316+
factor=self.processor.patch_size * self.processor.merge_size,
317+
min_pixels=self.processor.min_pixels,
318+
max_pixels=self.processor.max_pixels,
319+
)
315320
pixel_values, image_grid_thw = self.processor.preprocess(image_data)
316321
img_tensors.append(pixel_values)
317322
img_grids.append(image_grid_thw)

lightllm/models/qwen2_vl/vision_process.py

Lines changed: 12 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -35,34 +35,36 @@ def smart_resize(
3535
height: int, width: int, factor: int = IMAGE_FACTOR, min_pixels: int = MIN_PIXELS, max_pixels: int = MAX_PIXELS
3636
) -> tuple[int, int]:
3737

38-
if max(height, width) / min(height, width) > MAX_RATIO:
38+
if max(height, width) / min(height, width) > 200:
3939
raise ValueError(
40-
f"absolute aspect ratio must be smaller than {MAX_RATIO}, got {max(height, width) / min(height, width)}"
40+
f"absolute aspect ratio must be smaller than 200, got {max(height, width) / min(height, width)}"
4141
)
42-
h_bar = max(factor, round(height / factor) * factor)
43-
w_bar = max(factor, round(width / factor) * factor)
42+
h_bar = round(height / factor) * factor
43+
w_bar = round(width / factor) * factor
4444
if h_bar * w_bar > max_pixels:
4545
beta = math.sqrt((height * width) / max_pixels)
46-
h_bar = math.floor(height / beta / factor) * factor
47-
w_bar = math.floor(width / beta / factor) * factor
46+
h_bar = max(factor, math.floor(height / beta / factor) * factor)
47+
w_bar = max(factor, math.floor(width / beta / factor) * factor)
4848
elif h_bar * w_bar < min_pixels:
4949
beta = math.sqrt(min_pixels / (height * width))
5050
h_bar = math.ceil(height * beta / factor) * factor
5151
w_bar = math.ceil(width * beta / factor) * factor
5252
return h_bar, w_bar
5353

5454

55-
def resize_image(image_file: Image.Image, size_factor: int = IMAGE_FACTOR) -> tuple[Image.Image, int, int]:
55+
def resize_image(
56+
image_file: Image.Image, factor: int = IMAGE_FACTOR, min_pixels: int = MIN_PIXELS, max_pixels: int = MAX_PIXELS
57+
) -> tuple[Image.Image, int, int]:
5658

5759
image = image_file.convert("RGB")
5860
width, height = image.size
5961

6062
resized_height, resized_width = smart_resize(
6163
height,
6264
width,
63-
factor=size_factor,
64-
min_pixels=MIN_PIXELS,
65-
max_pixels=MAX_PIXELS,
65+
factor=factor,
66+
min_pixels=min_pixels,
67+
max_pixels=max_pixels,
6668
)
6769
image = image.resize((resized_width, resized_height))
6870

0 commit comments

Comments
 (0)