-
Notifications
You must be signed in to change notification settings - Fork 9.5k
Open
Description
🔎 Search before asking
- I have searched the PaddleOCR Docs and found no similar bug report.
- I have searched the PaddleOCR Issues and found no similar bug report.
- I have searched the PaddleOCR Discussions and found no similar bug report.
🐛 Bug (问题描述)
from PIL import Image
import torch
from transformers import AutoModelForCausalLM, AutoProcessor
# ---- Settings ----
model_path = "PaddlePaddle/PaddleOCR-VL"
image_path = "test.png"
task = "ocr" # Options: 'ocr' | 'table' | 'chart' | 'formula'
# ------------------
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
PROMPTS = {
"ocr": "OCR:",
"table": "Table Recognition:",
"formula": "Formula Recognition:",
"chart": "Chart Recognition:",
}
image = Image.open(image_path).convert("RGB")
model = AutoModelForCausalLM.from_pretrained(
model_path, trust_remote_code=True, torch_dtype=torch.bfloat16
).to(DEVICE).eval()
processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
messages = [
{"role": "user",
"content": [
{"type": "image", "image": image},
{"type": "text", "text": PROMPTS[task]},
]
}
]
inputs = processor.apply_chat_template(
messages,
tokenize=True,
add_generation_prompt=True,
return_dict=True,
return_tensors="pt"
).to(DEVICE)
outputs = model.generate(**inputs, max_new_tokens=1024)
outputs = processor.batch_decode(outputs, skip_special_tokens=True)[0]
print(outputs)
report error: TypeError: check_model_inputs..wrapped_fn() got an unexpected keyword argument 'input_ids'
🏃♂️ Environment (运行环境)
Name: transformers
Version: 4.57.3
🌰 Minimal Reproducible Example (最小可复现问题的Demo)
from PIL import Image
import torch
from transformers import AutoModelForCausalLM, AutoProcessor
# ---- Settings ----
model_path = "PaddlePaddle/PaddleOCR-VL"
image_path = "test.png"
task = "ocr" # Options: 'ocr' | 'table' | 'chart' | 'formula'
# ------------------
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
PROMPTS = {
"ocr": "OCR:",
"table": "Table Recognition:",
"formula": "Formula Recognition:",
"chart": "Chart Recognition:",
}
image = Image.open(image_path).convert("RGB")
model = AutoModelForCausalLM.from_pretrained(
model_path, trust_remote_code=True, torch_dtype=torch.bfloat16
).to(DEVICE).eval()
processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
messages = [
{"role": "user",
"content": [
{"type": "image", "image": image},
{"type": "text", "text": PROMPTS[task]},
]
}
]
inputs = processor.apply_chat_template(
messages,
tokenize=True,
add_generation_prompt=True,
return_dict=True,
return_tensors="pt"
).to(DEVICE)
outputs = model.generate(**inputs, max_new_tokens=1024)
outputs = processor.batch_decode(outputs, skip_special_tokens=True)[0]
print(outputs)
Metadata
Metadata
Assignees
Labels
No labels