Skip to content

Commit 437bbf6

Browse files
authored
Concurrency limits for Nemotron PDF reader (#1220)
1 parent 0c0eed7 commit 437bbf6

File tree

2 files changed

+19
-5
lines changed

2 files changed

+19
-5
lines changed

packages/paper-qa-nemotron/src/paperqa_nemotron/api.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -301,7 +301,8 @@ async def _call_nvidia_api(
301301
if response.choices[0].finish_reason == "length":
302302
raise NemotronLengthError(
303303
f"Model response {response} indicates the input"
304-
f" image of shape {image.shape} is too large or the model started babbling."
304+
f" image of shape {image.shape} is too large or the model started babbling.",
305+
response.choices[0], # Include if callers want
305306
)
306307
if (
307308
response.choices[0].finish_reason != "tool_calls"
@@ -438,7 +439,8 @@ async def _call_sagemaker_api(
438439
if response.choices[0].finish_reason == "length":
439440
raise NemotronLengthError(
440441
f"Model response {response} indicates the input"
441-
f" image of shape {image.shape} is too large or the model started babbling."
442+
f" image of shape {image.shape} is too large or the model started babbling.",
443+
response.choices[0], # Include if callers want
442444
)
443445
if (
444446
response.choices[0].finish_reason != "stop"

packages/paper-qa-nemotron/src/paperqa_nemotron/reader.py

Lines changed: 15 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
from typing import Any, cast
1010

1111
import pypdfium2 as pdfium
12+
from lmi.utils import gather_with_concurrency
1213
from paperqa.types import ParsedMedia, ParsedMetadata, ParsedText
1314
from paperqa.utils import ImpossibleParsingError
1415
from tenacity import RetryError
@@ -31,6 +32,7 @@ async def parse_pdf_to_pages(
3132
full_page: bool = False,
3233
dpi: int | None = 300,
3334
api_params: Mapping[str, Any] | None = None,
35+
concurrency: int | asyncio.Semaphore | None = 128,
3436
**_: Any,
3537
) -> ParsedText:
3638
"""Parse a PDF using Nvidia's nemotron-parse VLM.
@@ -48,6 +50,10 @@ async def parse_pdf_to_pages(
4850
dpi: Optional DPI (dots per inch) for image resolution,
4951
if set as None then pypdfium2's default 1 scale will be employed.
5052
api_params: Optional parameters to pass to the nemotron-parse API.
53+
concurrency: Optional concurrency semaphore on concurrent processing of pages,
54+
use to put a ceiling on memory usage. Default is 128 to prioritize reader
55+
speed over memory, but not get obliterated by huge 1000-page PDFs.
56+
Set as None to disable concurrency limits, processing all pages at once.
5157
**_: Thrown away kwargs.
5258
5359
Returns:
@@ -203,9 +209,15 @@ async def process_page(
203209

204210
content: dict[str, str | tuple[str, list[ParsedMedia]]] = {}
205211
total_length = count_media = 0
206-
for page_num, page_content in await asyncio.gather(
207-
*(process_page(i) for i in range(start_page, end_page))
208-
):
212+
213+
gather = (
214+
asyncio.gather(*(process_page(i) for i in range(start_page, end_page)))
215+
if concurrency is None
216+
else gather_with_concurrency(
217+
concurrency, (process_page(i) for i in range(start_page, end_page))
218+
)
219+
)
220+
for page_num, page_content in await gather:
209221
content[page_num] = page_content
210222
if parse_media:
211223
page_text, page_media = page_content # type: ignore[misc]

0 commit comments

Comments
 (0)