Skip to content

RuntimeError: Attempted to send an sync request with an AsyncClient instance - kserve mlflow pytorch model error #2276

@matiaschaud

Description

@matiaschaud

Hello, I'm trying to send a successful inference request to my MLflow KServe endpoint.

I really appreciate your help with solving this problem!

The pytorch MLModel has this signature:

signature:
  inputs: '[{"name": "user", "type": "tensor", "tensor-spec": {"dtype": "int64", "shape":
    [-1]}}, {"name": "movie", "type": "tensor", "tensor-spec": {"dtype": "int64",
    "shape": [-1]}}]'
  outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float32", "shape": [-1,
    1]}}]'

I'm serving the Docker image generated by MLflow:

MODEL_URI="models:/recommender_production/1"

mlflow models build-docker \
    --model-uri $MODEL_URI \
    --name $IMAGE_NAME \
    --enable-mlserver \
    --env-manager conda

I'm trying this code:

from kserve import (
    RESTConfig, InferenceRESTClient,
    Model,
    ModelServer,
    InferRequest,
    InferInput,
    InferResponse,
    model_server,
)

from mlserver.types import InferenceRequest, Parameters, RequestInput
from mlserver.codecs import PandasCodec, NumpyCodec

config = RESTConfig(protocol="v2", retries=5, timeout=30)
client = InferenceRESTClient(config)
base_url = "http://movie-recommender-gpu.kubeflow-user-example-com.svc.cluster.local"
model_name = "recommender_production"
# Define the correct V2 payload structure
data_v2 = InferenceRequest(model_name = model_name,
    inputs=[
            RequestInput(name= "user", 
                  shape= [-1],  
                  datatype= "INT64",
                  data= [123,123],
                  parameters=Parameters(content_type=NumpyCodec.ContentType),),
            RequestInput(name="movie", 
                  shape=[-1],  
                  datatype="INT64",
                  data=[321,512],
                  parameters=Parameters(content_type=NumpyCodec.ContentType),)
    ],
    parameters=Parameters(content_type=PandasCodec.ContentType)
)

result = await client.infer(base_url, data_v2, model_name=model_name)
print(result)

But I'm getting this error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[50], line 35
     18 # Define the correct V2 payload structure
     19 data_v2 = InferenceRequest(model_name = model_name,
     20     inputs=[
     21             RequestInput(name= "user", 
   (...)     32     parameters=Parameters(content_type=PandasCodec.ContentType)
     33 )
---> 35 result = await client.infer(base_url, data_v2, model_name=model_name)
     36 print(result)

File /opt/conda/envs/movie-recommender-env/lib/python3.11/site-packages/kserve/inference_client.py:501, in InferenceRESTClient.infer(self, base_url, data, model_name, headers, response_headers, is_graph_endpoint, timeout)
    499 if isinstance(data, dict):
    500     data = orjson.dumps(data)
--> 501 response = await self._client.post(
    502     url, content=data, headers=headers, timeout=timeout
    503 )
    504 if self._config.verbose:
    505     logger.info(
    506         "response code: %s, content: %s", response.status_code, response.text
    507     )

File /opt/conda/envs/movie-recommender-env/lib/python3.11/site-packages/httpx/_client.py:1905, in AsyncClient.post(self, url, content, data, files, json, params, headers, cookies, auth, follow_redirects, timeout, extensions)
   1884 async def post(
   1885     self,
   1886     url: URL | str,
   (...)   1898     extensions: RequestExtensions | None = None,
   1899 ) -> Response:
   1900     """
   1901     Send a `POST` request.
   1902 
   1903     **Parameters**: See `httpx.request`.
   1904     """
-> 1905     return await self.request(
   1906         "POST",
   1907         url,
   1908         content=content,
   1909         data=data,
   1910         files=files,
   1911         json=json,
   1912         params=params,
   1913         headers=headers,
   1914         cookies=cookies,
   1915         auth=auth,
   1916         follow_redirects=follow_redirects,
   1917         timeout=timeout,
   1918         extensions=extensions,
   1919     )

File /opt/conda/envs/movie-recommender-env/lib/python3.11/site-packages/httpx/_client.py:1585, in AsyncClient.request(self, method, url, content, data, files, json, params, headers, cookies, auth, follow_redirects, timeout, extensions)
   1570     warnings.warn(message, DeprecationWarning)
   1572 request = self.build_request(
   1573     method=method,
   1574     url=url,
   (...)   1583     extensions=extensions,
   1584 )
-> 1585 return await self.send(request, auth=auth, follow_redirects=follow_redirects)

File /opt/conda/envs/movie-recommender-env/lib/python3.11/site-packages/httpx/_client.py:1674, in AsyncClient.send(self, request, stream, auth, follow_redirects)
   1670 self._set_timeout(request)
   1672 auth = self._build_request_auth(request, auth)
-> 1674 response = await self._send_handling_auth(
   1675     request,
   1676     auth=auth,
   1677     follow_redirects=follow_redirects,
   1678     history=[],
   1679 )
   1680 try:
   1681     if not stream:

File /opt/conda/envs/movie-recommender-env/lib/python3.11/site-packages/httpx/_client.py:1702, in AsyncClient._send_handling_auth(self, request, auth, follow_redirects, history)
   1699 request = await auth_flow.__anext__()
   1701 while True:
-> 1702     response = await self._send_handling_redirects(
   1703         request,
   1704         follow_redirects=follow_redirects,
   1705         history=history,
   1706     )
   1707     try:
   1708         try:

File /opt/conda/envs/movie-recommender-env/lib/python3.11/site-packages/httpx/_client.py:1739, in AsyncClient._send_handling_redirects(self, request, follow_redirects, history)
   1736 for hook in self._event_hooks["request"]:
   1737     await hook(request)
-> 1739 response = await self._send_single_request(request)
   1740 try:
   1741     for hook in self._event_hooks["response"]:

File /opt/conda/envs/movie-recommender-env/lib/python3.11/site-packages/httpx/_client.py:1771, in AsyncClient._send_single_request(self, request)
   1768 await timer.async_start()
   1770 if not isinstance(request.stream, AsyncByteStream):
-> 1771     raise RuntimeError(
   1772         "Attempted to send an sync request with an AsyncClient instance."
   1773     )
   1775 with request_context(request=request):
   1776     response = await transport.handle_async_request(request)

RuntimeError: Attempted to send an sync request with an AsyncClient instance.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions