Hello, I'm trying to send an inference request to my MLflow KServe endpoint, so far without success. I would really appreciate your help with solving this problem!
The PyTorch model's MLmodel file has this signature:
signature:
  inputs: '[{"name": "user", "type": "tensor", "tensor-spec": {"dtype": "int64", "shape": [-1]}},
    {"name": "movie", "type": "tensor", "tensor-spec": {"dtype": "int64", "shape": [-1]}}]'
  outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float32", "shape": [-1, 1]}}]'
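To spell out how I read that signature (the concrete values are just my own example): the model expects two aligned 1-D int64 tensors and returns a float32 tensor of shape (-1, 1).

import numpy as np

# Two aligned 1-D int64 tensors, e.g. a batch of two (user, movie) pairs
user = np.array([123, 456], dtype=np.int64)
movie = np.array([321, 512], dtype=np.int64)
# Expected output: a float32 array of shape (2, 1), one value per pair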
I'm serving the Docker image generated by MLflow:

MODEL_URI="models:/recommender_production/1"
mlflow models build-docker \
--model-uri $MODEL_URI \
--name $IMAGE_NAME \
--enable-mlserver \
--env-manager conda

I'm trying this code:
from kserve import (
RESTConfig, InferenceRESTClient,
Model,
ModelServer,
InferRequest,
InferInput,
InferResponse,
model_server,
)
from mlserver.types import InferenceRequest, Parameters, RequestInput
from mlserver.codecs import PandasCodec, NumpyCodec
config = RESTConfig(protocol="v2", retries=5, timeout=30)
client = InferenceRESTClient(config)
base_url = "http://movie-recommender-gpu.kubeflow-user-example-com.svc.cluster.local"
model_name = "recommender_production"
# Define the correct V2 payload structure
data_v2 = InferenceRequest(
    model_name=model_name,
    inputs=[
        RequestInput(
            name="user",
            shape=[-1],
            datatype="INT64",
            data=[123, 123],
            parameters=Parameters(content_type=NumpyCodec.ContentType),
        ),
        RequestInput(
            name="movie",
            shape=[-1],
            datatype="INT64",
            data=[321, 512],
            parameters=Parameters(content_type=NumpyCodec.ContentType),
        ),
    ],
    parameters=Parameters(content_type=PandasCodec.ContentType),
)
result = await client.infer(base_url, data_v2, model_name=model_name)
print(result)

But I'm getting this error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
Cell In[50], line 35
18 # Define the correct V2 payload structure
19 data_v2 = InferenceRequest(model_name = model_name,
20 inputs=[
21 RequestInput(name= "user",
(...) 32 parameters=Parameters(content_type=PandasCodec.ContentType)
33 )
---> 35 result = await client.infer(base_url, data_v2, model_name=model_name)
36 print(result)
File /opt/conda/envs/movie-recommender-env/lib/python3.11/site-packages/kserve/inference_client.py:501, in InferenceRESTClient.infer(self, base_url, data, model_name, headers, response_headers, is_graph_endpoint, timeout)
499 if isinstance(data, dict):
500 data = orjson.dumps(data)
--> 501 response = await self._client.post(
502 url, content=data, headers=headers, timeout=timeout
503 )
504 if self._config.verbose:
505 logger.info(
506 "response code: %s, content: %s", response.status_code, response.text
507 )
File /opt/conda/envs/movie-recommender-env/lib/python3.11/site-packages/httpx/_client.py:1905, in AsyncClient.post(self, url, content, data, files, json, params, headers, cookies, auth, follow_redirects, timeout, extensions)
1884 async def post(
1885 self,
1886 url: URL | str,
(...) 1898 extensions: RequestExtensions | None = None,
1899 ) -> Response:
1900 """
1901 Send a `POST` request.
1902
1903 **Parameters**: See `httpx.request`.
1904 """
-> 1905 return await self.request(
1906 "POST",
1907 url,
1908 content=content,
1909 data=data,
1910 files=files,
1911 json=json,
1912 params=params,
1913 headers=headers,
1914 cookies=cookies,
1915 auth=auth,
1916 follow_redirects=follow_redirects,
1917 timeout=timeout,
1918 extensions=extensions,
1919 )
File /opt/conda/envs/movie-recommender-env/lib/python3.11/site-packages/httpx/_client.py:1585, in AsyncClient.request(self, method, url, content, data, files, json, params, headers, cookies, auth, follow_redirects, timeout, extensions)
1570 warnings.warn(message, DeprecationWarning)
1572 request = self.build_request(
1573 method=method,
1574 url=url,
(...) 1583 extensions=extensions,
1584 )
-> 1585 return await self.send(request, auth=auth, follow_redirects=follow_redirects)
File /opt/conda/envs/movie-recommender-env/lib/python3.11/site-packages/httpx/_client.py:1674, in AsyncClient.send(self, request, stream, auth, follow_redirects)
1670 self._set_timeout(request)
1672 auth = self._build_request_auth(request, auth)
-> 1674 response = await self._send_handling_auth(
1675 request,
1676 auth=auth,
1677 follow_redirects=follow_redirects,
1678 history=[],
1679 )
1680 try:
1681 if not stream:
File /opt/conda/envs/movie-recommender-env/lib/python3.11/site-packages/httpx/_client.py:1702, in AsyncClient._send_handling_auth(self, request, auth, follow_redirects, history)
1699 request = await auth_flow.__anext__()
1701 while True:
-> 1702 response = await self._send_handling_redirects(
1703 request,
1704 follow_redirects=follow_redirects,
1705 history=history,
1706 )
1707 try:
1708 try:
File /opt/conda/envs/movie-recommender-env/lib/python3.11/site-packages/httpx/_client.py:1739, in AsyncClient._send_handling_redirects(self, request, follow_redirects, history)
1736 for hook in self._event_hooks["request"]:
1737 await hook(request)
-> 1739 response = await self._send_single_request(request)
1740 try:
1741 for hook in self._event_hooks["response"]:
File /opt/conda/envs/movie-recommender-env/lib/python3.11/site-packages/httpx/_client.py:1771, in AsyncClient._send_single_request(self, request)
1768 await timer.async_start()
1770 if not isinstance(request.stream, AsyncByteStream):
-> 1771 raise RuntimeError(
1772 "Attempted to send an sync request with an AsyncClient instance."
1773 )
1775 with request_context(request=request):
1776 response = await transport.handle_async_request(request)
RuntimeError: Attempted to send an sync request with an AsyncClient instance.
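From the traceback it looks like inference_client.py only serializes the payload when it is a dict (if isinstance(data, dict): data = orjson.dumps(data)), so my guess is that passing an mlserver InferenceRequest object straight through is what httpx chokes on. A plain dict in V2 / Open Inference Protocol form, like the untested sketch below, is what I plan to try next; the values are just placeholders:

from kserve import InferenceRESTClient, RESTConfig

config = RESTConfig(protocol="v2", retries=5, timeout=30)
client = InferenceRESTClient(config)

base_url = "http://movie-recommender-gpu.kubeflow-user-example-com.svc.cluster.local"
model_name = "recommender_production"

# Plain dict, so the client can orjson.dumps() it before POSTing
payload = {
    "inputs": [
        {"name": "user", "shape": [2], "datatype": "INT64", "data": [123, 123]},
        {"name": "movie", "shape": [2], "datatype": "INT64", "data": [321, 512]},
    ]
}

result = await client.infer(base_url, payload, model_name=model_name)
print(result)

Is that the intended way to call a v2 endpoint with InferenceRESTClient, or is there a supported way to pass a request object directly?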