
Commit 145ad9b

nmoeller and moonbox3 authored

Python: Upgrade Onnx Connector to use 0.9.0 (#13162)
### Motivation and Context

Fixes: #13001

### Description

Version 0.9.0 of the onnxruntime-genai package introduced very helpful methods, such as applying chat templates automatically from the tokenizer, as well as support for multiple audio and image files. This PR adds the following functionality:

- [x] Add inference with multiple images
- [x] Add inference with multiple audios
- [x] Chat templates for non-multimodal models are read via ONNX

### Samples

Text sample with ONNX:

<img width="1728" height="90" alt="image" src="https://github.com/user-attachments/assets/b93d9fc1-4e38-4fa9-b535-cc316e0900ed" />

Image sample with ONNX:

<img width="1721" height="91" alt="image" src="https://github.com/user-attachments/assets/47761beb-b728-4936-a6b6-fd253e041689" />

### Contribution Checklist

- [x] The code builds clean without any errors or warnings
- [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations
- [x] All unit tests pass, and I have added new tests where possible
- [x] I didn't break anyone 😄

---------

Co-authored-by: Evan Mattson <[email protected]>
1 parent b1ecee2 · commit 145ad9b
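For orientation, a minimal usage sketch of the upgraded connector with a multi-image request. This is not part of the commit: the model folder and image paths are placeholders, `max_length` is assumed to be a valid ONNX GenAI search option, and the call goes through the connector's public `get_chat_message_content` base-class API.

```python
import asyncio

from semantic_kernel.connectors.ai.onnx import OnnxGenAIChatCompletion, OnnxGenAIPromptExecutionSettings
from semantic_kernel.contents import ChatHistory, ChatMessageContent, ImageContent, TextContent


async def main() -> None:
    # "phi4mm" is the multimodal template added by this PR; the model folder
    # below is a placeholder for a local Phi-4-multimodal ONNX export.
    service = OnnxGenAIChatCompletion(template="phi4mm", ai_model_path="path/to/phi4-mm-onnx")
    settings = OnnxGenAIPromptExecutionSettings(max_length=3072)

    history = ChatHistory()
    history.add_message(
        ChatMessageContent(
            role="user",
            items=[
                TextContent(text="Compare these two photos."),
                # ONNX GenAI loads media from file paths, so each URI must
                # point to a local file (raw bytes are not supported).
                ImageContent(uri="photo_1.png"),
                ImageContent(uri="photo_2.png"),
            ],
        )
    )

    result = await service.get_chat_message_content(history, settings)
    print(result)


asyncio.run(main())
```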

File tree

7 files changed (+4300, −4034 lines)


python/samples/concepts/setup/chat_completion_services.py

Lines changed: 2 additions & 6 deletions

```diff
@@ -332,13 +332,9 @@ def get_onnx_chat_completion_service_and_request_settings() -> tuple[
     Please refer to the Semantic Kernel Python documentation for more information:
     https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel
     """
-    from semantic_kernel.connectors.ai.onnx import (
-        OnnxGenAIChatCompletion,
-        OnnxGenAIPromptExecutionSettings,
-        ONNXTemplate,
-    )
+    from semantic_kernel.connectors.ai.onnx import OnnxGenAIChatCompletion, OnnxGenAIPromptExecutionSettings

-    chat_service = OnnxGenAIChatCompletion(ONNXTemplate.PHI3, service_id=service_id)
+    chat_service = OnnxGenAIChatCompletion(template="phi4mm", service_id=service_id)
     request_settings = OnnxGenAIPromptExecutionSettings(service_id=service_id)

     return chat_service, request_settings
```
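A side note on the sample change above: `ONNXTemplate` is a `str` enum, so the string literal the sample now passes and the enum member are equivalent values. A tiny runnable sketch:

```python
from semantic_kernel.connectors.ai.onnx.utils import ONNXTemplate

# ONNXTemplate subclasses str, so the sample's template="phi4mm" argument
# and the enum member compare (and validate) as equal.
assert ONNXTemplate("phi4mm") is ONNXTemplate.PHI4MM
assert ONNXTemplate.PHI4MM == "phi4mm"
```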

python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_chat_completion.py

Lines changed: 44 additions & 12 deletions

```diff
@@ -1,5 +1,6 @@
 # Copyright (c) Microsoft. All rights reserved.

+import json
 import logging
 import sys
 from collections.abc import AsyncGenerator
@@ -10,7 +11,6 @@
 else:
     from typing_extensions import override  # pragma: no cover

-
 from pydantic import ValidationError

 from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
@@ -20,6 +20,7 @@
 from semantic_kernel.connectors.ai.onnx.utils import ONNXTemplate, apply_template
 from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
 from semantic_kernel.contents import (
+    AudioContent,
     ChatHistory,
     ChatMessageContent,
     ImageContent,
@@ -37,12 +38,12 @@
 class OnnxGenAIChatCompletion(ChatCompletionClientBase, OnnxGenAICompletionBase):
     """OnnxGenAI text completion service."""

-    template: ONNXTemplate
+    template: ONNXTemplate | None
     SUPPORTS_FUNCTION_CALLING: ClassVar[bool] = False

     def __init__(
         self,
-        template: ONNXTemplate,
+        template: ONNXTemplate | None = None,
         ai_model_path: str | None = None,
         ai_model_id: str | None = None,
         env_file_path: str | None = None,
@@ -80,6 +81,12 @@ def __init__(

         super().__init__(ai_model_id=ai_model_id, ai_model_path=settings.chat_model_folder, template=template, **kwargs)

+        if self.enable_multi_modality and template is None:
+            raise ServiceInitializationError(
+                "When using a multi-modal model, a template must be specified."
+                " Please provide a ONNXTemplate in the constructor."
+            )
+
     @override
     async def _inner_get_chat_message_contents(
         self,
@@ -101,7 +108,8 @@ async def _inner_get_chat_message_contents(
         assert isinstance(settings, OnnxGenAIPromptExecutionSettings)  # nosec
         prompt = self._prepare_chat_history_for_request(chat_history)
         images = self._get_images_from_history(chat_history)
-        choices = await self._generate_next_token(prompt, settings, images)
+        audios = self._get_audios_from_history(chat_history)
+        choices = await self._generate_next_token(prompt, settings, images=images, audios=audios)
         return [self._create_chat_message_content(choice) for choice in choices]

     @override
@@ -127,7 +135,8 @@ async def _inner_get_streaming_chat_message_contents(
         assert isinstance(settings, OnnxGenAIPromptExecutionSettings)  # nosec
         prompt = self._prepare_chat_history_for_request(chat_history)
         images = self._get_images_from_history(chat_history)
-        async for chunk in self._generate_next_token_async(prompt, settings, images):
+        audios = self._get_audios_from_history(chat_history)
+        async for chunk in self._generate_next_token_async(prompt, settings, images=images, audios=audios):
             yield [
                 self._create_streaming_chat_message_content(choice_index, new_token, function_invoke_attempt)
                 for choice_index, new_token in enumerate(chunk)
@@ -159,9 +168,21 @@ def _create_streaming_chat_message_content(
     def _prepare_chat_history_for_request(
         self, chat_history: ChatHistory, role_key: str = "role", content_key: str = "content"
     ) -> Any:
-        return apply_template(chat_history, self.template)
+        if self.template:
+            return apply_template(chat_history, self.template)
+        return self.tokenizer.apply_chat_template(
+            json.dumps(self._chat_messages_to_dicts(chat_history)),
+            add_generation_prompt=True,
+        )
+
+    def _chat_messages_to_dicts(self, chat_history: "ChatHistory") -> list[dict[str, Any]]:
+        return [
+            message.to_dict(role_key="role", content_key="content")
+            for message in chat_history.messages
+            if isinstance(message, ChatMessageContent)
+        ]

-    def _get_images_from_history(self, chat_history: "ChatHistory") -> ImageContent | None:
+    def _get_images_from_history(self, chat_history: "ChatHistory") -> list[ImageContent] | None:
         images = []
         for message in chat_history.messages:
             for image in message.items:
@@ -174,11 +195,22 @@ def _get_images_from_history(self, chat_history: "ChatHistory") -> ImageContent
                         raise ServiceInvalidExecutionSettingsError(
                             "Image Content URI needs to be set, because onnx can only work with file paths"
                         )
-        # Currently Onnx Runtime only supports one image
-        # Later we will add support for multiple images
-        if len(images) > 1:
-            raise ServiceInvalidExecutionSettingsError("The model does not support more than one image")
-        return images[-1] if images else None
+        return images if len(images) else None
+
+    def _get_audios_from_history(self, chat_history: "ChatHistory") -> list[AudioContent] | None:
+        audios = []
+        for message in chat_history.messages:
+            for audio in message.items:
+                if isinstance(audio, AudioContent):
+                    if not self.enable_multi_modality:
+                        raise ServiceInvalidExecutionSettingsError("The model does not support multi-modality")
+                    if audio.uri:
+                        audios.append(audio)
+                    else:
+                        raise ServiceInvalidExecutionSettingsError(
+                            "Audio Content URI needs to be set, because onnx can only work with file paths"
+                        )
+        return audios if len(audios) else None

     @override
     def get_prompt_execution_settings_class(self) -> type["PromptExecutionSettings"]:
```
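The new fallback in `_prepare_chat_history_for_request` is the headline 0.9.0 feature: when no `ONNXTemplate` is given (text-only models), the history is serialized to role/content dicts and the tokenizer applies the chat template shipped with the model. A standalone sketch of that path against the raw onnxruntime-genai API, mirroring the connector's call above; the model path is a placeholder:

```python
import json

import onnxruntime_genai as og

model = og.Model("path/to/text-model-onnx")  # placeholder path
tokenizer = og.Tokenizer(model)

# The same shape _chat_messages_to_dicts produces from a ChatHistory.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is ONNX Runtime GenAI?"},
]

# New in 0.9.0: the tokenizer reads the model's own chat template, so the
# connector no longer needs a hand-written template for text-only models.
prompt = tokenizer.apply_chat_template(json.dumps(messages), add_generation_prompt=True)
print(prompt)
```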

python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_completion_base.py

Lines changed: 17 additions & 14 deletions

```diff
@@ -6,7 +6,7 @@
 from typing import Any

 from semantic_kernel.connectors.ai.onnx.onnx_gen_ai_prompt_execution_settings import OnnxGenAIPromptExecutionSettings
-from semantic_kernel.contents import ImageContent
+from semantic_kernel.contents import AudioContent, ImageContent
 from semantic_kernel.exceptions import ServiceInitializationError, ServiceInvalidResponseError
 from semantic_kernel.kernel_pydantic import KernelBaseModel

@@ -50,7 +50,7 @@ def __init__(self, ai_model_path: str, **kwargs) -> None:
             tokenizer = OnnxRuntimeGenAi.Tokenizer(model)
             tokenizer_stream = tokenizer.create_stream()
         except Exception as ex:
-            raise ServiceInitializationError("Failed to initialize OnnxTextCompletion service", ex) from ex
+            raise ServiceInitializationError("Failed to initialize OnnxCompletion service", ex) from ex

         super().__init__(
             model=model,
@@ -64,25 +64,27 @@ async def _generate_next_token_async(
         self,
         prompt: str,
         settings: OnnxGenAIPromptExecutionSettings,
-        image: ImageContent | None = None,
+        images: list[ImageContent] | None = None,
+        audios: list[AudioContent] | None = None,
     ) -> AsyncGenerator[list[str], Any]:
         try:
             params = OnnxRuntimeGenAi.GeneratorParams(self.model)
             params.set_search_options(**settings.prepare_settings_dict())
+            generator = OnnxRuntimeGenAi.Generator(self.model, params)
             if not self.enable_multi_modality:
                 input_tokens = self.tokenizer.encode(prompt)
-                params.input_ids = input_tokens
+                generator.append_tokens(input_tokens)
             else:
-                if image is not None:
-                    # With the use of Pybind there is currently no way to load images from bytes
-                    # We can only open images from a file path currently
-                    image = OnnxRuntimeGenAi.Images.open(str(image.uri))
-                input_tokens = self.tokenizer(prompt, images=image)
-                params.set_inputs(input_tokens)
-            generator = OnnxRuntimeGenAi.Generator(self.model, params)
+                # With the use of Pybind in ONNX there is currently no way to load images from bytes
+                # We can only open images & audios from a file path currently
+                if images is not None:
+                    images = OnnxRuntimeGenAi.Images.open(*[str(image.uri) for image in images])
+                if audios is not None:
+                    audios = OnnxRuntimeGenAi.Audios.open(*[str(audio.uri) for audio in audios])
+                input_tokens = self.tokenizer(prompt, images=images, audios=audios)
+                generator.set_inputs(input_tokens)

             while not generator.is_done():
-                generator.compute_logits()
                 generator.generate_next_token()
                 new_token_choices = [self.tokenizer_stream.decode(token) for token in generator.get_next_tokens()]
                 yield new_token_choices
@@ -94,10 +96,11 @@ async def _generate_next_token(
         self,
         prompt: str,
         settings: OnnxGenAIPromptExecutionSettings,
-        image: ImageContent | None = None,
+        images: list[ImageContent] | None = None,
+        audios: list[AudioContent] | None = None,
     ):
         token_choices: list[str] = []
-        async for new_token_choice in self._generate_next_token_async(prompt, settings, image):
+        async for new_token_choice in self._generate_next_token_async(prompt, settings, images, audios=audios):
             # zip only works if the lists are the same length
             if len(token_choices) == 0:
                 token_choices = new_token_choice
```
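The base-class changes track the 0.9.0 generation API: prompt tokens are now appended to the `Generator` instead of being set on `GeneratorParams`, and the separate `compute_logits()` step is gone. A minimal end-to-end sketch of that loop with raw onnxruntime-genai, using only calls that appear in this diff; the model path is a placeholder and the phi-style prompt is for illustration:

```python
import onnxruntime_genai as og

model = og.Model("path/to/text-model-onnx")  # placeholder path
tokenizer = og.Tokenizer(model)
stream = tokenizer.create_stream()

params = og.GeneratorParams(model)
params.set_search_options(max_length=256)

# 0.9.0 style: create the generator first, then feed it the prompt tokens.
generator = og.Generator(model, params)
generator.append_tokens(tokenizer.encode("<|user|>\nHello!<|end|>\n<|assistant|>\n"))

while not generator.is_done():
    # generate_next_token() now runs the forward pass itself; the old
    # explicit generator.compute_logits() call has been removed.
    generator.generate_next_token()
    for token in generator.get_next_tokens():
        print(stream.decode(token), end="", flush=True)
```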

python/semantic_kernel/connectors/ai/onnx/utils.py

Lines changed: 57 additions & 2 deletions

```diff
@@ -2,6 +2,7 @@
 from enum import Enum

 from semantic_kernel.contents import AuthorRole, ChatHistory, ImageContent, TextContent
+from semantic_kernel.contents.audio_content import AudioContent
 from semantic_kernel.exceptions import ServiceException, ServiceInvalidRequestError


@@ -19,6 +20,8 @@ class ONNXTemplate(str, Enum):

     PHI3 = "phi3"
     PHI3V = "phi3v"
+    PHI4 = "phi4"
+    PHI4MM = "phi4mm"
     GEMMA = "gemma"
     LLAMA = "llama"
     NONE = "none"
@@ -39,9 +42,11 @@ def apply_template(history: ChatHistory, template: ONNXTemplate) -> str:
     """
     template_functions = {
         ONNXTemplate.PHI3: phi3_template,
+        ONNXTemplate.PHI4: phi4_template,
         ONNXTemplate.GEMMA: gemma_template,
         ONNXTemplate.LLAMA: llama_template,
         ONNXTemplate.PHI3V: phi3v_template,
+        ONNXTemplate.PHI4MM: phi4mm_template,
         ONNXTemplate.NONE: lambda text: text,
     }

@@ -67,6 +72,22 @@ def phi3_template(history: ChatHistory) -> str:
     return phi3_input


+def phi4_template(history: ChatHistory) -> str:
+    """Generates a formatted string from the chat history for use with the phi4 model.
+
+    Args:
+        history (ChatHistory): An object containing the chat history with a list of messages.
+
+    Returns:
+        str: A formatted string where each message is prefixed with the role and suffixed with an end marker.
+    """
+    phi4_input = ""
+    for message in history.messages:
+        phi4_input += f"<|{message.role.value}|>\n{message.content}<|end|>\n"
+    phi4_input += "<|assistant|>\n"
+    return phi4_input
+
+
 def phi3v_template(history: ChatHistory) -> str:
     """Generates a formatted string from a given chat history for use with the phi3v model.

@@ -78,22 +99,56 @@ def phi3v_template(history: ChatHistory) -> str:
         the role of each message (system, user, assistant) and the type of content (text, image).
     """
     phi3v_input = ""
+    image_count = 0
     for message in history.messages:
         if message.role == AuthorRole.SYSTEM:
             phi3v_input += f"<|system|>\n{message.content}<|end|>\n"
         if message.role == AuthorRole.USER:
             for item in message.items:
                 if isinstance(item, TextContent):
                     phi3v_input += f"<|user|>\n{item.text}<|end|>\n"
-                # Currently only one image is supported in Onnx
                 if isinstance(item, ImageContent):
-                    phi3v_input += "<|image_1|>\n"
+                    phi3v_input += f"<|image_{image_count + 1}|>\n"
+                    image_count += 1
         if message.role == AuthorRole.ASSISTANT:
             phi3v_input += f"<|assistant|>\n{message.content}<|end|>\n"
     phi3v_input += "<|assistant|>\n"
     return phi3v_input


+def phi4mm_template(history: ChatHistory) -> str:
+    """Generates a formatted string from a given chat history for use with the phi4mm model.
+
+    Args:
+        history (ChatHistory): An object containing the chat history with messages.
+
+    Returns:
+        str: A formatted string representing the chat history, with special tokens indicating
+            the role of each message (system, user, assistant) and the type of content (text, image).
+    """
+    phi4mm_input = ""
+    image_count = 0
+    audio_count = 0
+    for message in history.messages:
+        if message.role == AuthorRole.SYSTEM:
+            phi4mm_input += f"<|system|>\n{message.content}<|end|>\n"
+        if message.role == AuthorRole.USER:
+            for item in message.items:
+                if isinstance(item, TextContent):
+                    phi4mm_input += f"<|user|>\n{item.text}<|end|>\n"
+                if isinstance(item, ImageContent):
+                    phi4mm_input += f"<|image_{image_count + 1}|>\n"
+                    image_count += 1
+                if isinstance(item, AudioContent):
+                    phi4mm_input += f"<|audio_{audio_count + 1}|>\n"
+                    audio_count += 1
+        if message.role == AuthorRole.ASSISTANT:
+            phi4mm_input += f"<|assistant|>\n{message.content}<|end|>\n"
+    phi4mm_input += "<|assistant|>\n"
+    return phi4mm_input
+
+
 def gemma_template(history: ChatHistory) -> str:
     """Generates a formatted string for the Gemma model based on the provided chat history.
```

python/tests/unit/connectors/ai/onnx/services/test_onnx_chat_completion.py

Lines changed: 4 additions & 3 deletions

```diff
@@ -52,8 +52,9 @@ def test_onnx_chat_completion_with_invalid_model():
     )


-def test_onnx_chat_completion_without_prompt_template():
-    with pytest.raises(TypeError):
+@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config_vision))
+def test_onnx_chat_completion_with_multimodality_without_prompt_template(gen_ai_config_vision):
+    with pytest.raises(ServiceInitializationError):
         OnnxGenAIChatCompletion()


@@ -147,7 +148,7 @@ def patch_open(*args, **kwargs):
     )

     last_image = chat_completion._get_images_from_history(history)
-    assert last_image == image_content
+    assert last_image == [image_content]


 @patch("onnxruntime_genai.Model")
```
