Commit 416ac39

Add OpenAI docs and integration tests
1 parent aaac161

7 files changed (+258 −75 lines)
Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
+## OpenAI API Compatibility for NeMo Guardrails
+
+NeMo Guardrails provides server-side compatibility with OpenAI API endpoints, enabling applications that use OpenAI clients to integrate seamlessly and add guardrails to their LLM interactions. Point your OpenAI client at `http://localhost:8000` (or your server URL) and use the standard `/v1/chat/completions` endpoint.
+
+## Feature Support Matrix
+
+The following table outlines which OpenAI API features are currently supported when using NeMo Guardrails:
+
+| Feature | Status | Notes |
+| :------ | :----: | :---- |
+| **Basic Chat Completion** | ✔ Supported | Full support for standard chat completions with guardrails applied |
+| **Streaming Responses** | ✔ Supported | Server-Sent Events (SSE) streaming with `stream=true` |
+| **Multimodal Input** | ✖ Unsupported | Text and image inputs (vision models) are supported with guardrails, but not yet through the OpenAI-compatible endpoint |
+| **Function Calling** | ✖ Unsupported | Not yet implemented; guardrails need structured output support |
+| **Tools** | ✖ Unsupported | Related to function calling; requires action flow integration |
+| **Response Format (JSON Mode)** | ✖ Unsupported | Structured output with guardrails requires additional validation logic |
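
For orientation, this is what the endpoint described in the new document accepts from a stock OpenAI client. A minimal sketch, assuming a guardrails server on `http://localhost:8000` and a configuration named `demo_config` (a placeholder name):

```python
# Minimal sketch: point the official OpenAI client at a running
# NeMo Guardrails server instead of api.openai.com.
# Assumptions: server on http://localhost:8000 and a deployed
# guardrails configuration named "demo_config" (placeholder).
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8000/v1",  # guardrails server, not OpenAI
    api_key="not-needed",  # required by the client; assumed unchecked by the server
)

response = client.chat.completions.create(
    model="demo_config",  # maps to config_id on the server
    messages=[{"role": "user", "content": "Hello!"}],
)
print(response.choices[0].message.content)
```

The `model` field doubling as the configuration id follows from the request-schema changes in `api.py` below.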

nemoguardrails/server/api.py

Lines changed: 60 additions & 59 deletions
@@ -28,7 +28,8 @@
 
 from fastapi import FastAPI, Request
 from fastapi.middleware.cors import CORSMiddleware
-from openai.types.chat.chat_completion import ChatCompletion, Choice
+from openai.types.chat.chat_completion import Choice
+from openai.types.chat.chat_completion_message import ChatCompletionMessage
 from openai.types.model import Model
 from pydantic import BaseModel, Field, root_validator, validator
 from starlette.responses import StreamingResponse
@@ -191,7 +192,7 @@ async def root_handler():
 app.single_config_id = None
 
 
-class RequestBody(ChatCompletion):
+class RequestBody(BaseModel):
     config_id: Optional[str] = Field(
         default=os.getenv("DEFAULT_CONFIG_ID", None),
         description="The id of the configuration to be used. If not set, the default configuration will be used.",
@@ -208,70 +209,67 @@ class RequestBody(ChatCompletion):
         max_length=255,
         description="The id of an existing thread to which the messages should be added.",
     )
-    model: Optional[str] = Field(
-        default=None,
-        description="The model used for the chat completion.",
+    messages: Optional[List[dict]] = Field(
+        default=None, description="The list of messages in the current conversation."
     )
-    id: Optional[str] = Field(
+    context: Optional[dict] = Field(
         default=None,
-        description="The id of the chat completion.",
+        description="Additional context data to be added to the conversation.",
+    )
+    stream: Optional[bool] = Field(
+        default=False,
+        description="If set, partial message deltas will be sent, like in ChatGPT. "
+        "Tokens will be sent as data-only server-sent events as they become "
+        "available, with the stream terminated by a data: [DONE] message.",
     )
-    object: Optional[str] = Field(
-        default="chat.completion",
-        description="The object type, which is always chat.completion",
+    options: GenerationOptions = Field(
+        default_factory=GenerationOptions,
+        description="Additional options for controlling the generation.",
     )
-    created: Optional[int] = Field(
+    state: Optional[dict] = Field(
         default=None,
-        description="The Unix timestamp (in seconds) of when the chat completion was created.",
+        description="A state object that should be used to continue the interaction.",
     )
-    choices: Optional[List[Choice]] = Field(
+    # Standard OpenAI completion parameters
+    model: Optional[str] = Field(
         default=None,
-        description="The list of choices for the chat completion.",
+        description="The model to use for chat completion. Maps to config_id for backward compatibility.",
     )
     max_tokens: Optional[int] = Field(
         default=None,
         description="The maximum number of tokens to generate.",
     )
     temperature: Optional[float] = Field(
         default=None,
-        description="The temperature to use for the chat completion.",
+        description="Sampling temperature to use.",
     )
     top_p: Optional[float] = Field(
         default=None,
-        description="The top p to use for the chat completion.",
+        description="Top-p sampling parameter.",
     )
-    stop: Optional[Union[str, List[str]]] = Field(
+    stop: Optional[str] = Field(
         default=None,
-        description="The stop sequences to use for the chat completion.",
+        description="Stop sequences.",
     )
     presence_penalty: Optional[float] = Field(
         default=None,
-        description="The presence penalty to use for the chat completion.",
+        description="Presence penalty parameter.",
     )
     frequency_penalty: Optional[float] = Field(
         default=None,
-        description="The frequency penalty to use for the chat completion.",
+        description="Frequency penalty parameter.",
     )
-    messages: Optional[List[dict]] = Field(
-        default=None, description="The list of messages in the current conversation."
-    )
-    context: Optional[dict] = Field(
+    function_call: Optional[dict] = Field(
         default=None,
-        description="Additional context data to be added to the conversation.",
+        description="Function call parameter.",
     )
-    stream: Optional[bool] = Field(
-        default=False,
-        description="If set, partial message deltas will be sent, like in ChatGPT. "
-        "Tokens will be sent as data-only server-sent events as they become "
-        "available, with the stream terminated by a data: [DONE] message.",
-    )
-    options: GenerationOptions = Field(
-        default_factory=GenerationOptions,
-        description="Additional options for controlling the generation.",
+    logit_bias: Optional[dict] = Field(
+        default=None,
+        description="Logit bias parameter.",
     )
-    state: Optional[dict] = Field(
+    log_probs: Optional[bool] = Field(
         default=None,
-        description="A state object that should be used to continue the interaction.",
+        description="Log probabilities parameter.",
     )
 
     @root_validator(pre=True)
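
The reshaped `RequestBody` keeps the guardrails-specific fields (`config_id`, `context`, `state`, `options`) and adds the standard OpenAI parameters alongside them, so a raw POST can mix both in one flat JSON object. An illustrative payload (every value is a placeholder; `httpx` is used only as an example HTTP client):

```python
# Illustrative raw request against the reshaped RequestBody schema.
# All values are placeholders; httpx is just an example client.
import httpx

payload = {
    "config_id": "demo_config",  # guardrails-specific field
    "messages": [{"role": "user", "content": "Hi!"}],
    "context": {"user_id": "u-123"},  # extra conversation context
    "stream": False,
    # standard OpenAI parameters, accepted side by side:
    "temperature": 0.2,
    "max_tokens": 256,
}

resp = httpx.post("http://localhost:8000/v1/chat/completions", json=payload)
print(resp.json()["choices"][0]["message"]["content"])
```
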
@@ -453,7 +451,7 @@ async def _format_streaming_response(
                 "choices": [
                     {
                         "delta": chunk,
-                        "index": None,
+                        "index": 0,
                         "finish_reason": None,
                     }
                 ],
@@ -472,7 +470,7 @@ async def _format_streaming_response(
                 "choices": [
                     {
                         "delta": {"content": chunk},
-                        "index": None,
+                        "index": 0,
                         "finish_reason": None,
                     }
                 ],
@@ -487,7 +485,7 @@ async def _format_streaming_response(
                 "choices": [
                     {
                         "delta": {"content": str(chunk)},
-                        "index": None,
+                        "index": 0,
                         "finish_reason": None,
                     }
                 ],
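
These three hunks fix the delta chunks to report `index: 0` instead of `null`, which is what OpenAI client libraries expect when parsing streamed choices. A sketch of consuming the stream, under the same placeholder server and configuration as above:

```python
# Sketch: stream deltas from the guardrails server with the OpenAI client.
# Same placeholder server (localhost:8000) and "demo_config" as above.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="not-needed")

stream = client.chat.completions.create(
    model="demo_config",
    messages=[{"role": "user", "content": "Write a haiku."}],
    stream=True,  # data-only server-sent events, terminated by [DONE]
)
for chunk in stream:
    delta = chunk.choices[0].delta  # the single choice now sits at index 0
    if delta.content:
        print(delta.content, end="", flush=True)
print()
```
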
@@ -536,16 +534,16 @@ async def chat_completion(body: RequestBody, request: Request):
             id=f"chatcmpl-{uuid.uuid4()}",
             object="chat.completion",
             created=int(time.time()),
-            model=config_ids[0] if config_ids else None,
+            model=config_ids[0] if config_ids else "unknown",
             choices=[
                 Choice(
                     index=0,
-                    message={
-                        "content": f"Could not load the {config_ids} guardrails configuration. "
+                    message=ChatCompletionMessage(
+                        content=f"Could not load the {config_ids} guardrails configuration. "
                         f"An internal error has occurred.",
-                        "role": "assistant",
-                    },
-                    finish_reason="error",
+                        role="assistant",
+                    ),
+                    finish_reason="stop",
                     logprobs=None,
                 )
             ],
@@ -569,15 +567,15 @@ async def chat_completion(body: RequestBody, request: Request):
             id=f"chatcmpl-{uuid.uuid4()}",
             object="chat.completion",
             created=int(time.time()),
-            model=None,
+            model=config_ids[0] if config_ids else "unknown",
             choices=[
                 Choice(
                     index=0,
-                    message={
-                        "content": "The `thread_id` must have a minimum length of 16 characters.",
-                        "role": "assistant",
-                    },
-                    finish_reason="error",
+                    message=ChatCompletionMessage(
+                        content="The `thread_id` must have a minimum length of 16 characters.",
+                        role="assistant",
+                    ),
+                    finish_reason="stop",
                     logprobs=None,
                 )
             ],
@@ -661,11 +659,14 @@ async def chat_completion(body: RequestBody, request: Request):
         "id": f"chatcmpl-{uuid.uuid4()}",
         "object": "chat.completion",
         "created": int(time.time()),
-        "model": config_ids[0] if config_ids else None,
+        "model": config_ids[0] if config_ids else "unknown",
         "choices": [
             Choice(
                 index=0,
-                message=bot_message,
+                message=ChatCompletionMessage(
+                    role="assistant",
+                    content=bot_message["content"],
+                ),
                 finish_reason="stop",
                 logprobs=None,
             )
@@ -687,15 +688,15 @@ async def chat_completion(body: RequestBody, request: Request):
             id=f"chatcmpl-{uuid.uuid4()}",
             object="chat.completion",
             created=int(time.time()),
-            model=None,
+            model="unknown",
             choices=[
                 Choice(
                     index=0,
-                    message={
-                        "content": "Internal server error",
-                        "role": "assistant",
-                    },
-                    finish_reason="error",
+                    message=ChatCompletionMessage(
+                        content="Internal server error",
+                        role="assistant",
+                    ),
+                    finish_reason="stop",
                     logprobs=None,
                 )
             ],
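
The error paths now build typed `ChatCompletionMessage` objects and use `finish_reason="stop"`, since `"error"` is not among the finish reasons the OpenAI `Choice` type admits. One consequence, sketched below, is that failures come back as well-formed completions, so a client has to inspect the message text to notice them (same placeholder server; `missing_config` is a hypothetical id that fails to load):

```python
# Sketch: error replies are now schema-valid ChatCompletion objects,
# so failure detection relies on the message text, not finish_reason.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="not-needed")

response = client.chat.completions.create(
    model="missing_config",  # placeholder id that fails to load
    messages=[{"role": "user", "content": "Hi!"}],
)
message = response.choices[0].message
# finish_reason is "stop" even on failure; the error text is in content.
if "An internal error has occurred" in (message.content or ""):
    print("Guardrails configuration failed to load:", message.content)
```
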

nemoguardrails/server/schemas/openai.py

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@
 
 from typing import List, Optional
 
-from openai.types.chat.chat_completion import ChatCompletion, Choice
+from openai.types.chat.chat_completion import ChatCompletion
 from openai.types.model import Model
 from pydantic import BaseModel, Field
 
poetry.lock

Lines changed: 10 additions & 10 deletions
(Generated file; diff not rendered.)

pyproject.toml

Lines changed: 3 additions & 4 deletions
@@ -71,13 +71,13 @@ starlette = ">=0.49.1"
 typer = ">=0.8"
 uvicorn = ">=0.23"
 watchdog = ">=3.0.0,"
+aiofiles = ">=24.1.0"
+openai = ">=1.0.0, <2.0.0"
 
 # tracing
 opentelemetry-api = { version = ">=1.27.0,<2.0.0", optional = true }
-aiofiles = { version = ">=24.1.0", optional = true }
 
 # openai
-openai = { version = ">=1.0.0, <2.0.0", optional = true }
 langchain-openai = { version = ">=0.1.0", optional = true }
 
 # eval
@@ -111,7 +111,7 @@ sdd = ["presidio-analyzer", "presidio-anonymizer"]
 eval = ["tqdm", "numpy", "streamlit", "tornado"]
 openai = ["langchain-openai"]
 gcp = ["google-cloud-language"]
-tracing = ["opentelemetry-api", "aiofiles"]
+tracing = ["opentelemetry-api"]
 nvidia = ["langchain-nvidia-ai-endpoints"]
 jailbreak = ["yara-python"]
 # Poetry does not support recursive dependencies, so we need to add all the dependencies here.
@@ -126,7 +126,6 @@ all = [
     "langchain-openai",
     "google-cloud-language",
     "opentelemetry-api",
-    "aiofiles",
     "langchain-nvidia-ai-endpoints",
     "yara-python",
 ]

tests/test_api.py

Lines changed: 1 addition & 1 deletion
@@ -480,7 +480,7 @@ async def collector():
 
         choice = j["choices"][0]
         assert "delta" in choice
-        assert choice["index"] is None
+        assert choice["index"] == 0
         assert choice["finish_reason"] is None
 
 
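
The updated assertion, restated standalone for clarity; the two raw events below are illustrative stand-ins for the SSE payloads the server emits:

```python
# Standalone restatement of the updated streaming assertion: every
# chunk's single choice reports index 0 (previously None) and no
# finish_reason until the stream ends. The raw events are illustrative.
import json

raw_events = [
    '{"choices": [{"delta": {"content": "Hel"}, "index": 0, "finish_reason": null}]}',
    '{"choices": [{"delta": {"content": "lo"}, "index": 0, "finish_reason": null}]}',
]

for event in raw_events:
    choice = json.loads(event)["choices"][0]
    assert "delta" in choice
    assert choice["index"] == 0  # was `is None` before this commit
    assert choice["finish_reason"] is None
```
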
