From aef49c333f5435489d0d2f646d1e86f99179705e Mon Sep 17 00:00:00 2001
From: Annie Ehler
Date: Fri, 21 Nov 2025 15:16:35 -0800
Subject: [PATCH 1/3] Non-OpenAI APIs that implement a similar interface don't
 always include the `output[].content[].annotations` path

---
 async-openai/src/types/responses/response.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/async-openai/src/types/responses/response.rs b/async-openai/src/types/responses/response.rs
index 62b548e2..6cfa1fa8 100644
--- a/async-openai/src/types/responses/response.rs
+++ b/async-openai/src/types/responses/response.rs
@@ -1522,6 +1522,7 @@ pub struct ResponseLogProb {
 #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct OutputTextContent {
     /// The annotations of the text output.
+    #[serde(default)]
     pub annotations: Vec<Annotation>,
     pub logprobs: Option<Vec<ResponseLogProb>>,
     /// The text output from the model.

From 2e58310df6d3f65a6fd95f6f820695ba23a07a8f Mon Sep 17 00:00:00 2001
From: Annie Ehler
Date: Fri, 21 Nov 2025 15:42:47 -0800
Subject: [PATCH 2/3] The LM Studio API doesn't always output token
 information

---
 async-openai/src/types/responses/response.rs | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/async-openai/src/types/responses/response.rs b/async-openai/src/types/responses/response.rs
index 6cfa1fa8..ceeffdc3 100644
--- a/async-openai/src/types/responses/response.rs
+++ b/async-openai/src/types/responses/response.rs
@@ -2432,21 +2432,22 @@ pub struct MCPApprovalRequest {
     pub server_label: String,
 }
 
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
 pub struct InputTokenDetails {
     /// The number of tokens that were retrieved from the cache.
     /// [More on prompt caching](https://platform.openai.com/docs/guides/prompt-caching).
     pub cached_tokens: u32,
 }
 
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
 pub struct OutputTokenDetails {
     /// The number of reasoning tokens.
     pub reasoning_tokens: u32,
 }
 
 /// Usage statistics for a response.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
+#[serde(default)]
 pub struct ResponseUsage {
     /// The number of input tokens.
     pub input_tokens: u32,

From fc08fd47a9eaf7f5b6745ce32308e4f2a64f08a8 Mon Sep 17 00:00:00 2001
From: Annie Ehler
Date: Wed, 26 Nov 2025 12:45:41 -0800
Subject: [PATCH 3/3] Move the ResponseUsage struct to match upstream

---
 async-openai/src/types/responses/response.rs | 29 -------------------
 .../src/types/shared/response_usage.rs       |  7 +++--
 2 files changed, 4 insertions(+), 32 deletions(-)

diff --git a/async-openai/src/types/responses/response.rs b/async-openai/src/types/responses/response.rs
index 8067ef29..64eb420f 100644
--- a/async-openai/src/types/responses/response.rs
+++ b/async-openai/src/types/responses/response.rs
@@ -2350,35 +2350,6 @@ pub struct MCPApprovalRequest {
     pub server_label: String,
 }
 
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
-pub struct InputTokenDetails {
-    /// The number of tokens that were retrieved from the cache.
-    /// [More on prompt caching](https://platform.openai.com/docs/guides/prompt-caching).
-    pub cached_tokens: u32,
-}
-
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
-pub struct OutputTokenDetails {
-    /// The number of reasoning tokens.
-    pub reasoning_tokens: u32,
-}
-
-/// Usage statistics for a response.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
-#[serde(default)]
-pub struct ResponseUsage {
-    /// The number of input tokens.
-    pub input_tokens: u32,
-    /// A detailed breakdown of the input tokens.
-    pub input_tokens_details: InputTokenDetails,
-    /// The number of output tokens.
-    pub output_tokens: u32,
-    /// A detailed breakdown of the output tokens.
-    pub output_tokens_details: OutputTokenDetails,
-    /// The total number of tokens used.
-    pub total_tokens: u32,
-}
-
 #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(untagged)]
 pub enum Instructions {

diff --git a/async-openai/src/types/shared/response_usage.rs b/async-openai/src/types/shared/response_usage.rs
index 92fc99da..09cd7bb7 100644
--- a/async-openai/src/types/shared/response_usage.rs
+++ b/async-openai/src/types/shared/response_usage.rs
@@ -1,20 +1,21 @@
 use serde::{Deserialize, Serialize};
 
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
 pub struct InputTokenDetails {
     /// The number of tokens that were retrieved from the cache.
     /// [More on prompt caching](https://platform.openai.com/docs/guides/prompt-caching).
     pub cached_tokens: u32,
 }
 
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
 pub struct OutputTokenDetails {
     /// The number of reasoning tokens.
     pub reasoning_tokens: u32,
 }
 
 /// Usage statistics for a response.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
+#[serde(default)]
 pub struct ResponseUsage {
     /// The number of input tokens.
     pub input_tokens: u32,
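
A quick note on why `#[serde(default)]` covers both cases the patches describe. This is a minimal standalone sketch, not part of the patches: it depends on the serde and serde_json crates, and the struct and field names below are made-up stand-ins rather than the real async-openai types.

    use serde::Deserialize;

    // Container-level `#[serde(default)]` requires `Default` on the struct and
    // fills every missing field from `Default::default()`, which is what lets an
    // LM Studio-style response omit token counts entirely.
    #[derive(Debug, Deserialize, Default)]
    #[serde(default)]
    struct Usage {
        input_tokens: u32,
        output_tokens: u32,
        total_tokens: u32,
    }

    // Field-level `#[serde(default)]` only covers the annotated field, which is
    // enough for providers that drop `annotations` from text content.
    #[derive(Debug, Deserialize)]
    struct TextContent {
        #[serde(default)]
        annotations: Vec<String>,
        text: String,
    }

    fn main() {
        // Without the attributes, both calls would fail with a "missing field" error.
        let usage: Usage = serde_json::from_str("{}").unwrap();
        assert_eq!(usage.total_tokens, 0);

        let content: TextContent = serde_json::from_str(r#"{"text": "hi"}"#).unwrap();
        assert!(content.annotations.is_empty());
    }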