From aef49c333f5435489d0d2f646d1e86f99179705e Mon Sep 17 00:00:00 2001
From: Annie Ehler
Date: Fri, 21 Nov 2025 15:16:35 -0800
Subject: [PATCH 1/3] Non-OpenAI APIs that implement a similar interface don't
 always include the `output[].content[].annotations` path

---
 async-openai/src/types/responses/response.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/async-openai/src/types/responses/response.rs b/async-openai/src/types/responses/response.rs
index 62b548e2..6cfa1fa8 100644
--- a/async-openai/src/types/responses/response.rs
+++ b/async-openai/src/types/responses/response.rs
@@ -1522,6 +1522,7 @@ pub struct ResponseLogProb {
 #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct OutputTextContent {
     /// The annotations of the text output.
+    #[serde(default)]
     pub annotations: Vec<Annotation>,
     pub logprobs: Option<Vec<ResponseLogProb>>,
     /// The text output from the model.

From 2e58310df6d3f65a6fd95f6f820695ba23a07a8f Mon Sep 17 00:00:00 2001
From: Annie Ehler
Date: Fri, 21 Nov 2025 15:42:47 -0800
Subject: [PATCH 2/3] The LM Studio API doesn't always output token
 information

---
 async-openai/src/types/responses/response.rs | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/async-openai/src/types/responses/response.rs b/async-openai/src/types/responses/response.rs
index 6cfa1fa8..ceeffdc3 100644
--- a/async-openai/src/types/responses/response.rs
+++ b/async-openai/src/types/responses/response.rs
@@ -2432,21 +2432,22 @@ pub struct MCPApprovalRequest {
     pub server_label: String,
 }
 
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
 pub struct InputTokenDetails {
     /// The number of tokens that were retrieved from the cache.
     /// [More on prompt caching](https://platform.openai.com/docs/guides/prompt-caching).
     pub cached_tokens: u32,
 }
 
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
 pub struct OutputTokenDetails {
     /// The number of reasoning tokens.
     pub reasoning_tokens: u32,
 }
 
 /// Usage statistics for a response.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
+#[serde(default)]
 pub struct ResponseUsage {
     /// The number of input tokens.
     pub input_tokens: u32,

From fc08fd47a9eaf7f5b6745ce32308e4f2a64f08a8 Mon Sep 17 00:00:00 2001
From: Annie Ehler
Date: Wed, 26 Nov 2025 12:45:41 -0800
Subject: [PATCH 3/3] Move the ResponseUsage struct to match upstream

---
 async-openai/src/types/responses/response.rs | 29 -------------------
 .../src/types/shared/response_usage.rs       |  7 +++--
 2 files changed, 4 insertions(+), 32 deletions(-)

diff --git a/async-openai/src/types/responses/response.rs b/async-openai/src/types/responses/response.rs
index 8067ef29..64eb420f 100644
--- a/async-openai/src/types/responses/response.rs
+++ b/async-openai/src/types/responses/response.rs
@@ -2350,35 +2350,6 @@ pub struct MCPApprovalRequest {
     pub server_label: String,
 }
 
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
-pub struct InputTokenDetails {
-    /// The number of tokens that were retrieved from the cache.
-    /// [More on prompt caching](https://platform.openai.com/docs/guides/prompt-caching).
-    pub cached_tokens: u32,
-}
-
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
-pub struct OutputTokenDetails {
-    /// The number of reasoning tokens.
-    pub reasoning_tokens: u32,
-}
-
-/// Usage statistics for a response.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
-#[serde(default)]
-pub struct ResponseUsage {
-    /// The number of input tokens.
-    pub input_tokens: u32,
-    /// A detailed breakdown of the input tokens.
-    pub input_tokens_details: InputTokenDetails,
-    /// The number of output tokens.
-    pub output_tokens: u32,
-    /// A detailed breakdown of the output tokens.
-    pub output_tokens_details: OutputTokenDetails,
-    /// The total number of tokens used.
-    pub total_tokens: u32,
-}
-
 #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(untagged)]
 pub enum Instructions {

diff --git a/async-openai/src/types/shared/response_usage.rs b/async-openai/src/types/shared/response_usage.rs
index 92fc99da..09cd7bb7 100644
--- a/async-openai/src/types/shared/response_usage.rs
+++ b/async-openai/src/types/shared/response_usage.rs
@@ -1,20 +1,21 @@
 use serde::{Deserialize, Serialize};
 
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
 pub struct InputTokenDetails {
     /// The number of tokens that were retrieved from the cache.
     /// [More on prompt caching](https://platform.openai.com/docs/guides/prompt-caching).
     pub cached_tokens: u32,
 }
 
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
 pub struct OutputTokenDetails {
     /// The number of reasoning tokens.
     pub reasoning_tokens: u32,
 }
 
 /// Usage statistics for a response.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
+#[serde(default)]
 pub struct ResponseUsage {
     /// The number of input tokens.
     pub input_tokens: u32,
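
A quick note on why `#[serde(default)]` covers both cases the patches describe. This is a minimal standalone sketch, not part of the patches: it depends on the serde and serde_json crates, and the struct and field names below are made-up stand-ins rather than the real async-openai types.

    use serde::Deserialize;

    // Container-level `#[serde(default)]` requires `Default` on the struct and
    // fills every missing field from `Default::default()`, which is what lets an
    // LM Studio-style response omit token counts entirely.
    #[derive(Debug, Deserialize, Default)]
    #[serde(default)]
    struct Usage {
        input_tokens: u32,
        output_tokens: u32,
        total_tokens: u32,
    }

    // Field-level `#[serde(default)]` only covers the annotated field, which is
    // enough for providers that drop `annotations` from text content.
    #[derive(Debug, Deserialize)]
    struct TextContent {
        #[serde(default)]
        annotations: Vec<String>,
        text: String,
    }

    fn main() {
        // Without the attributes, both calls would fail with a "missing field" error.
        let usage: Usage = serde_json::from_str("{}").unwrap();
        assert_eq!(usage.total_tokens, 0);

        let content: TextContent = serde_json::from_str(r#"{"text": "hi"}"#).unwrap();
        assert!(content.annotations.is_empty());
    }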