Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .changeset/plenty-forks-double.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
'@ai-sdk/provider': patch
'@ai-sdk/openai': patch
'ai': patch
---

feat: expose usage tokens for 'generateImage' function
1 change: 1 addition & 0 deletions examples/ai-core/src/generate-image/openai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ async function main() {
console.log({
prompt,
revisedPrompt,
usage: result.usage,
});

await presentImages([result.image]);
Expand Down
6 changes: 6 additions & 0 deletions packages/ai/src/generate-image/generate-image-result.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import {
ImageModelProviderMetadata,
} from '../types/image-model';
import { ImageModelResponseMetadata } from '../types/image-model-response-metadata';
import { ImageModelUsage } from '../types/usage';

/**
The result of a `generateImage` call.
Expand Down Expand Up @@ -35,4 +36,9 @@ Response metadata from the provider. There may be multiple responses if we made
* results that can be fully encapsulated in the provider.
*/
readonly providerMetadata: ImageModelProviderMetadata;

/**
Combined token usage across all underlying provider calls for this image generation.
*/
readonly usage: ImageModelUsage;
}
82 changes: 82 additions & 0 deletions packages/ai/src/generate-image/generate-image.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -651,4 +651,86 @@ describe('generateImage', () => {
},
});
});

  // `createMockResponse` is called without a `usage` field, so the mocked
  // provider call reports no token usage at all.
  it('should expose empty usage when provider does not report usage', async () => {
    const result = await generateImage({
      model: new MockImageModelV3({
        doGenerate: async () =>
          createMockResponse({
            images: [pngBase64],
          }),
      }),
      prompt,
    });

    // Even with no reported usage, `result.usage` must exist with every
    // field explicitly `undefined` (not missing, not 0).
    expect(result.usage).toStrictEqual({
      inputTokens: undefined,
      outputTokens: undefined,
      totalTokens: undefined,
    });
  });

  it('should aggregate usage across multiple provider calls', async () => {
    let callCount = 0;

    const result = await generateImage({
      model: new MockImageModelV3({
        // `maxImagesPerCall: 1` together with `n: 2` below forces
        // generateImage to issue two separate provider calls, one per image.
        maxImagesPerCall: 1,
        doGenerate: async () => {
          // Return a distinct image and usage payload per call so the test
          // can verify both ordering of images and summation of usage.
          switch (callCount++) {
            case 0:
              return {
                images: [pngBase64],
                warnings: [],
                providerMetadata: {
                  testProvider: { images: [null] },
                },
                response: {
                  timestamp: new Date(),
                  modelId: 'mock-model-id',
                  headers: {},
                },
                usage: {
                  inputTokens: 10,
                  outputTokens: 0,
                  totalTokens: 10,
                },
              };
            case 1:
              return {
                images: [jpegBase64],
                warnings: [],
                providerMetadata: {
                  testProvider: { images: [null] },
                },
                response: {
                  timestamp: new Date(),
                  modelId: 'mock-model-id',
                  headers: {},
                },
                usage: {
                  inputTokens: 5,
                  outputTokens: 0,
                  totalTokens: 5,
                },
              };
            default:
              // Guard against generateImage making more calls than expected.
              throw new Error('Unexpected call');
          }
        },
      }),
      prompt,
      n: 2,
    });

    // Images arrive in provider-call order.
    expect(result.images.map(image => image.base64)).toStrictEqual([
      pngBase64,
      jpegBase64,
    ]);
    // Usage is the field-wise sum across both calls (10 + 5, 0 + 0, 10 + 5).
    expect(result.usage).toStrictEqual({
      inputTokens: 15,
      outputTokens: 0,
      totalTokens: 15,
    });
  });
});
14 changes: 14 additions & 0 deletions packages/ai/src/generate-image/generate-image.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import { ImageModelResponseMetadata } from '../types/image-model-response-metada
import { GenerateImageResult } from './generate-image-result';
import { logWarnings } from '../logger/log-warnings';
import { VERSION } from '../version';
import { addImageModelUsage, ImageModelUsage } from '../types/usage';

/**
Generates images using an image model.
Expand Down Expand Up @@ -172,6 +173,11 @@ Only applicable for HTTP-based providers.
const warnings: Array<ImageGenerationWarning> = [];
const responses: Array<ImageModelResponseMetadata> = [];
const providerMetadata: ImageModelV3ProviderMetadata = {};
let totalUsage: ImageModelUsage = {
inputTokens: undefined,
outputTokens: undefined,
totalTokens: undefined,
};
for (const result of results) {
images.push(
...result.images.map(
Expand All @@ -188,6 +194,10 @@ Only applicable for HTTP-based providers.
);
warnings.push(...result.warnings);

if (result.usage != null) {
totalUsage = addImageModelUsage(totalUsage, result.usage);
}

if (result.providerMetadata) {
for (const [providerName, metadata] of Object.entries<{
images: unknown;
Expand All @@ -213,6 +223,7 @@ Only applicable for HTTP-based providers.
warnings,
responses,
providerMetadata,
usage: totalUsage,
});
}

Expand All @@ -221,17 +232,20 @@ class DefaultGenerateImageResult implements GenerateImageResult {
readonly warnings: Array<ImageGenerationWarning>;
readonly responses: Array<ImageModelResponseMetadata>;
readonly providerMetadata: ImageModelV3ProviderMetadata;
readonly usage: ImageModelUsage;

  /**
  Creates the result object returned by `generateImage`, copying each field
  of the options bag onto the corresponding readonly property.
   */
  constructor(options: {
    images: Array<GeneratedFile>;
    warnings: Array<ImageGenerationWarning>;
    responses: Array<ImageModelResponseMetadata>;
    providerMetadata: ImageModelV3ProviderMetadata;
    usage: ImageModelUsage;
  }) {
    this.images = options.images;
    this.warnings = options.warnings;
    this.responses = options.responses;
    this.providerMetadata = options.providerMetadata;
    this.usage = options.usage;
  }

get image() {
Expand Down
20 changes: 20 additions & 0 deletions packages/ai/src/types/usage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,23 @@ function addTokenCounts(
? undefined
: (tokenCount1 ?? 0) + (tokenCount2 ?? 0);
}

/**
Usage information for an image model call.

Aggregated across provider calls via `addImageModelUsage`; each field is
`undefined` when no provider call reported it.
 */
export type ImageModelUsage = {
  /**
  The number of input (prompt) tokens used, if reported by the provider.
   */
  inputTokens: number | undefined;

  /**
  The number of output tokens used, if reported by the provider.
   */
  outputTokens: number | undefined;

  /**
  The total number of tokens as reported by the provider.
   */
  totalTokens: number | undefined;
};

/**
Combines the token usage of two image model calls into a single total.

Each counter is summed with `addTokenCounts`, so a field stays `undefined`
only when it is missing from both inputs; otherwise a missing side counts
as 0.
 */
export function addImageModelUsage(
  usage1: ImageModelUsage,
  usage2: ImageModelUsage,
): ImageModelUsage {
  // Sum one named counter across both usage records.
  const combine = (field: keyof ImageModelUsage) =>
    addTokenCounts(usage1[field], usage2[field]);

  return {
    inputTokens: combine('inputTokens'),
    outputTokens: combine('outputTokens'),
    totalTokens: combine('totalTokens'),
  };
}
13 changes: 13 additions & 0 deletions packages/openai/src/image/openai-image-api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,19 @@ export const openaiImageResponseSchema = lazySchema(() =>
revised_prompt: z.string().nullish(),
}),
),
usage: z
.object({
input_tokens: z.number().nullish(),
output_tokens: z.number().nullish(),
total_tokens: z.number().nullish(),
input_tokens_details: z
.object({
image_tokens: z.number().nullish(),
text_tokens: z.number().nullish(),
})
.nullish(),
})
.nullish(),
}),
),
);
38 changes: 38 additions & 0 deletions packages/openai/src/image/openai-image-model.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -311,4 +311,42 @@ describe('doGenerate', () => {
},
});
});

  it('should map OpenAI usage to usage', async () => {
    // Stub the OpenAI images endpoint with a response that includes the
    // snake_case `usage` object, including the nested token details.
    server.urls['https://api.openai.com/v1/images/generations'].response = {
      type: 'json-value',
      body: {
        created: 1733837122,
        data: [
          {
            b64_json: 'base64-image-1',
          },
        ],
        usage: {
          input_tokens: 12,
          output_tokens: 0,
          total_tokens: 12,
          input_tokens_details: {
            image_tokens: 7,
            text_tokens: 5,
          },
        },
      },
    };

    const result = await provider.image('gpt-image-1').doGenerate({
      prompt,
      n: 1,
      size: '1024x1024',
      aspectRatio: undefined,
      seed: undefined,
      providerOptions: {},
    });

    // snake_case fields are mapped to camelCase; `input_tokens_details`
    // is accepted in the response but not surfaced on `result.usage`.
    expect(result.usage).toStrictEqual({
      inputTokens: 12,
      outputTokens: 0,
      totalTokens: 12,
    });
  });
});
8 changes: 8 additions & 0 deletions packages/openai/src/image/openai-image-model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,14 @@ export class OpenAIImageModel implements ImageModelV3 {
return {
images: response.data.map(item => item.b64_json),
warnings,
usage:
response.usage != null
? {
inputTokens: response.usage.input_tokens ?? undefined,
outputTokens: response.usage.output_tokens ?? undefined,
totalTokens: response.usage.total_tokens ?? undefined,
}
: undefined,
response: {
timestamp: currentDate,
modelId: this.modelId,
Expand Down
19 changes: 19 additions & 0 deletions packages/provider/src/image-model/v3/image-model-v3-usage.ts
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could reasoningTokens and/or cachedInputTokens be relevant for images?

Compare

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had initially added those two fields in the spec, but removed them since OpenAI didn't return them. For the sake of consistency, however, we could still add them — ultimately it depends on whether any provider returns that data.

Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/**
Usage information for an image model call.

All fields are `undefined` when the provider does not report token usage.
*/
export type ImageModelV3Usage = {
/**
The number of input (prompt) tokens used, if reported by the provider.
*/
inputTokens: number | undefined;

/**
The number of output tokens used, if reported by the provider.
*/
outputTokens: number | undefined;

/**
The total number of tokens as reported by the provider.
*/
totalTokens: number | undefined;
};
6 changes: 6 additions & 0 deletions packages/provider/src/image-model/v3/image-model-v3.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { JSONArray, JSONValue } from '../../json-value';
import { ImageModelV3Usage } from './image-model-v3-usage';
import { ImageModelV3CallOptions } from './image-model-v3-call-options';
import { ImageModelV3CallWarning } from './image-model-v3-call-warning';

Expand Down Expand Up @@ -100,5 +101,10 @@ Response headers.
*/
headers: Record<string, string> | undefined;
};

/**
Optional token usage for the image generation call (if the provider reports it).
*/
usage?: ImageModelV3Usage;
}>;
};
Loading