diff --git a/app/extension/src/__tests__/AssistantMessage.test.tsx b/app/extension/src/__tests__/AssistantMessage.test.tsx
new file mode 100644
index 0000000..2b9eb41
--- /dev/null
+++ b/app/extension/src/__tests__/AssistantMessage.test.tsx
@@ -0,0 +1,231 @@
+/** @jest-environment jsdom */
+
+import { afterEach, describe, expect, it, jest } from "@jest/globals";
+import { act } from "react-dom/test-utils";
+import { createRoot } from "react-dom/client";
+
+import { AssistantMessage } from "../sidepanel/components/AssistantMessage";
+import type { ChatMessage } from "../sidepanel/types";
+
+jest.mock("../sidepanel/components/AssistantStatusCard", () => ({
+  AssistantStatusCard: () => null,
+}));
+
+jest.mock("../sidepanel/components/IconButton", () => ({
+  IconButton: ({
+    children,
+    onClick,
+  }: {
+    children: React.ReactNode;
+    onClick?: () => void;
+  }) => {
+    const React = require("react");
+    return React.createElement("button", { type: "button", onClick }, children);
+  },
+}));
+
+jest.mock("../sidepanel/components/LinkCardsBlock", () => ({
+  LinkCardsBlock: () => null,
+}));
+
+jest.mock("../sidepanel/components/MarkdownContent", () => ({
+  MarkdownContent: ({ text }: { text: string }) => {
+    const React = require("react");
+    return React.createElement("div", null, text);
+  },
+}));
+
+jest.mock("../sidepanel/components/MessageFooter", () => ({
+  MessageFooter: ({ children }: { children: React.ReactNode }) => {
+    const React = require("react");
+    return React.createElement("div", null, children);
+  },
+}));
+
+jest.mock("../sidepanel/components/ReasoningBlock", () => ({
+  ReasoningBlock: ({ text }: { text: string }) => {
+    const React = require("react");
+    return React.createElement("div", null, text);
+  },
+}));
+
+jest.mock("../sidepanel/components/ToolCallBlock", () => ({
+  ToolCallBlock: () => null,
+}));
+
+jest.mock("../i18n", () => ({
+  useI18n: () => ({
+    t: (key: string) => key,
+  }),
+}));
+
+(
+  globalThis as typeof globalThis & {
+    IS_REACT_ACT_ENVIRONMENT?: boolean;
+  }
+).IS_REACT_ACT_ENVIRONMENT = true;
+
+function renderAssistantMessage(
+  props: Partial<React.ComponentProps<typeof AssistantMessage>> = {}
+) {
+  const container = document.createElement("div");
+  document.body.appendChild(container);
+  const root = createRoot(container);
+  const message: ChatMessage = {
+    id: "assistant-1",
+    role: "assistant",
+    parts: [],
+    status: "running",
+  };
+
+  act(() => {
+    root.render(
+      <AssistantMessage message={message} isLast {...props} />
+    );
+  });
+
+  return {
+    container,
+    cleanup: () => {
+      act(() => root.unmount());
+      container.remove();
+    },
+  };
+}
+
+describe("AssistantMessage", () => {
+  afterEach(() => {
+    document.body.innerHTML = "";
+  });
+
+  it("shows a preparing response indicator before assistant text arrives", () => {
+    const { container, cleanup } = renderAssistantMessage();
+    const indicator = container.querySelector('[role="status"]');
+
+    expect(indicator?.getAttribute("aria-label")).toBe("common.loading");
+    expect(container.querySelectorAll(".claude-dot")).toHaveLength(3);
+    expect(indicator?.className).not.toContain("rounded-full");
+    expect(indicator?.className).not.toContain("border");
+
+    cleanup();
+  });
+
+  it("keeps the preparing indicator visible for a leading step-start part", () => {
+    const { container, cleanup } = renderAssistantMessage({
+      message: {
+        id: "assistant-2",
+        role: "assistant",
+        parts: [{ type: "step-start" }],
+        status: "running",
+      },
+    });
+
+    expect(container.querySelector('[role="status"]')).not.toBeNull();
+    expect(container.querySelectorAll(".claude-dot")).toHaveLength(3);
+
+    cleanup();
+  });
+
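+  // The cases below stream non-text parts first: reasoning deltas, running
+  // tool calls, and step boundaries should all keep the indicator mounted
+  // until visible response text lands.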
it("keeps the preparing indicator visible while reasoning is streaming", () => { + const { container, cleanup } = renderAssistantMessage({ + message: { + id: "assistant-3", + role: "assistant", + parts: [{ type: "reasoning", text: "Thinking", streaming: true }], + status: "running", + }, + thinkingMode: true, + }); + + expect(container.querySelector('[role="status"]')).not.toBeNull(); + expect(container.textContent).toContain("Thinking"); + + cleanup(); + }); + + it("keeps the preparing indicator visible while a tool call is running", () => { + const { container, cleanup } = renderAssistantMessage({ + message: { + id: "assistant-4", + role: "assistant", + parts: [ + { + type: "tool-call", + toolCallId: "tool-1", + toolName: "search_web", + args: { query: "huntly" }, + }, + ], + status: "running", + }, + }); + + expect(container.querySelector('[role="status"]')).not.toBeNull(); + + cleanup(); + }); + + it("shows the preparing indicator again after earlier text when a tool call starts", () => { + const { container, cleanup } = renderAssistantMessage({ + message: { + id: "assistant-5", + role: "assistant", + parts: [ + { type: "text", text: "先给你一个结论。" }, + { + type: "tool-call", + toolCallId: "tool-2", + toolName: "search_web", + args: { query: "huntly" }, + }, + ], + status: "running", + }, + }); + + expect(container.querySelector('[role="status"]')).not.toBeNull(); + + cleanup(); + }); + + it("shows the preparing indicator again after earlier text when a new step starts", () => { + const { container, cleanup } = renderAssistantMessage({ + message: { + id: "assistant-6", + role: "assistant", + parts: [ + { type: "text", text: "先给你一个结论。" }, + { type: "step-start" }, + ], + status: "running", + }, + }); + + expect(container.querySelector('[role="status"]')).not.toBeNull(); + + cleanup(); + }); + + it("hides the preparing indicator once visible text arrives", () => { + const { container, cleanup } = renderAssistantMessage({ + message: { + id: "assistant-7", + role: "assistant", + parts: [{ type: "text", text: "hello" }], + status: "running", + }, + }); + + expect(container.querySelector('[role="status"]')).toBeNull(); + + cleanup(); + }); +}); \ No newline at end of file diff --git a/app/extension/src/__tests__/providers.test.ts b/app/extension/src/__tests__/providers.test.ts index 35fcd24..6127c8c 100644 --- a/app/extension/src/__tests__/providers.test.ts +++ b/app/extension/src/__tests__/providers.test.ts @@ -2,6 +2,7 @@ import { getOpenAICompatibleBaseUrl, getOllamaBaseUrl, getOllamaOpenAIBaseUrl, + usesRawOpenAICompatibleStream, } from "../ai/openAICompatibleProviders"; import { getEffectiveApiFormat, PROVIDER_REGISTRY } from "../ai/types"; @@ -44,6 +45,64 @@ describe("providers helpers", () => { ); }); + it("uses raw OpenAI-compatible streaming for providers that need explicit thinking control", () => { + expect( + usesRawOpenAICompatibleStream({ + type: "qwen", + enabled: true, + apiKey: "test", + baseUrl: "", + enabledModels: ["qwen3.5-plus"], + updatedAt: Date.now(), + }) + ).toBe(true); + + expect( + usesRawOpenAICompatibleStream({ + type: "zhipu", + enabled: true, + apiKey: "test", + baseUrl: "", + enabledModels: ["glm-5"], + updatedAt: Date.now(), + }) + ).toBe(true); + + expect( + usesRawOpenAICompatibleStream({ + type: "openai", + enabled: true, + apiKey: "test", + baseUrl: "https://api.openai.com/v1", + enabledModels: ["gpt-4.1"], + updatedAt: Date.now(), + }) + ).toBe(false); + + expect( + usesRawOpenAICompatibleStream({ + type: "openai", + enabled: true, + apiKey: "test", + 
baseUrl: "https://dashscope.aliyuncs.com/compatible-mode/v1", + enabledModels: ["qwen-plus"], + updatedAt: Date.now(), + }) + ).toBe(false); + + expect( + usesRawOpenAICompatibleStream({ + type: "qwen", + enabled: true, + apiKey: "test", + baseUrl: "", + enabledModels: ["qwen3.5-plus"], + updatedAt: Date.now(), + apiFormat: "anthropic", + }) + ).toBe(false); + }); + it("falls back to the provider native format when no override is given", () => { expect(getEffectiveApiFormat({ type: "qwen" })).toBe("openai"); expect(getEffectiveApiFormat({ type: "anthropic" })).toBe("anthropic"); diff --git a/app/extension/src/__tests__/thinkingMode.test.ts b/app/extension/src/__tests__/thinkingMode.test.ts new file mode 100644 index 0000000..5a7ba03 --- /dev/null +++ b/app/extension/src/__tests__/thinkingMode.test.ts @@ -0,0 +1,8 @@ +import { getThinkingModeOptions } from "../ai/thinkingMode"; + +describe("thinking mode helpers", () => { + it("always sends an explicit enable_thinking flag", () => { + expect(getThinkingModeOptions(true)).toEqual({ enable_thinking: true }); + expect(getThinkingModeOptions(false)).toEqual({ enable_thinking: false }); + }); +}); \ No newline at end of file diff --git a/app/extension/src/ai/openAICompatibleProviders.ts b/app/extension/src/ai/openAICompatibleProviders.ts index fa77887..4e8e8cb 100644 --- a/app/extension/src/ai/openAICompatibleProviders.ts +++ b/app/extension/src/ai/openAICompatibleProviders.ts @@ -1,4 +1,8 @@ -import { AIProviderConfig, PROVIDER_REGISTRY } from "./types"; +import { + AIProviderConfig, + getEffectiveApiFormat, + PROVIDER_REGISTRY, +} from "./types"; function trimTrailingSlash(url: string): string { return url.replace(/\/+$/, ""); @@ -12,6 +16,24 @@ export function getProviderBaseUrl(config: AIProviderConfig): string | undefined ); } +export function usesRawOpenAICompatibleStream( + config: AIProviderConfig +): boolean { + const format = getEffectiveApiFormat({ + type: config.type, + apiFormat: config.apiFormat, + }); + if (format !== "openai") { + return false; + } + + if (PROVIDER_REGISTRY[config.type]?.requiresRawOpenAICompatibleStream) { + return true; + } + + return false; +} + /** * @deprecated use {@link getProviderBaseUrl}. Kept for call sites still being migrated. */ diff --git a/app/extension/src/ai/openAICompatibleStream.ts b/app/extension/src/ai/openAICompatibleStream.ts new file mode 100644 index 0000000..6b11f0b --- /dev/null +++ b/app/extension/src/ai/openAICompatibleStream.ts @@ -0,0 +1,194 @@ +import { combineUrl } from "../utils"; + +export interface OpenAICompatibleStreamDelta { + contentDelta: string; + reasoningDelta: string; + done: boolean; +} + +interface StreamOpenAICompatibleChatCompletionOptions { + apiKey: string; + baseUrl: string; + modelId: string; + systemPrompt: string; + userPrompt: string; + maxTokens: number; + requestBodyExtras?: Record; + abortSignal: AbortSignal; + onDelta: (delta: OpenAICompatibleStreamDelta) => void; +} + +function buildOpenAICompatibleChatCompletionBody({ + modelId, + systemPrompt, + userPrompt, + maxTokens, + requestBodyExtras = {}, +}: Pick< + StreamOpenAICompatibleChatCompletionOptions, + "modelId" | "systemPrompt" | "userPrompt" | "maxTokens" | "requestBodyExtras" +>) { + return { + model: modelId, + stream: true, + max_tokens: maxTokens, + messages: [ + ...(systemPrompt.trim() + ? 
[{ role: "system", content: systemPrompt }] + : []), + { role: "user", content: userPrompt }, + ], + ...requestBodyExtras, + }; +} + +function extractOpenAICompatibleStreamDelta( + data: string +): OpenAICompatibleStreamDelta { + if (data.trim() === "[DONE]") { + return { + contentDelta: "", + reasoningDelta: "", + done: true, + }; + } + + let parsed: unknown; + try { + parsed = JSON.parse(data); + } catch { + return { + contentDelta: "", + reasoningDelta: "", + done: false, + }; + } + + const delta = + (parsed as { choices?: Array<{ delta?: Record }> }) + ?.choices?.[0]?.delta ?? {}; + + return { + contentDelta: typeof delta.content === "string" ? delta.content : "", + reasoningDelta: + typeof delta.reasoning_content === "string" + ? delta.reasoning_content + : typeof delta.reasoning === "string" + ? delta.reasoning + : "", + done: false, + }; +} + +export async function streamOpenAICompatibleChatCompletion({ + apiKey, + baseUrl, + modelId, + systemPrompt, + userPrompt, + maxTokens, + requestBodyExtras, + abortSignal, + onDelta, +}: StreamOpenAICompatibleChatCompletionOptions): Promise { + const response = await fetch(combineUrl(baseUrl, "chat/completions"), { + method: "POST", + headers: { + "Content-Type": "application/json", + Accept: "text/event-stream", + Authorization: `Bearer ${apiKey}`, + }, + body: JSON.stringify( + buildOpenAICompatibleChatCompletionBody({ + modelId, + systemPrompt, + userPrompt, + maxTokens, + requestBodyExtras, + }) + ), + signal: abortSignal, + }); + + if (!response.ok) { + const errorText = await response.text(); + throw new Error( + errorText || + `HTTP error! status: ${response.status} ${response.statusText}` + ); + } + + if (!response.body) { + throw new Error("No response body available"); + } + + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + let buffer = ""; + let dataLines: string[] = []; + let done = false; + + const processEvent = () => { + if (dataLines.length === 0) { + return; + } + + const eventData = dataLines.join("\n"); + dataLines = []; + + const delta = extractOpenAICompatibleStreamDelta(eventData); + if (delta.done) { + done = true; + return; + } + + onDelta(delta); + }; + + const processBuffer = () => { + let newlineIndex = buffer.indexOf("\n"); + while (newlineIndex !== -1) { + let line = buffer.slice(0, newlineIndex); + buffer = buffer.slice(newlineIndex + 1); + + if (line.endsWith("\r")) { + line = line.slice(0, -1); + } + + if (line === "") { + processEvent(); + } else if (line.startsWith("data:")) { + dataLines.push( + line.startsWith("data: ") ? line.slice(6) : line.slice(5) + ); + } + + if (done) { + return; + } + + newlineIndex = buffer.indexOf("\n"); + } + }; + + try { + while (!done) { + const { value, done: streamDone } = await reader.read(); + if (streamDone) { + break; + } + + buffer += decoder.decode(value, { stream: true }); + processBuffer(); + } + + buffer += decoder.decode(); + if (buffer.length > 0) { + buffer += "\n"; + processBuffer(); + } + processEvent(); + } finally { + reader.releaseLock(); + } +} \ No newline at end of file diff --git a/app/extension/src/ai/thinkingMode.ts b/app/extension/src/ai/thinkingMode.ts new file mode 100644 index 0000000..05f4691 --- /dev/null +++ b/app/extension/src/ai/thinkingMode.ts @@ -0,0 +1,12 @@ +/** + * Returns extra request body options for thinking mode. + * Some OpenAI-compatible providers default to thinking enabled, so callers + * need an explicit false to turn it off. 
+ */
+export function getThinkingModeOptions(
+  thinkingModeEnabled: boolean
+): Record<string, unknown> {
+  return {
+    enable_thinking: thinkingModeEnabled,
+  };
+}
\ No newline at end of file
diff --git a/app/extension/src/ai/types.ts b/app/extension/src/ai/types.ts
index 2f6ff1b..95005fa 100644
--- a/app/extension/src/ai/types.ts
+++ b/app/extension/src/ai/types.ts
@@ -68,6 +68,14 @@ export interface ProviderMeta {
    * endpoints for the same provider (qwen / zhipu / minimax currently).
    */
   supportsCustomApiFormat?: boolean;
+  /**
+   * Some OpenAI-compatible providers need the preview/background pipeline to
+   * use the raw chat-completions stream path so request-body flags such as
+   * `enable_thinking` can be sent explicitly.
+   *
+   * Only meaningful when the effective API format is `openai`.
+   */
+  requiresRawOpenAICompatibleStream?: boolean;
 }
 
 // Model information
@@ -264,6 +272,7 @@ export const PROVIDER_REGISTRY: Record<ProviderType, ProviderMeta> = {
     defaultBaseUrl: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
     nativeApiFormat: 'openai',
     supportsCustomApiFormat: true,
+    requiresRawOpenAICompatibleStream: true,
     defaultModels: [
       { id: 'qwen3.5-plus' },
       { id: 'qwen3-max' },
@@ -288,6 +297,7 @@ export const PROVIDER_REGISTRY: Record<ProviderType, ProviderMeta> = {
     defaultBaseUrl: 'https://open.bigmodel.cn/api/paas/v4',
     nativeApiFormat: 'openai',
     supportsCustomApiFormat: true,
+    requiresRawOpenAICompatibleStream: true,
     defaultModels: [
       { id: 'glm-5' },
       { id: 'glm-4.7' },
@@ -313,6 +323,7 @@ export const PROVIDER_REGISTRY: Record<ProviderType, ProviderMeta> = {
     defaultBaseUrl: 'https://api.minimax.chat/v1',
     nativeApiFormat: 'openai',
     supportsCustomApiFormat: true,
+    requiresRawOpenAICompatibleStream: true,
     defaultModels: [
       { id: 'MiniMax-M2.5' },
       { id: 'MiniMax-M2.5-highspeed' },
diff --git a/app/extension/src/background.ts b/app/extension/src/background.ts
index 0432035..4e840b7 100644
--- a/app/extension/src/background.ts
+++ b/app/extension/src/background.ts
@@ -27,12 +27,18 @@ import {
 } from "./ai/storage";
 import { PROVIDER_REGISTRY, ProviderType } from "./ai/types";
 import { createProviderModel } from "./ai/providers";
+import {
+  getOpenAICompatibleBaseUrl,
+  usesRawOpenAICompatibleStream,
+} from "./ai/openAICompatibleProviders";
+import { streamOpenAICompatibleChatCompletion } from "./ai/openAICompatibleStream";
 import {
   applyStreamingPreviewChunk,
   createStreamingPreviewState,
   getStreamingPreviewResult,
   hasStreamingPreviewStateChanged,
 } from "./ai/streamingPreview";
+import { getThinkingModeOptions } from "./ai/thinkingMode";
 import { translateUi } from "./uiMessages";
 import { streamText } from "ai";
 import type { ProviderOptions } from "@ai-sdk/provider-utils";
@@ -73,6 +79,7 @@ const pendingSidepanelContextCommands =
 const SAVED_BADGE_TEXT = "✓";
 const SAVED_BADGE_BG = "#15803D";
 const AI_MAX_OUTPUT_TOKENS = 20000;
+const RAW_OPENAI_COMPATIBLE_MAX_TOKENS = 8000;
 const ANTHROPIC_THINKING_BUDGET_TOKENS = 4000;
 
 function buildThinkingProviderOptions(): ProviderOptions {
@@ -451,51 +458,98 @@ async function startProcessingWithVercelAI(task: any) {
   let streamState = createStreamingPreviewState();
   const includeReasoningPreview = Boolean(thinkingModeEnabled);
 
-  // Create the model
-  const model = createProviderModel(config, modelId);
-  if (!model) {
-    throw new Error(`Failed to create model for ${providerType}`);
-  }
+  if (usesRawOpenAICompatibleStream(config)) {
+    const baseUrl = getOpenAICompatibleBaseUrl(config);
+    if (!baseUrl) {
+      throw new Error(`Provider ${providerType} base URL is not configured`);
+    }
 
-  // Use streamText for streaming response with abort signal
-  const result = streamText({
-    model,
-    system: systemPrompt,
-    prompt: userPrompt,
-    maxOutputTokens: AI_MAX_OUTPUT_TOKENS,
-    abortSignal: abortController.signal,
-    providerOptions: thinkingModeEnabled
-      ? buildThinkingProviderOptions()
-      : undefined,
-  });
+    await streamOpenAICompatibleChatCompletion({
+      apiKey: config.apiKey,
+      baseUrl,
+      modelId,
+      systemPrompt,
+      userPrompt,
+      maxTokens: RAW_OPENAI_COMPATIBLE_MAX_TOKENS,
+      requestBodyExtras: getThinkingModeOptions(Boolean(thinkingModeEnabled)),
+      abortSignal: abortController.signal,
+      onDelta: ({ contentDelta, reasoningDelta }) => {
+        let nextStreamState = streamState;
+
+        if (reasoningDelta) {
+          nextStreamState = applyStreamingPreviewChunk(
+            nextStreamState,
+            {
+              type: "reasoning",
+              textDelta: reasoningDelta,
+            },
+            {
+              includeReasoning: includeReasoningPreview,
+            }
+          );
+        }
 
-  // Process the full stream so providers that emit reasoning deltas before
-  // text deltas (for example, glm-5) still produce visible incremental output.
-  for await (const chunk of result.fullStream) {
-    // Check if aborted
-    if (abortController.signal.aborted) {
-      break;
-    }
+        if (contentDelta) {
+          nextStreamState = applyStreamingPreviewChunk(nextStreamState, {
+            type: "text-delta",
+            textDelta: contentDelta,
+          });
+        }
+
+        if (!hasStreamingPreviewStateChanged(streamState, nextStreamState)) {
+          return;
+        }
 
-    const nextStreamState = applyStreamingPreviewChunk(streamState, chunk, {
-      includeReasoning: includeReasoningPreview,
+        streamState = nextStreamState;
+        sendStreamingPreviewUpdate(
+          streamState,
+          contentDelta || reasoningDelta
+        );
+      },
     });
-    if (!hasStreamingPreviewStateChanged(streamState, nextStreamState)) {
-      continue;
+  } else {
+    const model = createProviderModel(config, modelId);
+    if (!model) {
+      throw new Error(`Failed to create model for ${providerType}`);
     }
-    streamState = nextStreamState;
 
-    // Send streaming data to preview
-    try {
-      sendStreamingPreviewUpdate(
-        streamState,
-        chunk.type === "text-delta" || chunk.type === "reasoning-delta"
-          ? chunk.text
-          : ""
-      );
-    } catch (error) {
-      console.warn("Failed to send shortcuts_process_data message:", error);
-      break;
+    const result = streamText({
+      model,
+      system: systemPrompt,
+      prompt: userPrompt,
+      maxOutputTokens: AI_MAX_OUTPUT_TOKENS,
+      abortSignal: abortController.signal,
+      providerOptions: thinkingModeEnabled
+        ? buildThinkingProviderOptions()
+        : undefined,
+    });
+
+    // Process the full stream so providers that emit reasoning deltas before
+    // text deltas (for example, glm-5) still produce visible incremental output.
+    for await (const chunk of result.fullStream) {
+      if (abortController.signal.aborted) {
+        break;
+      }
+
+      const nextStreamState = applyStreamingPreviewChunk(streamState, chunk, {
+        includeReasoning: includeReasoningPreview,
+      });
+      if (!hasStreamingPreviewStateChanged(streamState, nextStreamState)) {
+        continue;
+      }
+      streamState = nextStreamState;
+
+      try {
+        sendStreamingPreviewUpdate(
+          streamState,
+          chunk.type === "text-delta" || chunk.type === "reasoning-delta"
+            ? chunk.text
+            : ""
+        );
+      } catch (error) {
+        console.warn("Failed to send shortcuts_process_data message:", error);
+        break;
+      }
+    }
+  }
diff --git a/app/extension/src/sidepanel/components/AssistantMessage.tsx b/app/extension/src/sidepanel/components/AssistantMessage.tsx
index dd3c14f..eca0012 100644
--- a/app/extension/src/sidepanel/components/AssistantMessage.tsx
+++ b/app/extension/src/sidepanel/components/AssistantMessage.tsx
@@ -22,6 +22,17 @@ interface AssistantMessageProps {
   onRetryLastRun?: () => void;
 }
 
+function findLastResponseTextIndex(message: ChatMessage): number {
+  for (let index = message.parts.length - 1; index >= 0; index -= 1) {
+    const part = message.parts[index];
+    if (part.type === "text" && part.text?.trim()) {
+      return index;
+    }
+  }
+
+  return -1;
+}
+
 const AssistantMessageImpl: FC<AssistantMessageProps> = ({
   message,
   isLast,
@@ -39,9 +50,18 @@ const AssistantMessageImpl: FC<AssistantMessageProps> = ({
   const hasOnlyStatusParts =
     message.parts.length > 0 &&
     message.parts.every((part) => part.type === "status");
+  const text = useMemo(() => getMessageText(message.parts), [message.parts]);
+  const lastResponseTextIndex = useMemo(
+    () => findLastResponseTextIndex(message),
+    [message]
+  );
   const showThinkingPreview =
     thinkingMode && isLast && isRunning && !hasReasoningText;
-  const text = useMemo(() => getMessageText(message.parts), [message.parts]);
+  const showPreparingResponse =
+    isLast &&
+    isRunning &&
+    (lastResponseTextIndex === -1 ||
+      lastResponseTextIndex < message.parts.length - 1);
   const [copyFeedbackVisible, setCopyFeedbackVisible] = useState(false);
   const copyResetTimeoutRef = useRef(null);
   const footerButtonClassName =
@@ -139,11 +159,17 @@
         return null;
       })}
 
-      {isLast && isRunning && message.parts.length === 0 && (
-        <div
-          role="status"
-          aria-label={t("common.loading")}
-          className="rounded-full border"
-        >
-          <span className="claude-dot" />
-          <span className="claude-dot" />
-          <span className="claude-dot" />
-        </div>
-      )}
+      {/* Anything still pending after the last visible text (reasoning, a
+          tool call, or a step marker) keeps the dots mounted. */}
+      {showPreparingResponse && (
+        <div
+          role="status"
+          aria-label={t("common.loading")}
+        >
+          <span className="claude-dot" />
+          <span className="claude-dot" />
+          <span className="claude-dot" />
+        </div>
+      )}