diff --git a/app/extension/src/__tests__/AssistantMessage.test.tsx b/app/extension/src/__tests__/AssistantMessage.test.tsx
new file mode 100644
index 0000000..2b9eb41
--- /dev/null
+++ b/app/extension/src/__tests__/AssistantMessage.test.tsx
@@ -0,0 +1,231 @@
+/** @jest-environment jsdom */
+
+import { afterEach, describe, expect, it, jest } from "@jest/globals";
+import { act } from "react-dom/test-utils";
+import { createRoot } from "react-dom/client";
+
+import { AssistantMessage } from "../sidepanel/components/AssistantMessage";
+import type { ChatMessage } from "../sidepanel/types";
+
+jest.mock("../sidepanel/components/AssistantStatusCard", () => ({
+  AssistantStatusCard: () => null,
+}));
+
+jest.mock("../sidepanel/components/IconButton", () => ({
+  IconButton: ({
+    children,
+    onClick,
+  }: {
+    children: React.ReactNode;
+    onClick?: () => void;
+  }) => {
+    const React = require("react");
+    return React.createElement("button", { type: "button", onClick }, children);
+  },
+}));
+
+jest.mock("../sidepanel/components/LinkCardsBlock", () => ({
+  LinkCardsBlock: () => null,
+}));
+
+jest.mock("../sidepanel/components/MarkdownContent", () => ({
+  MarkdownContent: ({ text }: { text: string }) => {
+    const React = require("react");
+    return React.createElement("div", null, text);
+  },
+}));
+
+jest.mock("../sidepanel/components/MessageFooter", () => ({
+  MessageFooter: ({ children }: { children: React.ReactNode }) => {
+    const React = require("react");
+    return React.createElement("div", null, children);
+  },
+}));
+
+jest.mock("../sidepanel/components/ReasoningBlock", () => ({
+  ReasoningBlock: ({ text }: { text: string }) => {
+    const React = require("react");
+    return React.createElement("div", null, text);
+  },
+}));
+
+jest.mock("../sidepanel/components/ToolCallBlock", () => ({
+  ToolCallBlock: () => null,
+}));
+
+jest.mock("../i18n", () => ({
+  useI18n: () => ({
+    t: (key: string) => key,
+  }),
+}));
+
+(
+  globalThis as typeof globalThis & {
+    IS_REACT_ACT_ENVIRONMENT?: boolean;
+  }
+).IS_REACT_ACT_ENVIRONMENT = true;
+
+function renderAssistantMessage(
+  props: Partial<React.ComponentProps<typeof AssistantMessage>> = {}
+) {
+  const container = document.createElement("div");
+  document.body.appendChild(container);
+  const root = createRoot(container);
+  const message: ChatMessage = {
+    id: "assistant-1",
+    role: "assistant",
+    parts: [],
+    status: "running",
+  };
+
+  act(() => {
+    root.render(
+      <AssistantMessage message={message} isLast {...props} />
+    );
+  });
+
+  return {
+    container,
+    cleanup: () => {
+      act(() => root.unmount());
+      container.remove();
+    },
+  };
+}
+
+describe("AssistantMessage", () => {
+  afterEach(() => {
+    document.body.innerHTML = "";
+  });
+
+  it("shows a preparing response indicator before assistant text arrives", () => {
+    const { container, cleanup } = renderAssistantMessage();
+    const indicator = container.querySelector('[role="status"]');
+
+    expect(indicator?.getAttribute("aria-label")).toBe("common.loading");
+    expect(container.querySelectorAll(".claude-dot")).toHaveLength(3);
+    expect(indicator?.className).not.toContain("rounded-full");
+    expect(indicator?.className).not.toContain("border");
+
+    cleanup();
+  });
+
+  it("keeps the preparing indicator visible for a leading step-start part", () => {
+    const { container, cleanup } = renderAssistantMessage({
+      message: {
+        id: "assistant-2",
+        role: "assistant",
+        parts: [{ type: "step-start" }],
+        status: "running",
+      },
+    });
+
+    expect(container.querySelector('[role="status"]')).not.toBeNull();
+    expect(container.querySelectorAll(".claude-dot")).toHaveLength(3);
+
+    cleanup();
+  });
+
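+  // The cases below stream non-text parts first: reasoning deltas, running
+  // tool calls, and step boundaries should all keep the indicator mounted
+  // until visible response text lands.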
it("keeps the preparing indicator visible while reasoning is streaming", () => { + const { container, cleanup } = renderAssistantMessage({ + message: { + id: "assistant-3", + role: "assistant", + parts: [{ type: "reasoning", text: "Thinking", streaming: true }], + status: "running", + }, + thinkingMode: true, + }); + + expect(container.querySelector('[role="status"]')).not.toBeNull(); + expect(container.textContent).toContain("Thinking"); + + cleanup(); + }); + + it("keeps the preparing indicator visible while a tool call is running", () => { + const { container, cleanup } = renderAssistantMessage({ + message: { + id: "assistant-4", + role: "assistant", + parts: [ + { + type: "tool-call", + toolCallId: "tool-1", + toolName: "search_web", + args: { query: "huntly" }, + }, + ], + status: "running", + }, + }); + + expect(container.querySelector('[role="status"]')).not.toBeNull(); + + cleanup(); + }); + + it("shows the preparing indicator again after earlier text when a tool call starts", () => { + const { container, cleanup } = renderAssistantMessage({ + message: { + id: "assistant-5", + role: "assistant", + parts: [ + { type: "text", text: "先给你一个结论。" }, + { + type: "tool-call", + toolCallId: "tool-2", + toolName: "search_web", + args: { query: "huntly" }, + }, + ], + status: "running", + }, + }); + + expect(container.querySelector('[role="status"]')).not.toBeNull(); + + cleanup(); + }); + + it("shows the preparing indicator again after earlier text when a new step starts", () => { + const { container, cleanup } = renderAssistantMessage({ + message: { + id: "assistant-6", + role: "assistant", + parts: [ + { type: "text", text: "先给你一个结论。" }, + { type: "step-start" }, + ], + status: "running", + }, + }); + + expect(container.querySelector('[role="status"]')).not.toBeNull(); + + cleanup(); + }); + + it("hides the preparing indicator once visible text arrives", () => { + const { container, cleanup } = renderAssistantMessage({ + message: { + id: "assistant-7", + role: "assistant", + parts: [{ type: "text", text: "hello" }], + status: "running", + }, + }); + + expect(container.querySelector('[role="status"]')).toBeNull(); + + cleanup(); + }); +}); \ No newline at end of file diff --git a/app/extension/src/__tests__/providers.test.ts b/app/extension/src/__tests__/providers.test.ts index 35fcd24..6127c8c 100644 --- a/app/extension/src/__tests__/providers.test.ts +++ b/app/extension/src/__tests__/providers.test.ts @@ -2,6 +2,7 @@ import { getOpenAICompatibleBaseUrl, getOllamaBaseUrl, getOllamaOpenAIBaseUrl, + usesRawOpenAICompatibleStream, } from "../ai/openAICompatibleProviders"; import { getEffectiveApiFormat, PROVIDER_REGISTRY } from "../ai/types"; @@ -44,6 +45,64 @@ describe("providers helpers", () => { ); }); + it("uses raw OpenAI-compatible streaming for providers that need explicit thinking control", () => { + expect( + usesRawOpenAICompatibleStream({ + type: "qwen", + enabled: true, + apiKey: "test", + baseUrl: "", + enabledModels: ["qwen3.5-plus"], + updatedAt: Date.now(), + }) + ).toBe(true); + + expect( + usesRawOpenAICompatibleStream({ + type: "zhipu", + enabled: true, + apiKey: "test", + baseUrl: "", + enabledModels: ["glm-5"], + updatedAt: Date.now(), + }) + ).toBe(true); + + expect( + usesRawOpenAICompatibleStream({ + type: "openai", + enabled: true, + apiKey: "test", + baseUrl: "https://api.openai.com/v1", + enabledModels: ["gpt-4.1"], + updatedAt: Date.now(), + }) + ).toBe(false); + + expect( + usesRawOpenAICompatibleStream({ + type: "openai", + enabled: true, + apiKey: "test", + 
baseUrl: "https://dashscope.aliyuncs.com/compatible-mode/v1", + enabledModels: ["qwen-plus"], + updatedAt: Date.now(), + }) + ).toBe(false); + + expect( + usesRawOpenAICompatibleStream({ + type: "qwen", + enabled: true, + apiKey: "test", + baseUrl: "", + enabledModels: ["qwen3.5-plus"], + updatedAt: Date.now(), + apiFormat: "anthropic", + }) + ).toBe(false); + }); + it("falls back to the provider native format when no override is given", () => { expect(getEffectiveApiFormat({ type: "qwen" })).toBe("openai"); expect(getEffectiveApiFormat({ type: "anthropic" })).toBe("anthropic"); diff --git a/app/extension/src/__tests__/thinkingMode.test.ts b/app/extension/src/__tests__/thinkingMode.test.ts new file mode 100644 index 0000000..5a7ba03 --- /dev/null +++ b/app/extension/src/__tests__/thinkingMode.test.ts @@ -0,0 +1,8 @@ +import { getThinkingModeOptions } from "../ai/thinkingMode"; + +describe("thinking mode helpers", () => { + it("always sends an explicit enable_thinking flag", () => { + expect(getThinkingModeOptions(true)).toEqual({ enable_thinking: true }); + expect(getThinkingModeOptions(false)).toEqual({ enable_thinking: false }); + }); +}); \ No newline at end of file diff --git a/app/extension/src/ai/openAICompatibleProviders.ts b/app/extension/src/ai/openAICompatibleProviders.ts index fa77887..4e8e8cb 100644 --- a/app/extension/src/ai/openAICompatibleProviders.ts +++ b/app/extension/src/ai/openAICompatibleProviders.ts @@ -1,4 +1,8 @@ -import { AIProviderConfig, PROVIDER_REGISTRY } from "./types"; +import { + AIProviderConfig, + getEffectiveApiFormat, + PROVIDER_REGISTRY, +} from "./types"; function trimTrailingSlash(url: string): string { return url.replace(/\/+$/, ""); @@ -12,6 +16,24 @@ export function getProviderBaseUrl(config: AIProviderConfig): string | undefined ); } +export function usesRawOpenAICompatibleStream( + config: AIProviderConfig +): boolean { + const format = getEffectiveApiFormat({ + type: config.type, + apiFormat: config.apiFormat, + }); + if (format !== "openai") { + return false; + } + + if (PROVIDER_REGISTRY[config.type]?.requiresRawOpenAICompatibleStream) { + return true; + } + + return false; +} + /** * @deprecated use {@link getProviderBaseUrl}. Kept for call sites still being migrated. */ diff --git a/app/extension/src/ai/openAICompatibleStream.ts b/app/extension/src/ai/openAICompatibleStream.ts new file mode 100644 index 0000000..6b11f0b --- /dev/null +++ b/app/extension/src/ai/openAICompatibleStream.ts @@ -0,0 +1,194 @@ +import { combineUrl } from "../utils"; + +export interface OpenAICompatibleStreamDelta { + contentDelta: string; + reasoningDelta: string; + done: boolean; +} + +interface StreamOpenAICompatibleChatCompletionOptions { + apiKey: string; + baseUrl: string; + modelId: string; + systemPrompt: string; + userPrompt: string; + maxTokens: number; + requestBodyExtras?: Record; + abortSignal: AbortSignal; + onDelta: (delta: OpenAICompatibleStreamDelta) => void; +} + +function buildOpenAICompatibleChatCompletionBody({ + modelId, + systemPrompt, + userPrompt, + maxTokens, + requestBodyExtras = {}, +}: Pick< + StreamOpenAICompatibleChatCompletionOptions, + "modelId" | "systemPrompt" | "userPrompt" | "maxTokens" | "requestBodyExtras" +>) { + return { + model: modelId, + stream: true, + max_tokens: maxTokens, + messages: [ + ...(systemPrompt.trim() + ? 
[{ role: "system", content: systemPrompt }] + : []), + { role: "user", content: userPrompt }, + ], + ...requestBodyExtras, + }; +} + +function extractOpenAICompatibleStreamDelta( + data: string +): OpenAICompatibleStreamDelta { + if (data.trim() === "[DONE]") { + return { + contentDelta: "", + reasoningDelta: "", + done: true, + }; + } + + let parsed: unknown; + try { + parsed = JSON.parse(data); + } catch { + return { + contentDelta: "", + reasoningDelta: "", + done: false, + }; + } + + const delta = + (parsed as { choices?: Array<{ delta?: Record }> }) + ?.choices?.[0]?.delta ?? {}; + + return { + contentDelta: typeof delta.content === "string" ? delta.content : "", + reasoningDelta: + typeof delta.reasoning_content === "string" + ? delta.reasoning_content + : typeof delta.reasoning === "string" + ? delta.reasoning + : "", + done: false, + }; +} + +export async function streamOpenAICompatibleChatCompletion({ + apiKey, + baseUrl, + modelId, + systemPrompt, + userPrompt, + maxTokens, + requestBodyExtras, + abortSignal, + onDelta, +}: StreamOpenAICompatibleChatCompletionOptions): Promise { + const response = await fetch(combineUrl(baseUrl, "chat/completions"), { + method: "POST", + headers: { + "Content-Type": "application/json", + Accept: "text/event-stream", + Authorization: `Bearer ${apiKey}`, + }, + body: JSON.stringify( + buildOpenAICompatibleChatCompletionBody({ + modelId, + systemPrompt, + userPrompt, + maxTokens, + requestBodyExtras, + }) + ), + signal: abortSignal, + }); + + if (!response.ok) { + const errorText = await response.text(); + throw new Error( + errorText || + `HTTP error! status: ${response.status} ${response.statusText}` + ); + } + + if (!response.body) { + throw new Error("No response body available"); + } + + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + let buffer = ""; + let dataLines: string[] = []; + let done = false; + + const processEvent = () => { + if (dataLines.length === 0) { + return; + } + + const eventData = dataLines.join("\n"); + dataLines = []; + + const delta = extractOpenAICompatibleStreamDelta(eventData); + if (delta.done) { + done = true; + return; + } + + onDelta(delta); + }; + + const processBuffer = () => { + let newlineIndex = buffer.indexOf("\n"); + while (newlineIndex !== -1) { + let line = buffer.slice(0, newlineIndex); + buffer = buffer.slice(newlineIndex + 1); + + if (line.endsWith("\r")) { + line = line.slice(0, -1); + } + + if (line === "") { + processEvent(); + } else if (line.startsWith("data:")) { + dataLines.push( + line.startsWith("data: ") ? line.slice(6) : line.slice(5) + ); + } + + if (done) { + return; + } + + newlineIndex = buffer.indexOf("\n"); + } + }; + + try { + while (!done) { + const { value, done: streamDone } = await reader.read(); + if (streamDone) { + break; + } + + buffer += decoder.decode(value, { stream: true }); + processBuffer(); + } + + buffer += decoder.decode(); + if (buffer.length > 0) { + buffer += "\n"; + processBuffer(); + } + processEvent(); + } finally { + reader.releaseLock(); + } +} \ No newline at end of file diff --git a/app/extension/src/ai/thinkingMode.ts b/app/extension/src/ai/thinkingMode.ts new file mode 100644 index 0000000..05f4691 --- /dev/null +++ b/app/extension/src/ai/thinkingMode.ts @@ -0,0 +1,12 @@ +/** + * Returns extra request body options for thinking mode. + * Some OpenAI-compatible providers default to thinking enabled, so callers + * need an explicit false to turn it off. 
+ */
+export function getThinkingModeOptions(
+  thinkingModeEnabled: boolean
+): Record<string, unknown> {
+  return {
+    enable_thinking: thinkingModeEnabled,
+  };
+}
\ No newline at end of file
diff --git a/app/extension/src/ai/types.ts b/app/extension/src/ai/types.ts
index 2f6ff1b..95005fa 100644
--- a/app/extension/src/ai/types.ts
+++ b/app/extension/src/ai/types.ts
@@ -68,6 +68,14 @@ export interface ProviderMeta {
    * endpoints for the same provider (qwen / zhipu / minimax currently).
    */
   supportsCustomApiFormat?: boolean;
+  /**
+   * Some OpenAI-compatible providers need the preview/background pipeline to
+   * use the raw chat-completions stream path so request-body flags such as
+   * `enable_thinking` can be sent explicitly.
+   *
+   * Only meaningful when the effective API format is `openai`.
+   */
+  requiresRawOpenAICompatibleStream?: boolean;
 }
 
 // Model information
@@ -264,6 +272,7 @@ export const PROVIDER_REGISTRY: Record<ProviderType, ProviderMeta> = {
     defaultBaseUrl: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
     nativeApiFormat: 'openai',
     supportsCustomApiFormat: true,
+    requiresRawOpenAICompatibleStream: true,
     defaultModels: [
       { id: 'qwen3.5-plus' },
       { id: 'qwen3-max' },
@@ -288,6 +297,7 @@ export const PROVIDER_REGISTRY: Record<ProviderType, ProviderMeta> = {
     defaultBaseUrl: 'https://open.bigmodel.cn/api/paas/v4',
     nativeApiFormat: 'openai',
     supportsCustomApiFormat: true,
+    requiresRawOpenAICompatibleStream: true,
     defaultModels: [
       { id: 'glm-5' },
       { id: 'glm-4.7' },
@@ -313,6 +323,7 @@ export const PROVIDER_REGISTRY: Record<ProviderType, ProviderMeta> = {
     defaultBaseUrl: 'https://api.minimax.chat/v1',
     nativeApiFormat: 'openai',
     supportsCustomApiFormat: true,
+    requiresRawOpenAICompatibleStream: true,
     defaultModels: [
       { id: 'MiniMax-M2.5' },
       { id: 'MiniMax-M2.5-highspeed' },
diff --git a/app/extension/src/background.ts b/app/extension/src/background.ts
index 0432035..4e840b7 100644
--- a/app/extension/src/background.ts
+++ b/app/extension/src/background.ts
@@ -27,12 +27,18 @@ import {
 } from "./ai/storage";
 import { PROVIDER_REGISTRY, ProviderType } from "./ai/types";
 import { createProviderModel } from "./ai/providers";
+import {
+  getOpenAICompatibleBaseUrl,
+  usesRawOpenAICompatibleStream,
+} from "./ai/openAICompatibleProviders";
+import { streamOpenAICompatibleChatCompletion } from "./ai/openAICompatibleStream";
 import {
   applyStreamingPreviewChunk,
   createStreamingPreviewState,
   getStreamingPreviewResult,
   hasStreamingPreviewStateChanged,
 } from "./ai/streamingPreview";
+import { getThinkingModeOptions } from "./ai/thinkingMode";
 import { translateUi } from "./uiMessages";
 import { streamText } from "ai";
 import type { ProviderOptions } from "@ai-sdk/provider-utils";
@@ -73,6 +79,7 @@ const pendingSidepanelContextCommands =
 const SAVED_BADGE_TEXT = "✓";
 const SAVED_BADGE_BG = "#15803D";
 const AI_MAX_OUTPUT_TOKENS = 20000;
+const RAW_OPENAI_COMPATIBLE_MAX_TOKENS = 8000;
 const ANTHROPIC_THINKING_BUDGET_TOKENS = 4000;
 
 function buildThinkingProviderOptions(): ProviderOptions {
@@ -451,51 +458,98 @@ async function startProcessingWithVercelAI(task: any) {
   let streamState = createStreamingPreviewState();
   const includeReasoningPreview = Boolean(thinkingModeEnabled);
 
-  // Create the model
-  const model = createProviderModel(config, modelId);
-  if (!model) {
-    throw new Error(`Failed to create model for ${providerType}`);
-  }
+  if (usesRawOpenAICompatibleStream(config)) {
+    const baseUrl = getOpenAICompatibleBaseUrl(config);
+    if (!baseUrl) {
+      throw new Error(`Provider ${providerType} base URL is not configured`);
+    }
 
-  // Use streamText for streaming response with abort signal
-  const result = streamText({
-    model,
-    system: systemPrompt,
-    prompt: userPrompt,
-    maxOutputTokens: AI_MAX_OUTPUT_TOKENS,
-    abortSignal: abortController.signal,
-    providerOptions: thinkingModeEnabled
-      ? buildThinkingProviderOptions()
-      : undefined,
-  });
+    await streamOpenAICompatibleChatCompletion({
+      apiKey: config.apiKey,
+      baseUrl,
+      modelId,
+      systemPrompt,
+      userPrompt,
+      maxTokens: RAW_OPENAI_COMPATIBLE_MAX_TOKENS,
+      requestBodyExtras: getThinkingModeOptions(Boolean(thinkingModeEnabled)),
+      abortSignal: abortController.signal,
+      onDelta: ({ contentDelta, reasoningDelta }) => {
+        let nextStreamState = streamState;
+
+        if (reasoningDelta) {
+          nextStreamState = applyStreamingPreviewChunk(
+            nextStreamState,
+            {
+              type: "reasoning",
+              textDelta: reasoningDelta,
+            },
+            {
+              includeReasoning: includeReasoningPreview,
+            }
+          );
+        }
 
-  // Process the full stream so providers that emit reasoning deltas before
-  // text deltas (for example, glm-5) still produce visible incremental output.
-  for await (const chunk of result.fullStream) {
-    // Check if aborted
-    if (abortController.signal.aborted) {
-      break;
-    }
+        if (contentDelta) {
+          nextStreamState = applyStreamingPreviewChunk(nextStreamState, {
+            type: "text-delta",
+            textDelta: contentDelta,
+          });
+        }
+
+        if (!hasStreamingPreviewStateChanged(streamState, nextStreamState)) {
+          return;
+        }
 
-    const nextStreamState = applyStreamingPreviewChunk(streamState, chunk, {
-      includeReasoning: includeReasoningPreview,
+        streamState = nextStreamState;
+        sendStreamingPreviewUpdate(
+          streamState,
+          contentDelta || reasoningDelta
+        );
+      },
     });
-    if (!hasStreamingPreviewStateChanged(streamState, nextStreamState)) {
-      continue;
+  } else {
+    const model = createProviderModel(config, modelId);
+    if (!model) {
+      throw new Error(`Failed to create model for ${providerType}`);
     }
-    streamState = nextStreamState;
 
-    // Send streaming data to preview
-    try {
-      sendStreamingPreviewUpdate(
-        streamState,
-        chunk.type === "text-delta" || chunk.type === "reasoning-delta"
-          ? chunk.text
-          : ""
-      );
-    } catch (error) {
-      console.warn("Failed to send shortcuts_process_data message:", error);
-      break;
+    const result = streamText({
+      model,
+      system: systemPrompt,
+      prompt: userPrompt,
+      maxOutputTokens: AI_MAX_OUTPUT_TOKENS,
+      abortSignal: abortController.signal,
+      providerOptions: thinkingModeEnabled
+        ? buildThinkingProviderOptions()
+        : undefined,
+    });
+
+    // Process the full stream so providers that emit reasoning deltas before
+    // text deltas (for example, glm-5) still produce visible incremental output.
+    for await (const chunk of result.fullStream) {
+      if (abortController.signal.aborted) {
+        break;
+      }
+
+      const nextStreamState = applyStreamingPreviewChunk(streamState, chunk, {
+        includeReasoning: includeReasoningPreview,
+      });
+      if (!hasStreamingPreviewStateChanged(streamState, nextStreamState)) {
+        continue;
+      }
+      streamState = nextStreamState;
+
+      try {
+        sendStreamingPreviewUpdate(
+          streamState,
+          chunk.type === "text-delta" || chunk.type === "reasoning-delta"
+            ? chunk.text
+            : ""
+        );
+      } catch (error) {
+        console.warn("Failed to send shortcuts_process_data message:", error);
+        break;
+      }
+    }
+  }
diff --git a/app/extension/src/sidepanel/components/AssistantMessage.tsx b/app/extension/src/sidepanel/components/AssistantMessage.tsx
index dd3c14f..eca0012 100644
--- a/app/extension/src/sidepanel/components/AssistantMessage.tsx
+++ b/app/extension/src/sidepanel/components/AssistantMessage.tsx
@@ -22,6 +22,17 @@ interface AssistantMessageProps {
   onRetryLastRun?: () => void;
 }
 
+function findLastResponseTextIndex(message: ChatMessage): number {
+  for (let index = message.parts.length - 1; index >= 0; index -= 1) {
+    const part = message.parts[index];
+    if (part.type === "text" && part.text?.trim()) {
+      return index;
+    }
+  }
+
+  return -1;
+}
+
 const AssistantMessageImpl: FC<AssistantMessageProps> = ({
   message,
   isLast,
@@ -39,9 +50,18 @@ const AssistantMessageImpl: FC<AssistantMessageProps> = ({
   const hasOnlyStatusParts =
     message.parts.length > 0 &&
     message.parts.every((part) => part.type === "status");
+  const text = useMemo(() => getMessageText(message.parts), [message.parts]);
+  const lastResponseTextIndex = useMemo(
+    () => findLastResponseTextIndex(message),
+    [message]
+  );
   const showThinkingPreview =
     thinkingMode && isLast && isRunning && !hasReasoningText;
-  const text = useMemo(() => getMessageText(message.parts), [message.parts]);
+  const showPreparingResponse =
+    isLast &&
+    isRunning &&
+    (lastResponseTextIndex === -1 ||
+      lastResponseTextIndex < message.parts.length - 1);
   const [copyFeedbackVisible, setCopyFeedbackVisible] = useState(false);
   const copyResetTimeoutRef = useRef(null);
   const footerButtonClassName =
@@ -139,11 +159,17 @@
         return null;
       })}
 
-      {isLast && isRunning && message.parts.length === 0 && (
-        <div
-          role="status"
-          aria-label={t("common.loading")}
-          className="rounded-full border"
-        >
-          <span className="claude-dot" />
-          <span className="claude-dot" />
-          <span className="claude-dot" />
-        </div>
-      )}
+      {/* Anything still pending after the last visible text (reasoning, a
+          tool call, or a step marker) keeps the dots mounted. */}
+      {showPreparingResponse && (
+        <div
+          role="status"
+          aria-label={t("common.loading")}
+        >
+          <span className="claude-dot" />
+          <span className="claude-dot" />
+          <span className="claude-dot" />
+        </div>
+      )}