tinyhumansai · senamakel · May 23, 2026 · May 21, 2026 · May 21, 2026 · graycyrus
@@ -3,7 +3,12 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
 
 import type { ChatEventListeners } from '../../services/chatService';
 import { VISEMES } from './Mascot/visemes';
-import { ACK_FACE_HOLD_MS, pickViseme, useHumanMascot } from './useHumanMascot';
+import {
+  ACK_FACE_HOLD_MS,
+  pickConversationAckFace,
+  pickViseme,
+  useHumanMascot,
+} from './useHumanMascot';
 import { type PlaybackHandle, playBase64Audio } from './voice/audioPlayer';
 import { synthesizeSpeech } from './voice/ttsClient';
 
@@ -133,6 +138,46 @@ describe('pickViseme', () => {
   });
 });
 
+// Specs for the pure cue-to-face mapping used for the post-turn
+// acknowledgement: explicit reaction emoji first, response text second.
+describe('pickConversationAckFace', () => {
+  it('prefers explicit reaction emoji from chat_done', () => {
+    expect(pickConversationAckFace({ full_response: 'Done', reaction_emoji: '✅' })).toBe('happy');
+    expect(pickConversationAckFace({ full_response: 'Done', reaction_emoji: '🤔' })).toBe(
+      'confused'
+    );
+    expect(pickConversationAckFace({ full_response: 'Done', reaction_emoji: '⚠️' })).toBe(
+      'concerned'
+    );
+  });
+
+  it('lets a mapped reaction emoji override conflicting response text', () => {
+    // The text alone would read as concerned; the emoji is consulted first.
+    expect(
+      pickConversationAckFace({ full_response: 'Sorry, that failed.', reaction_emoji: '✅' })
+    ).toBe('happy');
+  });
+
+  it('ignores whitespace around the reaction emoji', () => {
+    expect(pickConversationAckFace({ full_response: 'Done', reaction_emoji: ' 🤔 ' })).toBe(
+      'confused'
+    );
+  });
+
+  it('falls back to text cues when the reaction emoji is not mapped', () => {
+    expect(
+      pickConversationAckFace({ full_response: 'All set, this is fixed.', reaction_emoji: '🦄' })
+    ).toBe('happy');
+  });
+
+  it('falls back to deterministic response text cues', () => {
+    expect(
+      pickConversationAckFace({ full_response: 'All set, this is fixed.', reaction_emoji: null })
+    ).toBe('happy');
+    expect(
+      pickConversationAckFace({
+        full_response: 'I need more detail to clarify which workspace you mean.',
+        reaction_emoji: null,
+      })
+    ).toBe('confused');
+    expect(
+      pickConversationAckFace({
+        full_response: 'Sorry, the provider failed and I cannot continue.',
+        reaction_emoji: null,
+      })
+    ).toBe('concerned');
+  });
+
+  it('ranks concerned text cues above happy ones in the same response', () => {
+    // Contains both "fixed" (happy) and "sorry"/"failed" (concerned).
+    expect(
+      pickConversationAckFace({
+        full_response: 'Sorry, I fixed one file but the other failed.',
+        reaction_emoji: null,
+      })
+    ).toBe('concerned');
+  });
+
+  it('returns null when there is no strong cue', () => {
+    expect(
+      pickConversationAckFace({ full_response: 'Here is the summary.', reaction_emoji: null })
+    ).toBeNull();
+  });
+
+  it('returns null when the response text is missing', () => {
+    expect(pickConversationAckFace({ reaction_emoji: null })).toBeNull();
+  });
+
+  it('returns null when the response text is only whitespace', () => {
+    expect(pickConversationAckFace({ full_response: '   ', reaction_emoji: null })).toBeNull();
+  });
+});
+
 describe('useHumanMascot state machine', () => {
   beforeEach(() => {
     capturedListeners = null;
@@ -226,6 +271,42 @@ describe('useHumanMascot state machine', () => {
     expect(result.current.face).toBe('idle');
   });
 
+  it('uses reaction emoji for the post-turn acknowledgement face', () => {
+    const hook = renderHook(() => useHumanMascot({ speakReplies: false }));
+
+    // Deliver a chat_done payload that carries an explicit reaction emoji.
+    const emitDone = (): void => {
+      capturedListeners?.onDone?.(
+        fakeEvent({
+          full_response: 'I need more detail before I can choose.',
+          reaction_emoji: '🤔',
+          rounds_used: 1,
+          total_input_tokens: 1,
+          total_output_tokens: 1,
+        })
+      );
+    };
+    // Step the fake clock just past the acknowledgement hold window.
+    const expireHold = (): void => {
+      vi.advanceTimersByTime(ACK_FACE_HOLD_MS + 1);
+    };
+
+    act(emitDone);
+    expect(hook.result.current.face).toBe('confused');
+
+    act(expireHold);
+    expect(hook.result.current.face).toBe('idle');
+  });
+
+  it('uses response text cues when no reaction emoji is present', () => {
+    const { result } = renderHook(() => useHumanMascot({ speakReplies: false }));
+    // No reaction emoji: the face has to come from the wording of the reply.
+    act(() => {
+      capturedListeners?.onDone?.(
+        fakeEvent({
+          full_response: 'Sorry, that failed because the provider is unavailable.',
+          reaction_emoji: null,
+          rounds_used: 1,
+          total_input_tokens: 1,
+          total_output_tokens: 1,
+        })
+      );
+    });
+    expect(result.current.face).toBe('concerned');
+    // Like the emoji-driven case, the text-driven face is only held briefly.
+    act(() => {
+      vi.advanceTimersByTime(ACK_FACE_HOLD_MS + 1);
+    });
+    expect(result.current.face).toBe('idle');
+  });
+
   it('holds concerned briefly on chat_error, then idles', () => {
     const { result } = renderHook(() => useHumanMascot());
     act(() => {
@@ -518,6 +599,28 @@ describe('useHumanMascot TTS playback', () => {
     expect(result.current.face).toBe('idle');
   });
 
+  it('shows concerned when audio playback cannot start', async () => {
+    // Synthesis resolves with audio, but starting playback rejects.
+    const synthesizeMock = synthesizeSpeech as ReturnType<typeof vi.fn>;
+    const playMock = playBase64Audio as ReturnType<typeof vi.fn>;
+    synthesizeMock.mockResolvedValueOnce({
+      audio_base64: 'AAA=',
+      audio_mime: 'audio/mpeg',
+      visemes: [{ viseme: 'aa', start_ms: 0, end_ms: 100 }],
+    });
+    playMock.mockRejectedValueOnce(new Error('decode failed'));
+
+    const hook = renderHook(() => useHumanMascot({ speakReplies: true }));
+    const emitDoneAndFlush = async (): Promise<void> => {
+      capturedListeners?.onDone?.(fakeDone('All set, this is fixed.'));
+      // Yield three microtask turns so the pending promise chain can run.
+      await Promise.resolve();
+      await Promise.resolve();
+      await Promise.resolve();
+    };
+    await act(emitDoneAndFlush);
+
+    // The reply text alone would read as happy; the playback failure wins.
+    expect(hook.result.current.face).toBe('concerned');
+
+    act(() => {
+      vi.advanceTimersByTime(ACK_FACE_HOLD_MS + 1);
+    });
+    expect(hook.result.current.face).toBe('idle');
+  });
+
   // Issue #1762 — the user-selected mascot voice id flows through to
   // every TTS RPC the hook makes. The store-stub at module scope lets
   // these specs pin the prop without standing up a Redux Provider.

@@ -76,6 +76,40 @@ export function pickViseme(delta: string): VisemeShape {
   }
 }
 
+/** Faces that may be shown as the short acknowledgement after a turn. */
+type ConversationAckFace = Extract<MascotFace, 'happy' | 'confused' | 'concerned'>;
+/** The fields of a completed-turn event that the acknowledgement picker reads. */
+type ConversationAckEvent = { full_response?: string | null; reaction_emoji?: string | null };
+
+// Reaction emoji grouped by the acknowledgement face they select.
+const HAPPY_REACTION_EMOJIS = new Set(['✅', '🎉', '🙌', '😊', '😄', '👍', '💪']);
+const CONFUSED_REACTION_EMOJIS = new Set(['🤔', '❓', '❔']);
+const CONCERNED_REACTION_EMOJIS = new Set(['⚠️', '⚠', '🚨', '❌', '😕', '😟']);
+
+/**
+ * Presentation-only code points that can be attached to an emoji without
+ * changing its meaning: variation selector-16 (U+FE0F) and the skin-tone
+ * modifiers (U+1F3FB–U+1F3FF). Stripped before a second table lookup so that
+ * variants such as a skin-toned thumbs-up still map to a face.
+ */
+const REACTION_EMOJI_MODIFIER_RE = /[\uFE0F\u{1F3FB}-\u{1F3FF}]/gu;
+
+// Whole-word, case-insensitive text cues. `can't` accepts both the ASCII
+// apostrophe and the typographic one (U+2019).
+const CONCERNED_TEXT_RE =
+  /\b(sorry|apolog(?:y|ize|ise)|failed|failure|error|cannot|can['\u2019]t|unable|blocked|problem)\b/i;
+const CONFUSED_TEXT_RE =
+  /\b(not sure|unclear|ambiguous|clarify|which one|need more|can you confirm|maybe)\b/i;
+const HAPPY_TEXT_RE = /\b(done|completed|fixed|success|successful|ready|all set|great|nice)\b/i;
+
+/**
+ * Look up the face for a trimmed, non-empty reaction emoji.
+ *
+ * The emoji is tried exactly as received first; only if that misses is it
+ * retried with presentation modifiers removed. Returns null when neither
+ * form is in any table.
+ */
+function pickReactionAckFace(reaction: string): ConversationAckFace | null {
+  const bare = reaction.replace(REACTION_EMOJI_MODIFIER_RE, '');
+  const candidates = bare === reaction ? [reaction] : [reaction, bare];
+  for (const candidate of candidates) {
+    if (HAPPY_REACTION_EMOJIS.has(candidate)) return 'happy';
+    if (CONFUSED_REACTION_EMOJIS.has(candidate)) return 'confused';
+    if (CONCERNED_REACTION_EMOJIS.has(candidate)) return 'concerned';
+  }
+  return null;
+}
+
+/**
+ * Map conversation-level meaning into the short acknowledgement face that
+ * follows a completed turn. Runtime activity still owns thinking/speaking
+ * states; this only decides the post-turn emotional beat.
+ *
+ * Resolution order:
+ * 1. A recognised `reaction_emoji` (whitespace-trimmed) wins outright.
+ * 2. Otherwise the trimmed `full_response` is scanned for text cues, checked
+ *    as concerned, then confused, then happy, so a reply containing both an
+ *    apology and "fixed" reads as concerned.
+ *
+ * @param event - Completed-turn payload; both fields may be absent or null.
+ * @returns The face to hold, or null when there is no recognised emoji and
+ *   the text is empty or carries no cue.
+ */
+export function pickConversationAckFace(event: ConversationAckEvent): ConversationAckFace | null {
+  const reaction = event.reaction_emoji?.trim();
+  if (reaction) {
+    const reactionFace = pickReactionAckFace(reaction);
+    // An unrecognised emoji is not an answer; fall through to the text cues.
+    if (reactionFace) return reactionFace;
+  }
+
+  const text = event.full_response?.trim() ?? '';
+  if (!text) return null;
+  if (CONCERNED_TEXT_RE.test(text)) return 'concerned';
+  if (CONFUSED_TEXT_RE.test(text)) return 'confused';
+  if (HAPPY_TEXT_RE.test(text)) return 'happy';
+  return null;
+}
+
 export interface UseHumanMascotOptions {
   /** When true, post-stream replies are sent to ElevenLabs and the mouth
    *  follows the returned viseme timeline while the audio plays. */
@@ -99,9 +133,9 @@ export interface UseHumanMascotResult {
  * - `iteration_start` round > 1 or `tool_call` → `confused` (heavy reasoning)
  * - `tool_result success=false` → `concerned` (held briefly)
  * - `text_delta` → `speaking`, pseudo-lipsync from the trailing letter
- * - `chat_done` (no TTS) → `happy` (held briefly), then `idle`
+ * - `chat_done` (no TTS) → message-aware ack face (held briefly), then `idle`
  * - `chat_done` (TTS enabled) → `thinking` while synthesizing → `speaking`
- *   with real visemes → `idle` when the audio ends
+ *   with real visemes → message-aware ack face when the audio ends
  * - `chat_error`, TTS failure → `concerned` (held briefly), then `idle`
  * - `listening` option override → `listening` (highest priority)
  *
@@ -187,13 +221,14 @@ export function useHumanMascot(options: UseHumanMascotOptions = {}): UseHumanMas
         lastDeltaAtRef.current = window.performance.now();
       },
       onDone: e => {
+        const ackFace = pickConversationAckFace(e) ?? 'happy';
         if (!speakRef.current || !e.full_response?.trim()) {
           // Soft acknowledgement beat instead of snapping back to idle.
-          holdThenIdle('happy');
+          holdThenIdle(ackFace);
           return;
         }
         // Fire-and-forget — startTtsPlayback owns its cleanup via finally.
-        void startTtsPlayback(e.full_response).catch(() => {});
+        void startTtsPlayback(e.full_response, ackFace).catch(() => {});
       },
       onError: () => {
         // Bump seq to invalidate any in-flight startTtsPlayback awaiters.
@@ -225,7 +260,10 @@ export function useHumanMascot(options: UseHumanMascotOptions = {}): UseHumanMas
     };
   }, []);
 
-  async function startTtsPlayback(text: string): Promise<void> {
+  async function startTtsPlayback(
+    text: string,
+    ackFace: ConversationAckFace = 'happy'
+  ): Promise<void> {
     // Cancel any in-flight playback so its handle.ended callback can't reset
     // state belonging to the new run.
     const prev = playbackRef.current;
@@ -313,14 +351,17 @@ export function useHumanMascot(options: UseHumanMascotOptions = {}): UseHumanMas
         // rethrow anything else so real decoder errors aren't masked.
         swallowAudioStop(err);
       }
+    } catch (err) {
+      if (isStillCurrent()) degraded = true;
+      throw err;
     } finally {
       if (isStillCurrent()) {
         playbackRef.current = null;
         visemeFramesRef.current = [];
         if (degraded) {
           holdThenIdle('concerned');
         } else {
-          holdThenIdle('happy');
+          holdThenIdle(ackFace);
         }
       }
     }

@@ -30,6 +30,8 @@ This is the headline use case and has its own page, see [Meeting Agents](meeting
 
 The mascot has mood states (idle, thinking, listening, talking, surprised, dreaming) and it transitions between them based on what the agent is doing. When you start typing it shifts into a listening pose. When the model is reasoning, it shows that. When a tool call returns something noteworthy, it reacts. When you stop interacting for a while, it drifts into idle.
 
+After a turn finishes, the desktop mascot also reads the conversation-level cue that arrives with the chat result. A success cue produces a short happy acknowledgement, uncertainty produces a confused acknowledgement, and warnings or failed outcomes produce a concerned acknowledgement. If no strong cue is present, it keeps the existing calm (happy) post-turn acknowledgement and then returns to idle.
+
 It is meant to feel alive, not animated-on-rails.
 
 ### It remembers you