-
Notifications
You must be signed in to change notification settings - Fork 2.4k
feat(mascot): react to conversation cues #2423
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -76,6 +76,40 @@ export function pickViseme(delta: string): VisemeShape { | |
| } | ||
| } | ||
|
|
||
| type ConversationAckFace = Extract<MascotFace, 'happy' | 'confused' | 'concerned'>; | ||
| type ConversationAckEvent = { full_response?: string | null; reaction_emoji?: string | null }; | ||
|
|
||
| const HAPPY_REACTION_EMOJIS = new Set(['✅', '🎉', '🙌', '😊', '😄', '👍', '💪']); | ||
| const CONFUSED_REACTION_EMOJIS = new Set(['🤔', '❓', '❔']); | ||
| const CONCERNED_REACTION_EMOJIS = new Set(['⚠️', '⚠', '🚨', '❌', '😕', '😟']); | ||
|
|
||
| const CONCERNED_TEXT_RE = | ||
| /\b(sorry|apolog(?:y|ize|ise)|failed|failure|error|cannot|can't|unable|blocked|problem)\b/i; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [minor] |
||
| const CONFUSED_TEXT_RE = | ||
| /\b(not sure|unclear|ambiguous|clarify|which one|need more|can you confirm|maybe)\b/i; | ||
| const HAPPY_TEXT_RE = /\b(done|completed|fixed|success|successful|ready|all set|great|nice)\b/i; | ||
|
|
||
| /** | ||
| * Map conversation-level meaning into the short acknowledgement face that | ||
| * follows a completed turn. Runtime activity still owns thinking/speaking | ||
| * states; this only decides the post-turn emotional beat. | ||
| */ | ||
| export function pickConversationAckFace(event: ConversationAckEvent): ConversationAckFace | null { | ||
| const reaction = event.reaction_emoji?.trim(); | ||
| if (reaction) { | ||
| if (HAPPY_REACTION_EMOJIS.has(reaction)) return 'happy'; | ||
| if (CONFUSED_REACTION_EMOJIS.has(reaction)) return 'confused'; | ||
| if (CONCERNED_REACTION_EMOJIS.has(reaction)) return 'concerned'; | ||
| } | ||
|
|
||
| const text = event.full_response?.trim() ?? ''; | ||
| if (!text) return null; | ||
| if (CONCERNED_TEXT_RE.test(text)) return 'concerned'; | ||
| if (CONFUSED_TEXT_RE.test(text)) return 'confused'; | ||
| if (HAPPY_TEXT_RE.test(text)) return 'happy'; | ||
| return null; | ||
| } | ||
|
|
||
| export interface UseHumanMascotOptions { | ||
| /** When true, post-stream replies are sent to ElevenLabs and the mouth | ||
| * follows the returned viseme timeline while the audio plays. */ | ||
|
|
@@ -99,9 +133,9 @@ export interface UseHumanMascotResult { | |
| * - `iteration_start` round > 1 or `tool_call` → `confused` (heavy reasoning) | ||
| * - `tool_result success=false` → `concerned` (held briefly) | ||
| * - `text_delta` → `speaking`, pseudo-lipsync from the trailing letter | ||
| * - `chat_done` (no TTS) → `happy` (held briefly), then `idle` | ||
| * - `chat_done` (no TTS) → message-aware ack face (held briefly), then `idle` | ||
| * - `chat_done` (TTS enabled) → `thinking` while synthesizing → `speaking` | ||
| * with real visemes → `idle` when the audio ends | ||
| * with real visemes → message-aware ack face when the audio ends | ||
| * - `chat_error`, TTS failure → `concerned` (held briefly), then `idle` | ||
| * - `listening` option override → `listening` (highest priority) | ||
| * | ||
|
|
@@ -187,13 +221,14 @@ export function useHumanMascot(options: UseHumanMascotOptions = {}): UseHumanMas | |
| lastDeltaAtRef.current = window.performance.now(); | ||
| }, | ||
| onDone: e => { | ||
| const ackFace = pickConversationAckFace(e) ?? 'happy'; | ||
| if (!speakRef.current || !e.full_response?.trim()) { | ||
| // Soft acknowledgement beat instead of snapping back to idle. | ||
| holdThenIdle('happy'); | ||
| holdThenIdle(ackFace); | ||
| return; | ||
| } | ||
| // Fire-and-forget — startTtsPlayback owns its cleanup via finally. | ||
| void startTtsPlayback(e.full_response).catch(() => {}); | ||
| void startTtsPlayback(e.full_response, ackFace).catch(() => {}); | ||
| }, | ||
| onError: () => { | ||
| // Bump seq to invalidate any in-flight startTtsPlayback awaiters. | ||
|
|
@@ -225,7 +260,10 @@ export function useHumanMascot(options: UseHumanMascotOptions = {}): UseHumanMas | |
| }; | ||
| }, []); | ||
|
|
||
| async function startTtsPlayback(text: string): Promise<void> { | ||
| async function startTtsPlayback( | ||
| text: string, | ||
| ackFace: ConversationAckFace = 'happy' | ||
| ): Promise<void> { | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
| // Cancel any in-flight playback so its handle.ended callback can't reset | ||
| // state belonging to the new run. | ||
| const prev = playbackRef.current; | ||
|
|
@@ -313,14 +351,17 @@ export function useHumanMascot(options: UseHumanMascotOptions = {}): UseHumanMas | |
| // rethrow anything else so real decoder errors aren't masked. | ||
| swallowAudioStop(err); | ||
| } | ||
| } catch (err) { | ||
| if (isStillCurrent()) degraded = true; | ||
| throw err; | ||
| } finally { | ||
| if (isStillCurrent()) { | ||
| playbackRef.current = null; | ||
| visemeFramesRef.current = []; | ||
| if (degraded) { | ||
| holdThenIdle('concerned'); | ||
| } else { | ||
| holdThenIdle('happy'); | ||
| holdThenIdle(ackFace); | ||
| } | ||
| } | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[minor]
HAPPY_REACTION_EMOJIScontains👍but won't match skin-tone variants like👍🏽(they're distinct Unicode sequences). Same for other emoji sets. Probably fine if the backend sends base codepoints, but worth a note if you later see mismatches.