diff --git a/server/coding-cli/providers/claude.ts b/server/coding-cli/providers/claude.ts index 2720387d..a09ed738 100644 --- a/server/coding-cli/providers/claude.ts +++ b/server/coding-cli/providers/claude.ts @@ -7,7 +7,7 @@ import { getClaudeHome } from '../../claude-home.js' import type { CodingCliProvider } from '../provider.js' import { normalizeFirstUserMessage, type NormalizedEvent, type ParsedSessionMeta, type TokenSummary } from '../types.js' import { parseClaudeEvent, isMessageEvent, isResultEvent, isToolResultContent, isToolUseContent, isTextContent } from '../../claude-stream-types.js' -import { looksLikePath, isSystemContext, extractFromIdeContext, resolveGitRepoRoot } from '../utils.js' +import { looksLikePath, extractUserAuthoredText, resolveGitRepoRoot } from '../utils.js' export type JsonlMeta = { sessionId?: string @@ -376,7 +376,8 @@ export function parseSessionContent(content: string, options: ParseSessionOption if (typeof modelCandidate === 'string') model = modelCandidate } const userMessageText = extractUserMessageText(obj) - if (userMessageText !== undefined && !isSystemContext(userMessageText)) { + const userAuthoredText = typeof userMessageText === 'string' ? extractUserAuthoredText(userMessageText) : undefined + if (userAuthoredText !== undefined) { userMessageCount++ } @@ -393,20 +394,12 @@ export function parseSessionContent(content: string, options: ParseSessionOption } if (!title) { - const t = - obj?.title || - obj?.sessionTitle || - userMessageText - - if (typeof t === 'string' && t.trim()) { - // Try to extract user request from IDE-formatted context first - const ideRequest = extractFromIdeContext(t) - if (ideRequest) { - title = extractTitleFromMessage(ideRequest, 200) - } else if (!isSystemContext(t)) { - // Store up to 200 chars - UI truncates visually, tooltip shows full text - title = extractTitleFromMessage(t, 200) - } + const explicitTitle = obj?.title || obj?.sessionTitle + if (typeof explicitTitle === 'string' && explicitTitle.trim()) { + title = extractTitleFromMessage(explicitTitle, 200) + } else if (userAuthoredText) { + // Store up to 200 chars - UI truncates visually, tooltip shows full text + title = extractTitleFromMessage(userAuthoredText, 200) } } @@ -418,8 +411,8 @@ export function parseSessionContent(content: string, options: ParseSessionOption } if (!firstUserMessage) { - if (typeof userMessageText === 'string') { - const normalized = normalizeFirstUserMessage(userMessageText) + if (userAuthoredText) { + const normalized = normalizeFirstUserMessage(userAuthoredText) if (normalized) firstUserMessage = normalized } } diff --git a/server/coding-cli/providers/codex.ts b/server/coding-cli/providers/codex.ts index 1635bf04..37251538 100644 --- a/server/coding-cli/providers/codex.ts +++ b/server/coding-cli/providers/codex.ts @@ -4,7 +4,7 @@ import fsp from 'fs/promises' import { extractTitleFromMessage } from '../../title-utils.js' import type { CodingCliProvider } from '../provider.js' import { normalizeFirstUserMessage, type CodexTaskEventSnapshot, type NormalizedEvent, type ParsedSessionMeta, type TokenPayload, type TokenSummary } from '../types.js' -import { looksLikePath, isSystemContext, extractFromIdeContext, resolveGitRepoRoot } from '../utils.js' +import { looksLikePath, extractUserAuthoredText, resolveGitRepoRoot } from '../utils.js' const CODEX_MAX_PLAUSIBLE_CONTEXT_TOKENS_WITHOUT_WINDOW = 5_000_000 // Codex `model_context_window` is reduced by `effective_context_window_percent` (default 95%). @@ -318,22 +318,13 @@ export function parseCodexSessionContent(content: string): ParsedSessionMeta { if (obj?.type === 'response_item' && obj?.payload?.type === 'message' && obj?.payload?.role === 'user') { const text = extractTextContent(obj.payload.content) - const normalized = normalizeFirstUserMessage(text) + const userText = extractUserAuthoredText(text) + const normalized = userText ? normalizeFirstUserMessage(userText) : undefined if (!firstUserMessage && normalized) { firstUserMessage = normalized } - if (!title && text.trim()) { - // Try to extract user request from IDE-formatted context first - const ideRequest = extractFromIdeContext(text) - if (ideRequest) { - title = extractTitleFromMessage(ideRequest, 200) - } else if (!isSystemContext(text)) { - // Strip image markup tags so titles show the actual user request - const cleaned = text.replace(/<\/?image[^>]*>/g, '').trim() - if (cleaned) { - title = extractTitleFromMessage(cleaned, 200) - } - } + if (!title && userText) { + title = extractTitleFromMessage(userText, 200) } } diff --git a/server/coding-cli/session-indexer.ts b/server/coding-cli/session-indexer.ts index 04b99a5b..b9f2c7f1 100644 --- a/server/coding-cli/session-indexer.ts +++ b/server/coding-cli/session-indexer.ts @@ -10,7 +10,7 @@ import { extractTitleFromMessage } from '../title-utils.js' import type { CodingCliProvider } from './provider.js' import { makeSessionKey, type CodingCliSession, type CodingCliProviderName, type ProjectGroup } from './types.js' import { sanitizeCodexTaskEventsForTruncatedSnippet } from './providers/codex.js' -import { extractFromIdeContext, isSystemContext, resolveGitCheckoutRoot, resolveGitRepoRoot } from './utils.js' +import { extractUserAuthoredText, resolveGitCheckoutRoot, resolveGitRepoRoot } from './utils.js' import { diffProjects } from '../sessions-sync/diff.js' import type { SessionMetadataStore, SessionMetadataEntry } from '../session-metadata-store.js' @@ -297,9 +297,7 @@ async function readLightweightMeta(filePath: string): Promise]*>/g, '').trim() : '') + const candidate = rawText ? extractUserAuthoredText(rawText) : undefined if (candidate) { title = extractTitleFromMessage(candidate, 200) } diff --git a/server/coding-cli/utils.ts b/server/coding-cli/utils.ts index 5d16d32f..817a8058 100644 --- a/server/coding-cli/utils.ts +++ b/server/coding-cli/utils.ts @@ -334,6 +334,22 @@ export function isSystemContext(text: string): boolean { return false } +const USER_CONTEXT_TAGS = new Set([ + 'environment_context', + 'system_context', + 'system', + 'context', + 'instructions', + 'user_instructions', + 'permissions', + 'collaboration_mode', + 'skills_instructions', +]) + +function escapeRegExp(value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') +} + /** * Extract the actual user request from IDE-formatted context messages. * IDE context messages follow this format: @@ -362,3 +378,52 @@ export function extractFromIdeContext(text: string): string | undefined { return undefined } + +/** + * Returns only text authored as the user's task/request, excluding context that + * coding CLIs serialize as role:"user" records. + */ +export function extractUserAuthoredText(text: string): string | undefined { + const trimmed = text.trim() + if (!trimmed) return undefined + + const ideRequest = extractFromIdeContext(trimmed) + if (ideRequest) return ideRequest + + if (!isSystemContext(trimmed)) { + const cleaned = trimmed.replace(/<\/?image[^>]*>/g, '').trim() + return cleaned || undefined + } + + let rest = trimmed + let removedStructuredBlock = false + for (;;) { + const before = rest + rest = rest.trim() + + const agentsHeader = rest.match(/^#\s*AGENTS(?:\.md)? instructions[^\n]*(?:\n|$)/i) + if (agentsHeader) { + rest = rest.slice(agentsHeader[0].length) + continue + } + + const xmlOpen = rest.match(/^<([a-zA-Z_][\w-]*)\b[^>]*>/) + if (xmlOpen) { + const tag = xmlOpen[1].toLowerCase() + if (!USER_CONTEXT_TAGS.has(tag)) return undefined + const closePattern = new RegExp(``, 'i') + const close = closePattern.exec(rest) + if (!close) return undefined + rest = rest.slice((close.index ?? 0) + close[0].length) + removedStructuredBlock = true + continue + } + + if (rest === before) break + } + + if (!removedStructuredBlock) return undefined + + const cleaned = rest.replace(/<\/?image[^>]*>/g, '').trim() + return cleaned || undefined +} diff --git a/test/unit/server/coding-cli/claude-provider.test.ts b/test/unit/server/coding-cli/claude-provider.test.ts index 04d343e6..4a968236 100644 --- a/test/unit/server/coding-cli/claude-provider.test.ts +++ b/test/unit/server/coding-cli/claude-provider.test.ts @@ -776,6 +776,7 @@ describe('claude provider cross-platform tests', () => { const meta = parseSessionContent(content) expect(meta.title).toBe('Build the feature') + expect(meta.firstUserMessage).toBe('Build the feature') }) it('skips XML-wrapped system context', () => { @@ -859,6 +860,7 @@ describe('claude provider cross-platform tests', () => { const meta = parseSessionContent(content) expect(meta.title).toBeUndefined() + expect(meta.firstUserMessage).toBeUndefined() }) it('extracts user request from IDE context messages', () => { diff --git a/test/unit/server/coding-cli/codex-provider.test.ts b/test/unit/server/coding-cli/codex-provider.test.ts index f09174f2..21afa3ec 100644 --- a/test/unit/server/coding-cli/codex-provider.test.ts +++ b/test/unit/server/coding-cli/codex-provider.test.ts @@ -673,6 +673,7 @@ describe('codex-provider', () => { const meta = parseCodexSessionContent(content) expect(meta.title).toBe('Review the current code changes') + expect(meta.firstUserMessage).toBe('Review the current code changes') }) it('skips messages starting with XML tags like ', () => { @@ -897,6 +898,39 @@ describe('codex-provider', () => { const meta = parseCodexSessionContent(content) expect(meta.title).toBeUndefined() + expect(meta.firstUserMessage).toBeUndefined() + }) + + it('extracts firstUserMessage from the request inside IDE context', () => { + const ideMessage = [ + '# Context from my IDE setup:', + '', + '## My codebase', + 'This is a React project...', + '', + '## My request for Codex:', + 'Fix the authentication bug in the login form', + ].join('\n') + + const content = [ + JSON.stringify({ + type: 'session_meta', + payload: { id: 'session-ide-first-user', cwd: '/project' }, + }), + JSON.stringify({ + type: 'response_item', + payload: { + type: 'message', + role: 'user', + content: [{ type: 'input_text', text: ideMessage }], + }, + }), + ].join('\n') + + const meta = parseCodexSessionContent(content) + + expect(meta.title).toBe('Fix the authentication bug in the login form') + expect(meta.firstUserMessage).toBe('Fix the authentication bug in the login form') }) it('extracts user request from IDE context messages', () => { diff --git a/test/unit/server/coding-cli/utils.test.ts b/test/unit/server/coding-cli/utils.test.ts index 3aa2102c..5a091df1 100644 --- a/test/unit/server/coding-cli/utils.test.ts +++ b/test/unit/server/coding-cli/utils.test.ts @@ -1,5 +1,5 @@ import { describe, it, expect } from 'vitest' -import { isSystemContext, extractFromIdeContext } from '../../../../server/coding-cli/utils' +import { isSystemContext, extractFromIdeContext, extractUserAuthoredText } from '../../../../server/coding-cli/utils' describe('isSystemContext()', () => { describe('XML-wrapped context', () => { @@ -200,3 +200,23 @@ describe('extractFromIdeContext()', () => { expect(extractFromIdeContext(text)).toBeUndefined() }) }) + +describe('extractUserAuthoredText()', () => { + it('skips leading AGENTS instructions and returns trailing user request', () => { + const text = [ + '# AGENTS.md instructions for /project', + '', + '', + 'Prefer bash to powershell.', + '', + '', + 'Find, root cause, investigate, etc.', + ].join('\n') + + expect(extractUserAuthoredText(text)).toBe('Find, root cause, investigate, etc.') + }) + + it('does not treat plain AGENTS instruction text as a user request', () => { + expect(extractUserAuthoredText('# AGENTS.md instructions\n\nFollow these rules...')).toBeUndefined() + }) +})