diff --git a/package-lock.json b/package-lock.json index 6e703490..0f664d5a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,6 +9,7 @@ "version": "9.0.0", "license": "MIT", "dependencies": { + "@paralleldrive/cuid2": "^3.3.0", "cheerio": "1.2.0", "dotignore": "^0.1.2", "error-causes": "^3.0.2", @@ -1162,6 +1163,18 @@ "node": ">= 0.4" } }, + "node_modules/@noble/hashes": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/@noble/hashes/-/hashes-2.0.1.tgz", + "integrity": "sha512-XlOlEbQcE9fmuXxrVTXCTlG2nlRXa9Rj3rr5Ue/+tX+nmkgbX720YHh0VR3hBF9xDvwnb8D2shVGOwNx+ulArw==", + "license": "MIT", + "engines": { + "node": ">= 20.19.0" + }, + "funding": { + "url": "https://paulmillr.com/funding/" + } + }, "node_modules/@nodeutils/defaults-deep": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/@nodeutils/defaults-deep/-/defaults-deep-1.1.0.tgz", @@ -1338,6 +1351,20 @@ "@octokit/openapi-types": "^27.0.0" } }, + "node_modules/@paralleldrive/cuid2": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/@paralleldrive/cuid2/-/cuid2-3.3.0.tgz", + "integrity": "sha512-OqiFvSOF0dBSesELYY2CAMa4YINvlLpvKOz/rv6NeZEqiyttlHgv98Juwv4Ch+GrEV7IZ8jfI2VcEoYUjXXCjw==", + "license": "MIT", + "dependencies": { + "@noble/hashes": "^2.0.1", + "bignumber.js": "^9.3.1", + "error-causes": "^3.0.2" + }, + "bin": { + "cuid2": "bin/cuid2.js" + } + }, "node_modules/@phun-ky/typeof": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/@phun-ky/typeof/-/typeof-2.0.3.tgz", @@ -2127,6 +2154,15 @@ "dev": true, "license": "Apache-2.0" }, + "node_modules/bignumber.js": { + "version": "9.3.1", + "resolved": "https://registry.npmjs.org/bignumber.js/-/bignumber.js-9.3.1.tgz", + "integrity": "sha512-Ko0uX15oIUS7wJ3Rb30Fs6SkVbLmPBAKdlm7q9+ak9bbIeFf0MwuBsQV6z7+X768/cHsfg+WlysDWJcmthjsjQ==", + "license": "MIT", + "engines": { + "node": "*" + } + }, "node_modules/boolbase": { "version": "1.0.0", "resolved": 
"https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", diff --git a/package.json b/package.json index c8386f8d..7b2039f7 100644 --- a/package.json +++ b/package.json @@ -104,6 +104,7 @@ "watch": "^1.0.2" }, "dependencies": { + "@paralleldrive/cuid2": "^3.3.0", "cheerio": "1.2.0", "dotignore": "^0.1.2", "error-causes": "^3.0.2", diff --git a/source/agent-config.js b/source/agent-config.js index f49a18d5..2c771f1d 100644 --- a/source/agent-config.js +++ b/source/agent-config.js @@ -1,21 +1,9 @@ import { readFile } from 'fs/promises'; import { z } from 'zod'; import { createError } from 'error-causes'; -import { ValidationError } from './ai-errors.js'; +import { ValidationError, AgentConfigReadError, AgentConfigParseError, AgentConfigValidationError, formatZodError } from './ai-errors.js'; import { parseOpenCodeNDJSON } from './agent-parser.js'; -/** - * Format Zod validation errors into a human-readable message. - * @param {any} zodError - Zod validation error - * @returns {string} Formatted error message - */ -export const formatZodError = (zodError) => { - const issues = zodError.issues || zodError.errors; - return issues - ? issues.map(e => `${e.path.join('.')}: ${e.message}`).join('; ') - : zodError.message || 'Validation failed'; -}; - /** * Get agent configuration based on agent name. * Supports 'claude', 'opencode', and 'cursor' agents. 
@@ -62,9 +50,8 @@ const readAgentConfigFile = async ({ configPath }) => { return await readFile(configPath, 'utf-8'); } catch (err) { throw createError({ - ...ValidationError, + ...AgentConfigReadError, message: `Failed to read agent config file: ${configPath}`, - code: 'AGENT_CONFIG_READ_ERROR', cause: err }); } @@ -75,9 +62,8 @@ const parseJson = ({ configPath, raw }) => { return JSON.parse(raw); } catch (err) { throw createError({ - ...ValidationError, + ...AgentConfigParseError, message: `Agent config file is not valid JSON: ${configPath}`, - code: 'AGENT_CONFIG_PARSE_ERROR', cause: err }); } @@ -88,9 +74,8 @@ const validateAgentConfig = (parsed) => { return agentConfigFileSchema.parse(parsed); } catch (zodError) { throw createError({ - ...ValidationError, + ...AgentConfigValidationError, message: `Invalid agent config: ${formatZodError(zodError)}`, - code: 'AGENT_CONFIG_VALIDATION_ERROR', cause: zodError }); } diff --git a/source/agent-config.test.js b/source/agent-config.test.js index 964f9ded..c6d453dc 100644 --- a/source/agent-config.test.js +++ b/source/agent-config.test.js @@ -1,7 +1,8 @@ import { describe, test } from 'vitest'; import { assert } from './vitest.js'; import { Try } from './riteway.js'; -import { formatZodError, getAgentConfig, loadAgentConfig } from './agent-config.js'; +import { handleAIErrors, allNoop, formatZodError } from './ai-errors.js'; +import { getAgentConfig, loadAgentConfig } from './agent-config.js'; describe('formatZodError()', () => { test('formats a single issue', () => { @@ -162,18 +163,14 @@ describe('getAgentConfig()', () => { test('throws ValidationError for invalid agent name', () => { const error = Try(getAgentConfig, 'invalid-agent'); - assert({ - given: 'invalid agent name', - should: 'throw Error with cause', - actual: error instanceof Error && error.cause !== undefined, - expected: true - }); + const invoked = []; + handleAIErrors({ ...allNoop, ValidationError: () => invoked.push('ValidationError') })(error); 
assert({ given: 'invalid agent name', - should: 'have ValidationError name in cause', - actual: error?.cause?.name, - expected: 'ValidationError' + should: 'throw an error that routes to the ValidationError handler', + actual: invoked, + expected: ['ValidationError'] }); assert({ @@ -218,78 +215,45 @@ describe('loadAgentConfig()', () => { }); }); - test('throws ValidationError with AGENT_CONFIG_PARSE_ERROR for invalid JSON', async () => { + test('throws AgentConfigParseError for invalid JSON', async () => { const error = await Try(loadAgentConfig, './source/fixtures/invalid-agent-config.txt'); - assert({ - given: 'invalid JSON file', - should: 'throw Error with cause', - actual: error instanceof Error && error.cause !== undefined, - expected: true - }); - - assert({ - given: 'invalid JSON file', - should: 'have ValidationError name in cause', - actual: error?.cause?.name, - expected: 'ValidationError' - }); + const invoked = []; + handleAIErrors({ ...allNoop, AgentConfigParseError: () => invoked.push('AgentConfigParseError') })(error); assert({ given: 'invalid JSON file', - should: 'have AGENT_CONFIG_PARSE_ERROR code in cause', - actual: error?.cause?.code, - expected: 'AGENT_CONFIG_PARSE_ERROR' + should: 'throw an error that routes to the AgentConfigParseError handler', + actual: invoked, + expected: ['AgentConfigParseError'] }); }); - test('throws ValidationError with AGENT_CONFIG_VALIDATION_ERROR when command field missing', async () => { + test('throws AgentConfigValidationError when command field missing', async () => { const error = await Try(loadAgentConfig, './source/fixtures/no-command-agent-config.json'); - assert({ - given: 'config file missing command field', - should: 'throw Error with cause', - actual: error instanceof Error && error.cause !== undefined, - expected: true - }); - - assert({ - given: 'config file missing command field', - should: 'have ValidationError name in cause', - actual: error?.cause?.name, - expected: 'ValidationError' - }); + 
const invoked = []; + handleAIErrors({ ...allNoop, AgentConfigValidationError: () => invoked.push('AgentConfigValidationError') })(error); assert({ given: 'config file missing command field', - should: 'have AGENT_CONFIG_VALIDATION_ERROR code in cause', - actual: error?.cause?.code, - expected: 'AGENT_CONFIG_VALIDATION_ERROR' + should: 'throw an error that routes to the AgentConfigValidationError handler', + actual: invoked, + expected: ['AgentConfigValidationError'] }); }); - test('throws ValidationError with AGENT_CONFIG_READ_ERROR for nonexistent file', async () => { + test('throws AgentConfigReadError for nonexistent file', async () => { const error = await Try(loadAgentConfig, './nonexistent/path.json'); - assert({ - given: 'nonexistent file path', - should: 'throw Error with cause', - actual: error instanceof Error && error.cause !== undefined, - expected: true - }); - - assert({ - given: 'nonexistent file path', - should: 'have ValidationError name in cause', - actual: error?.cause?.name, - expected: 'ValidationError' - }); + const invoked = []; + handleAIErrors({ ...allNoop, AgentConfigReadError: () => invoked.push('AgentConfigReadError') })(error); assert({ given: 'nonexistent file path', - should: 'have AGENT_CONFIG_READ_ERROR code in cause', - actual: error?.cause?.code, - expected: 'AGENT_CONFIG_READ_ERROR' + should: 'throw an error that routes to the AgentConfigReadError handler', + actual: invoked, + expected: ['AgentConfigReadError'] }); }); }); diff --git a/source/ai-errors.js b/source/ai-errors.js index 6a214b8a..cff7a4a0 100644 --- a/source/ai-errors.js +++ b/source/ai-errors.js @@ -10,7 +10,10 @@ export const [aiErrors, handleAIErrors] = errorCauses({ AITestError: { code: 'AI_TEST_ERROR', message: 'AI test execution failed' }, OutputError: { code: 'OUTPUT_ERROR', message: 'Test output recording failed' }, ExtractionParseError: { code: 'EXTRACTION_PARSE_FAILURE', message: 'Failed to parse extraction result' }, - ExtractionValidationError: { code: 
'EXTRACTION_VALIDATION_FAILURE', message: 'Invalid extraction result' } + ExtractionValidationError: { code: 'EXTRACTION_VALIDATION_FAILURE', message: 'Invalid extraction result' }, + AgentConfigReadError: { code: 'AGENT_CONFIG_READ_ERROR', message: 'Failed to read agent config file' }, + AgentConfigParseError: { code: 'AGENT_CONFIG_PARSE_ERROR', message: 'Agent config file is not valid JSON' }, + AgentConfigValidationError: { code: 'AGENT_CONFIG_VALIDATION_ERROR', message: 'Invalid agent config' } }); // handleAIErrors is exhaustive — every registered type must have a handler. @@ -26,5 +29,22 @@ export const { AITestError, OutputError, ExtractionParseError, - ExtractionValidationError + ExtractionValidationError, + AgentConfigReadError, + AgentConfigParseError, + AgentConfigValidationError } = aiErrors; + +/** + * Format Zod validation errors into a human-readable message. + * Lives here because ai-runner.js also needs Zod error formatting; importing + * a utility from agent-config.js would create a wrong dependency direction. + * @param {any} zodError - Zod validation error + * @returns {string} Formatted error message + */ +export const formatZodError = (zodError) => { + const issues = zodError.issues || zodError.errors; + return issues + ? 
issues.map(e => `${e.path.join('.')}: ${e.message}`).join('; ') + : zodError.message || 'Validation failed'; +}; diff --git a/source/ai-runner.js b/source/ai-runner.js new file mode 100644 index 00000000..53e190d9 --- /dev/null +++ b/source/ai-runner.js @@ -0,0 +1,231 @@ +import { readFile } from 'fs/promises'; +import { executeAgent } from './execute-agent.js'; +import { extractTests, buildResultPrompt, buildJudgePrompt } from './test-extractor.js'; +import { createDebugLogger } from './debug-logger.js'; +import { limitConcurrency } from './limit-concurrency.js'; +import { normalizeJudgment, aggregatePerAssertionResults } from './aggregation.js'; +import { parseTAPYAML } from './tap-yaml.js'; +import { verifyAgentAuthentication as verifyAuth } from './validation.js'; + +export const readTestFile = (filePath) => readFile(filePath, 'utf-8'); + +export const verifyAgentAuthentication = (options) => verifyAuth({ ...options, executeAgent }); + +const extractStructuredTests = async ({ + testContent, + testFilePath, + agentConfig, + timeout, + debug, + projectRoot, + logger +}) => { + logger.log(`\nExtracting tests from: ${testFilePath}`); + logger.log(`Test content length: ${testContent.length} characters`); + + const { userPrompt, promptUnderTest, assertions } = await extractTests({ + testContent, + testFilePath, + agentConfig, + timeout, + debug, + projectRoot, + logger + }); + + logger.log(`Extracted ${assertions.length} assertions`); + + const resultPrompt = buildResultPrompt({ userPrompt, promptUnderTest }); + + return { userPrompt, promptUnderTest, assertions, resultPrompt }; +}; + +const judgeAssertion = async ({ + assertion, + result, + userPrompt, + promptUnderTest, + runIndex, + assertionIndex, + totalAssertions, + agentConfig, + timeout, + debug, + logFile, + logger +}) => { + const judgePrompt = buildJudgePrompt({ + userPrompt, + promptUnderTest, + result, + requirement: assertion.requirement + }); + + logger.log(` Assertion ${assertionIndex + 
1}/${totalAssertions}: ${assertion.requirement}`); + + const judgeOutput = await executeAgent({ + agentConfig, + prompt: judgePrompt, + timeout, + debug, + logFile, + rawOutput: true + }); + + const parsed = parseTAPYAML(judgeOutput); + return normalizeJudgment({ + judgeResponse: parsed, + requirement: assertion.requirement, + runIndex, + logger + }); +}; + +const executeSingleRun = async ({ + runIndex, + extracted, + resultPrompt, + runs, + agentConfig, + timeout, + debug, + logFile, + logger +}) => { + const { userPrompt, promptUnderTest, assertions } = extracted; + + logger.log(`\nRun ${runIndex + 1}/${runs}: Calling result agent...`); + + const result = await executeAgent({ + agentConfig, + prompt: resultPrompt, + timeout, + debug, + logFile, + rawOutput: true + }); + + logger.log(`Result obtained (${result.length} chars). Judging ${assertions.length} assertions...`); + + const judgments = await Promise.all( + assertions.map((assertion, assertionIndex) => + judgeAssertion({ + assertion, + result, + userPrompt, + promptUnderTest, + runIndex, + assertionIndex, + totalAssertions: assertions.length, + agentConfig, + timeout, + debug, + logFile, + logger + }) + ) + ); + + return judgments; +}; + +const executeRuns = ({ + extracted, + resultPrompt, + runs, + concurrency, + agentConfig, + timeout, + debug, + logFile, + logger +}) => { + const runTasks = Array.from({ length: runs }, (_, runIndex) => async () => + executeSingleRun({ + runIndex, + extracted, + resultPrompt, + runs, + agentConfig, + timeout, + debug, + logFile, + logger + }) + ); + + return limitConcurrency(runTasks, concurrency); +}; + +const aggregateResults = ({ assertions, allRunJudgments, threshold, runs }) => { + const perAssertionResults = assertions.map(({ requirement }, assertionIndex) => ({ + requirement, + runResults: allRunJudgments.map(runJudgments => runJudgments[assertionIndex]) + })); + + return aggregatePerAssertionResults({ perAssertionResults, threshold, runs }); +}; + +/** + * Run AI 
tests with two-agent pattern: result agent + judge agent. + * Pipeline: readTestFile → extractTests → result agent (once per run) → judge agents (per assertion, parallel) → aggregation. + * + * @param {Object} options + * @param {string} options.filePath - Path to test file + * @param {number} [options.runs=4] - Number of test runs per assertion + * @param {number} [options.threshold=75] - Required pass percentage (0-100) + * @param {Object} options.agentConfig - Agent CLI configuration + * @param {string} options.agentConfig.command - Command to execute + * @param {Array} [options.agentConfig.args=[]] - Command arguments + * @param {number} [options.timeout=300000] - Timeout in milliseconds (default: 5 minutes) + * @param {number} [options.concurrency=4] - Maximum concurrent runs + * @param {boolean} [options.debug=false] - Enable debug logging + * @param {string} [options.logFile] - Optional log file path for debug output + * @param {string} [options.projectRoot=process.cwd()] - Project root directory for resolving import paths + * @returns {Promise} Aggregated per-assertion test results + */ +export const runAITests = async ({ + filePath, + runs = 4, + threshold = 75, + timeout = 300000, + concurrency = 4, + debug = false, + logFile, + projectRoot = process.cwd(), + agentConfig = { + command: 'claude', + args: ['-p', '--output-format', 'json', '--no-session-persistence'] + } +}) => { + const logger = createDebugLogger({ debug, logFile }); + + const testContent = await readTestFile(filePath); + + const extracted = await extractStructuredTests({ + testContent, + testFilePath: filePath, + agentConfig, + timeout, + debug, + projectRoot, + logger + }); + + const { resultPrompt, assertions } = extracted; + + const allRunJudgments = await executeRuns({ + extracted, + resultPrompt, + runs, + concurrency, + agentConfig, + timeout, + debug, + logFile, + logger + }); + + logger.flush(); + return aggregateResults({ assertions, allRunJudgments, threshold, runs }); +}; diff 
--git a/source/ai-runner.test.js b/source/ai-runner.test.js new file mode 100644 index 00000000..f0d65fe6 --- /dev/null +++ b/source/ai-runner.test.js @@ -0,0 +1,263 @@ +import { describe, test } from 'vitest'; +import { assert } from './vitest.js'; +import { writeFileSync, mkdirSync, rmSync } from 'fs'; +import { join } from 'path'; +import { tmpdir } from 'os'; +import { init } from '@paralleldrive/cuid2'; +import { readTestFile, runAITests } from './ai-runner.js'; + +const createSlug = init({ length: 5 }); + +// Mock agent for two-agent pattern: +// - Extraction calls (containing '') return extraction result (JSON) +// - Result agent calls (containing 'CONTEXT (Prompt Under Test)') return plain text +// - Judge agent calls (containing 'ACTUAL RESULT TO EVALUATE') return TAP YAML +const createTwoAgentMockArgs = ({ + extractedTests, + importPaths = ['prompt.mdc'], + resultText = 'Mock result from agent', + judgmentPassed = true, + judgmentScore = 85 +} = {}) => { + const extractionResult = { + userPrompt: 'What is 2+2?', + importPaths, + assertions: extractedTests + }; + const tapYAML = `--- +passed: ${judgmentPassed} +actual: "Mock actual output" +expected: "Mock expected output" +score: ${judgmentScore} +---`; + + return [ + '-e', + `const prompt = process.argv[process.argv.length - 1]; + if (prompt.includes('')) { + console.log(JSON.stringify(${JSON.stringify(extractionResult)})); + } else if (prompt.includes('ACTUAL RESULT TO EVALUATE')) { + console.log(\`${tapYAML}\`); + } else if (prompt.includes('CONTEXT (Prompt Under Test)')) { + console.log(${JSON.stringify(resultText)}); + }` + ]; +}; + +describe('readTestFile()', () => { + test('reads file contents from path', async () => { + const testDir = join(tmpdir(), 'riteway-test-' + createSlug()); + + try { + mkdirSync(testDir, { recursive: true }); + const testFile = join(testDir, 'test.sudo'); + const contents = 'describe("test", { requirements: ["should work"] })'; + writeFileSync(testFile, contents); + + 
assert({ + given: 'a test file path', + should: 'return the file contents', + actual: await readTestFile(testFile), + expected: contents + }); + } finally { + rmSync(testDir, { recursive: true, force: true }); + } + }); + + test('reads any file extension', async () => { + const testDir = join(tmpdir(), 'riteway-test-' + createSlug()); + + try { + mkdirSync(testDir, { recursive: true }); + const testFile = join(testDir, 'test.md'); + const contents = '# My Test\n\nSome markdown content'; + writeFileSync(testFile, contents); + + assert({ + given: 'a markdown file path', + should: 'return the file contents regardless of extension', + actual: await readTestFile(testFile), + expected: contents + }); + } finally { + rmSync(testDir, { recursive: true, force: true }); + } + }); +}); + +describe('runAITests()', () => { + test('extracts tests and returns per-assertion results', async () => { + const testDir = join(tmpdir(), 'riteway-test-' + createSlug()); + + try { + mkdirSync(testDir, { recursive: true }); + writeFileSync(join(testDir, 'prompt.mdc'), 'Test prompt context'); + const testFile = join(testDir, 'test.sudo'); + writeFileSync(testFile, '- Given addition, should add\n- Given format, should output JSON'); + + const extractedTests = [ + { id: 1, requirement: 'Given addition, should add' }, + { id: 2, requirement: 'Given format, should output JSON' } + ]; + + const result = await runAITests({ + filePath: testFile, + runs: 2, + threshold: 50, + projectRoot: testDir, + agentConfig: { + command: 'node', + args: createTwoAgentMockArgs({ extractedTests }) + } + }); + + assert({ + given: 'multi-assertion test file with all runs passing at 50% threshold', + should: 'return passed: true', + actual: result.passed, + expected: true + }); + + assert({ + given: 'two extracted assertions', + should: 'return assertions array of length 2', + actual: result.assertions.length, + expected: 2 + }); + + assert({ + given: 'first extracted assertion', + should: 'preserve the requirement 
text', + actual: result.assertions[0].requirement, + expected: 'Given addition, should add' + }); + } finally { + rmSync(testDir, { recursive: true, force: true }); + } + }); + + test('tracks pass count across N runs for each assertion', async () => { + const testDir = join(tmpdir(), 'riteway-test-' + createSlug()); + + try { + mkdirSync(testDir, { recursive: true }); + writeFileSync(join(testDir, 'prompt.mdc'), 'Test prompt context'); + const testFile = join(testDir, 'test.sudo'); + writeFileSync(testFile, '- Given a test, should pass'); + + const extractedTests = [{ id: 1, requirement: 'Given a test, should pass' }]; + + const result = await runAITests({ + filePath: testFile, + runs: 3, + threshold: 75, + projectRoot: testDir, + agentConfig: { + command: 'node', + args: createTwoAgentMockArgs({ extractedTests }) + } + }); + + assert({ + given: 'runs: 3 with one assertion', + should: 'execute 3 runs for the assertion', + actual: result.assertions[0].totalRuns, + expected: 3 + }); + + assert({ + given: 'all 3 runs passing', + should: 'have passCount 3', + actual: result.assertions[0].passCount, + expected: 3 + }); + } finally { + rmSync(testDir, { recursive: true, force: true }); + } + }); + + test('fails when assertion does not meet threshold', async () => { + const testDir = join(tmpdir(), 'riteway-test-' + createSlug()); + + try { + mkdirSync(testDir, { recursive: true }); + writeFileSync(join(testDir, 'prompt.mdc'), 'Test prompt context'); + const testFile = join(testDir, 'test.sudo'); + writeFileSync(testFile, '- Given a test, should fail'); + + const extractedTests = [{ id: 1, requirement: 'Given a test, should fail' }]; + + const result = await runAITests({ + filePath: testFile, + runs: 2, + threshold: 75, + projectRoot: testDir, + agentConfig: { + command: 'node', + args: createTwoAgentMockArgs({ + extractedTests, + judgmentPassed: false, + judgmentScore: 25 + }) + } + }); + + assert({ + given: 'all runs failing at 75% threshold', + should: 'return passed: 
false', + actual: result.passed, + expected: false + }); + + assert({ + given: 'the failing assertion', + should: 'have passCount 0', + actual: result.assertions[0].passCount, + expected: 0 + }); + } finally { + rmSync(testDir, { recursive: true, force: true }); + } + }); + + test('includes averageScore across all runs for each assertion', async () => { + const testDir = join(tmpdir(), 'riteway-test-' + createSlug()); + + try { + mkdirSync(testDir, { recursive: true }); + writeFileSync(join(testDir, 'prompt.mdc'), 'Test prompt context'); + const testFile = join(testDir, 'test.sudo'); + writeFileSync(testFile, '- Given a test, should pass'); + + const extractedTests = [{ id: 1, requirement: 'Given a test, should pass' }]; + + const result = await runAITests({ + filePath: testFile, + runs: 2, + threshold: 50, + projectRoot: testDir, + agentConfig: { + command: 'node', + args: createTwoAgentMockArgs({ extractedTests, judgmentScore: 85 }) + } + }); + + assert({ + given: 'judgment score of 85 on both runs', + should: 'include averageScore as a number', + actual: typeof result.assertions[0].averageScore, + expected: 'number' + }); + + assert({ + given: 'judgment score of 85 on both runs', + should: 'calculate correct average score', + actual: result.assertions[0].averageScore, + expected: 85 + }); + } finally { + rmSync(testDir, { recursive: true, force: true }); + } + }); +}); diff --git a/source/execute-agent.js b/source/execute-agent.js index ceac994f..e25776df 100644 --- a/source/execute-agent.js +++ b/source/execute-agent.js @@ -6,6 +6,9 @@ import { unwrapEnvelope, unwrapAgentResult } from './agent-parser.js'; const maxOutputPreviewLength = 500; +const truncateOutput = (str) => + str.length > maxOutputPreviewLength ? 
`${str.slice(0, maxOutputPreviewLength)}...` : str; + const withTimeout = (promise, ms, errorFactory) => Promise.race([ promise, @@ -97,7 +100,7 @@ const processAgentOutput = ({ agentConfig, rawOutput, logger }) => ({ stdout }) logger.flush(); return result; } catch (err) { - const truncatedStdout = stdout.length > maxOutputPreviewLength ? `${stdout.slice(0, maxOutputPreviewLength)}...` : stdout; + const truncatedStdout = truncateOutput(stdout); logger.log('JSON parsing failed:', err.message); logger.flush(); @@ -133,8 +136,8 @@ const runAgentProcess = async ({ agentConfig, prompt, timeout, logger }) => { logger.log(`Stderr length: ${stderr.length} characters`); if (code !== 0) { - const truncatedStdout = stdout.length > maxOutputPreviewLength ? `${stdout.slice(0, maxOutputPreviewLength)}...` : stdout; - const truncatedStderr = stderr.length > maxOutputPreviewLength ? `${stderr.slice(0, maxOutputPreviewLength)}...` : stderr; + const truncatedStdout = truncateOutput(stdout); + const truncatedStderr = truncateOutput(stderr); logger.log('Process failed with non-zero exit code'); logger.flush(); diff --git a/source/extraction-parser.js b/source/extraction-parser.js index 8a6ef970..42fc7799 100644 --- a/source/extraction-parser.js +++ b/source/extraction-parser.js @@ -12,17 +12,17 @@ const assertionRequiredFields = ['id', 'requirement']; * This allows legitimate cross-project imports (e.g., shared prompt libraries). * Test authors are responsible for not importing sensitive files (.env, credentials). * See PR #394 remediation epic (Wave 1, Task 2) for design rationale. 
+ * + * @param {string[]} importPaths - Paths to resolve relative to projectRoot + * @param {string} projectRoot - Root directory for resolving relative paths + * @param {Object} logger - Debug logger instance (injected by test-extractor.js) */ -export const resolveImportPaths = async (importPaths, projectRoot, debug) => { - if (debug) { - console.error(`[DEBUG] Found ${importPaths.length} imports to resolve`); - } +export const resolveImportPaths = async (importPaths, projectRoot, logger) => { + logger.log(`Found ${importPaths.length} imports to resolve`); const importedContents = await Promise.all( importPaths.map(async importPath => { const resolvedPath = resolve(projectRoot, importPath); - if (debug) { - console.error(`[DEBUG] Reading import: ${importPath} -> ${resolvedPath}`); - } + logger.log(`Reading import: ${importPath} -> ${resolvedPath}`); try { return await readFile(resolvedPath, 'utf-8'); } catch (originalError) { @@ -38,9 +38,7 @@ export const resolveImportPaths = async (importPaths, projectRoot, debug) => { }) ); const result = importedContents.join('\n\n'); - if (debug) { - console.error(`[DEBUG] Imported content length: ${result.length} characters`); - } + logger.log(`Imported content length: ${result.length} characters`); return result; }; diff --git a/source/extraction-parser.test.js b/source/extraction-parser.test.js index 95b3d0b9..eb5d819d 100644 --- a/source/extraction-parser.test.js +++ b/source/extraction-parser.test.js @@ -172,10 +172,12 @@ describe('parseExtractionResult()', () => { }); describe('resolveImportPaths()', () => { + const noopLogger = { log: () => {} }; + test('resolves and joins file contents for valid import paths', async () => { readFile.mockResolvedValueOnce('content of file A').mockResolvedValueOnce('content of file B'); - const result = await resolveImportPaths(['a.mdc', 'b.mdc'], '/project', false); + const result = await resolveImportPaths(['a.mdc', 'b.mdc'], '/project', noopLogger); assert({ given: 'two readable 
import paths', @@ -188,7 +190,7 @@ describe('resolveImportPaths()', () => { test('throws ValidationError when a file cannot be read', async () => { readFile.mockRejectedValueOnce(new Error('ENOENT: no such file or directory')); - const error = await Try(resolveImportPaths, ['missing.mdc'], '/project', false); + const error = await Try(resolveImportPaths, ['missing.mdc'], '/project', noopLogger); const invoked = []; handleAIErrors({ ...allNoop, ValidationError: () => invoked.push('ValidationError') })(error); diff --git a/source/test-extractor.js b/source/test-extractor.js new file mode 100644 index 00000000..9a1a4088 --- /dev/null +++ b/source/test-extractor.js @@ -0,0 +1,193 @@ +import { createError } from 'error-causes'; +import { ValidationError } from './ai-errors.js'; +import { executeAgent } from './execute-agent.js'; +import { parseExtractionResult, resolveImportPaths } from './extraction-parser.js'; + +/** + * Build a prompt that instructs an LLM agent to extract individual + * assertions from a multi-assertion test file. + * + * IMPORTANT: This extraction prompt asks the agent to return STRUCTURED DATA, + * not executable prompts. This is a deliberate architectural decision: + * + * Why not ask the extraction agent to create executable prompts? + * 1. Reliability: Extraction agents may create prompts in inconsistent formats + * 2. Format control: We need a guaranteed judge response format (TAP YAML block with passed: boolean) + * 3. Testability: Template-based prompts are deterministic and testable + * 4. Debugging: Structured data allows us to inspect what was extracted + * + * Instead, we use a two-phase approach: + * Phase 1 (this function): Extract structured metadata (userPrompt, importPaths, requirement) + * Phase 2: Transform metadata into executable prompts for result and judge agents + */ +export const buildExtractionPrompt = (testContent) => + `You are a test extraction agent. Analyze the following test file and extract structured information. 
+ +For each assertion or requirement in the test file (these may be formatted as +"Given X, should Y", bullet points, YAML entries, natural language sentences, +SudoLang expressions, or any other format): + +1. Identify the userPrompt (the prompt to be tested) +2. Extract the specific requirement from the assertion +3. Identify any import file paths (e.g., import 'path/to/file.mdc') + +Return a JSON object with: +- "userPrompt": the test prompt to execute (string) +- "importPaths": array of import file paths found in the test file (e.g., ["ai/rules/ui.mdc"]) +- "assertions": array of assertion objects, each with: + - "id": sequential integer starting at 1 + - "requirement": the assertion text (e.g., "Given X, should Y") + +Return ONLY valid JSON. No markdown fences, no explanation. + + +${testContent} +`; + +/** + * Build a result prompt that instructs an LLM to execute a user prompt + * and return plain text output (no JSON, no evaluation). + * + * This is part of the two-agent pattern where: + * - Result agent (this prompt): Execute the user prompt, return plain text + * - Judge agent (separate prompt): Evaluate the result against requirements + */ +export const buildResultPrompt = ({ userPrompt, promptUnderTest }) => { + const contextSection = promptUnderTest + ? `CONTEXT (Prompt Under Test):\n${promptUnderTest}\n\n` + : ''; + + return `You are an AI assistant. Execute the following prompt and return your response. + +${contextSection}USER PROMPT: +${userPrompt} + +INSTRUCTIONS: +1. Execute the user prompt above${promptUnderTest ? ', following the guidance in the prompt under test' : ''} +2. Return your complete response as plain text + +Respond naturally. Do NOT wrap your response in JSON, markdown fences, or any other structure. +Your entire output IS the result.`; +}; + +/** + * Build a judge prompt that instructs an LLM to evaluate a specific result + * against a single requirement. Returns TAP YAML diagnostic format. 
+ * + * This is part of the two-agent pattern where: + * - Result agent: Execute the user prompt, return plain text + * - Judge agent (this prompt): Evaluate the result against ONE requirement + */ +export const buildJudgePrompt = ({ userPrompt, promptUnderTest, result, requirement }) => + `You are an AI judge. Evaluate whether a given result satisfies a specific requirement. + +CONTEXT (Prompt Under Test): +${promptUnderTest} + +ORIGINAL USER PROMPT: +${userPrompt} + +ACTUAL RESULT TO EVALUATE: +${result} + +REQUIREMENT: +${requirement} + +INSTRUCTIONS: +1. Read the actual result above +2. Determine whether it satisfies the requirement +3. Summarize what was actually produced (actual) vs what was expected (expected) +4. Assign a quality score from 0 (completely fails) to 100 (perfectly satisfies) + +Return your judgment as a TAP YAML diagnostic block: +--- +passed: true +actual: "summary of what was produced" +expected: "what was expected" +score: 85 +--- + +CRITICAL: Return ONLY the TAP YAML block. Start with --- on its own line, +end with --- on its own line. No markdown fences, no explanation outside the block.`; + +/** + * Extract individual test assertions from a multi-assertion test file + * by calling an LLM agent with a specialized extraction prompt. 
+ * + * Pipeline: + * Phase 1: Extraction agent parses test file → {userPrompt, importPaths, assertions} + * Phase 1.5: Read agent-identified import files → promptUnderTest string + * Phase 2: Return validated structured data for two-agent execution + * + * Validation: + * - Missing userPrompt → ValidationError MISSING_USER_PROMPT + * - Missing promptUnderTest → ValidationError MISSING_PROMPT_UNDER_TEST + * - No assertions → ValidationError NO_ASSERTIONS_FOUND + * - Missing import file → ValidationError PROMPT_READ_FAILED (with cause) + * + * @param {Object} options + * @param {string} options.testContent - Raw contents of the test file + * @param {string} [options.testFilePath] - Path to the test file; imports are read only when it is set (they resolve against projectRoot, not this path) and it is reported as testFile in validation errors + * @param {Object} options.agentConfig - Agent CLI configuration + * @param {number} [options.timeout=300000] - Timeout in milliseconds + * @param {boolean} [options.debug=false] - Enable debug logging + * @param {string} [options.projectRoot=process.cwd()] - Project root for resolving import paths + * @param {Object} [options.logger={ log: () => {} }] - Debug logger instance (owned by caller; defaults to noop) + * @returns {Promise<{ userPrompt: string, promptUnderTest: string, assertions: Array<{ id: number, requirement: string }> }>} + */ +export const extractTests = async ({ + testContent, + testFilePath, + agentConfig, + timeout = 300000, + debug = false, + projectRoot = process.cwd(), + logger = { log: () => {} } +}) => { + logger.log('\nCalling extraction agent...'); + + const extractionPrompt = buildExtractionPrompt(testContent); + const result = await executeAgent({ agentConfig, prompt: extractionPrompt, timeout, debug }); + const extracted = parseExtractionResult(result); + + logger.log(`Extraction complete. Found ${extracted.assertions.length} assertions`); + + const promptUnderTest = testFilePath && extracted.importPaths.length > 0 + ? 
await resolveImportPaths(extracted.importPaths, projectRoot, logger) + : ''; + + const { userPrompt, assertions } = extracted; + + if (!userPrompt || userPrompt.trim() === '') { + throw createError({ + ...ValidationError, + message: 'Test file does not define a userPrompt. Every test file must include a user prompt (inline or imported).', + code: 'MISSING_USER_PROMPT', + testFile: testFilePath + }); + } + + if (!promptUnderTest || promptUnderTest.trim() === '') { + throw createError({ + ...ValidationError, + message: 'Test file does not declare a promptUnderTest import. Every test file must import the prompt under test.', + code: 'MISSING_PROMPT_UNDER_TEST', + testFile: testFilePath + }); + } + + if (!assertions || assertions.length === 0) { + throw createError({ + ...ValidationError, + message: 'Test file does not contain any assertions. Every test file must include at least one assertion (e.g., "Given X, should Y").', + code: 'NO_ASSERTIONS_FOUND', + testFile: testFilePath + }); + } + + return { + userPrompt, + promptUnderTest, + assertions: assertions.map(({ id, requirement }) => ({ id, requirement })) + }; +}; diff --git a/source/test-extractor.test.js b/source/test-extractor.test.js new file mode 100644 index 00000000..2b42bab6 --- /dev/null +++ b/source/test-extractor.test.js @@ -0,0 +1,524 @@ +import { describe, test } from 'vitest'; +import { assert } from './vitest.js'; +import { Try } from './riteway.js'; +import { handleAIErrors, allNoop } from './ai-errors.js'; +import { + buildExtractionPrompt, + buildResultPrompt, + buildJudgePrompt, + extractTests +} from './test-extractor.js'; +import { mkdirSync, writeFileSync, rmSync } from 'fs'; +import { join } from 'path'; +import { tmpdir } from 'os'; +import { init } from '@paralleldrive/cuid2'; + +const createSlug = init({ length: 10 }); + +const createTempDir = () => { + const slug = createSlug(); + return join(tmpdir(), `riteway-extractor-test-${slug}`); +}; + +describe('buildExtractionPrompt()', () => { + 
test('returns complete extraction prompt with test content embedded in delimiters', () => { + const testContent = `import @promptUnderTest + +userPrompt = """ + What is 2 + 2? +""" + +- Given simple addition, should add correctly +- Given format, should output JSON`; + + const result = buildExtractionPrompt(testContent); + + const expected = `You are a test extraction agent. Analyze the following test file and extract structured information. + +For each assertion or requirement in the test file (these may be formatted as +"Given X, should Y", bullet points, YAML entries, natural language sentences, +SudoLang expressions, or any other format): + +1. Identify the userPrompt (the prompt to be tested) +2. Extract the specific requirement from the assertion +3. Identify any import file paths (e.g., import 'path/to/file.mdc') + +Return a JSON object with: +- "userPrompt": the test prompt to execute (string) +- "importPaths": array of import file paths found in the test file (e.g., ["ai/rules/ui.mdc"]) +- "assertions": array of assertion objects, each with: + - "id": sequential integer starting at 1 + - "requirement": the assertion text (e.g., "Given X, should Y") + +Return ONLY valid JSON. No markdown fences, no explanation. + + +${testContent} +`; + + assert({ + given: 'test content with assertions', + should: 'return complete extraction prompt with test content wrapped in delimiters', + actual: result, + expected + }); + }); +}); + +describe('buildResultPrompt()', () => { + test('returns complete result prompt with context section when promptUnderTest provided', () => { + const userPrompt = 'What is 2 + 2?'; + const promptUnderTest = 'You are a math helper.'; + + const result = buildResultPrompt({ userPrompt, promptUnderTest }); + + const expected = `You are an AI assistant. Execute the following prompt and return your response. + +CONTEXT (Prompt Under Test): +You are a math helper. + +USER PROMPT: +What is 2 + 2? + +INSTRUCTIONS: +1. 
Execute the user prompt above, following the guidance in the prompt under test +2. Return your complete response as plain text + +Respond naturally. Do NOT wrap your response in JSON, markdown fences, or any other structure. +Your entire output IS the result.`; + + assert({ + given: 'userPrompt and promptUnderTest', + should: 'return complete result prompt with context section', + actual: result, + expected + }); + }); + + test('omits context section when promptUnderTest is not provided', () => { + const userPrompt = 'What is 2 + 2?'; + + const result = buildResultPrompt({ userPrompt }); + + const expected = `You are an AI assistant. Execute the following prompt and return your response. + +USER PROMPT: +What is 2 + 2? + +INSTRUCTIONS: +1. Execute the user prompt above +2. Return your complete response as plain text + +Respond naturally. Do NOT wrap your response in JSON, markdown fences, or any other structure. +Your entire output IS the result.`; + + assert({ + given: 'userPrompt without promptUnderTest', + should: 'return prompt without context section', + actual: result, + expected + }); + }); +}); + +describe('buildJudgePrompt()', () => { + test('returns complete judge prompt with all sections', () => { + const userPrompt = 'What is 2 + 2?'; + const promptUnderTest = 'You are a math helper.'; + const result = 'The answer is 4.'; + const requirement = 'Given simple addition, should return 4'; + + const judgePrompt = buildJudgePrompt({ userPrompt, promptUnderTest, result, requirement }); + + const expected = `You are an AI judge. Evaluate whether a given result satisfies a specific requirement. + +CONTEXT (Prompt Under Test): +You are a math helper. + +ORIGINAL USER PROMPT: +What is 2 + 2? + +ACTUAL RESULT TO EVALUATE: +The answer is 4. + +REQUIREMENT: +Given simple addition, should return 4 + +INSTRUCTIONS: +1. Read the actual result above +2. Determine whether it satisfies the requirement +3. 
Summarize what was actually produced (actual) vs what was expected (expected) +4. Assign a quality score from 0 (completely fails) to 100 (perfectly satisfies) + +Return your judgment as a TAP YAML diagnostic block: +--- +passed: true +actual: "summary of what was produced" +expected: "what was expected" +score: 85 +--- + +CRITICAL: Return ONLY the TAP YAML block. Start with --- on its own line, +end with --- on its own line. No markdown fences, no explanation outside the block.`; + + assert({ + given: 'all required fields', + should: 'return complete judge prompt with all sections', + actual: judgePrompt, + expected + }); + }); +}); + +describe('extractTests()', () => { + test('extracts and returns validated test structure from agent output', async () => { + const testDir = createTempDir(); + + try { + mkdirSync(testDir, { recursive: true }); + + const promptFile = join(testDir, 'prompt.mdc'); + writeFileSync(promptFile, 'You are a math helper.'); + + const testFile = join(testDir, 'test.sudo'); + writeFileSync(testFile, 'import "prompt.mdc"\n\n- Given addition, should add correctly'); + + const extractedData = { + userPrompt: 'What is 2+2?', + importPaths: ['prompt.mdc'], + assertions: [ + { id: 1, requirement: 'Given addition, should add correctly' } + ] + }; + + const mockAgentConfig = { + command: 'node', + args: ['-e', `console.log(JSON.stringify(${JSON.stringify(extractedData)}))`] + }; + + const result = await extractTests({ + testContent: 'import "prompt.mdc"\n\n- Given addition, should add correctly', + testFilePath: testFile, + agentConfig: mockAgentConfig, + timeout: 5000, + projectRoot: testDir + }); + + assert({ + given: 'valid extraction output with import file on disk', + should: 'return extracted userPrompt', + actual: result.userPrompt, + expected: 'What is 2+2?' 
+ }); + + assert({ + given: 'valid extraction output with import file on disk', + should: 'return promptUnderTest from resolved import', + actual: result.promptUnderTest, + expected: 'You are a math helper.' + }); + + assert({ + given: 'valid extraction output with one assertion', + should: 'return assertions array of length 1', + actual: result.assertions.length, + expected: 1 + }); + + assert({ + given: 'valid extraction output', + should: 'preserve the requirement text in assertions', + actual: result.assertions[0].requirement, + expected: 'Given addition, should add correctly' + }); + } finally { + rmSync(testDir, { recursive: true, force: true }); + } + }); + + test('concatenates multiple imported prompt files', async () => { + const testDir = createTempDir(); + + try { + mkdirSync(testDir, { recursive: true }); + + writeFileSync(join(testDir, 'rules1.mdc'), 'Rule 1: Be concise'); + writeFileSync(join(testDir, 'rules2.mdc'), 'Rule 2: Be accurate'); + + const extractedData = { + userPrompt: 'Test prompt', + importPaths: ['rules1.mdc', 'rules2.mdc'], + assertions: [{ id: 1, requirement: 'Given rules, should follow' }] + }; + + const mockAgentConfig = { + command: 'node', + args: ['-e', `console.log(JSON.stringify(${JSON.stringify(extractedData)}))`] + }; + + const result = await extractTests({ + testContent: 'import "rules1.mdc"\nimport "rules2.mdc"\n\n- Given rules, should follow', + testFilePath: join(testDir, 'test.sudo'), + agentConfig: mockAgentConfig, + timeout: 5000, + projectRoot: testDir + }); + + assert({ + given: 'two import files', + should: 'concatenate both file contents into promptUnderTest', + actual: result.promptUnderTest.includes('Rule 1') && result.promptUnderTest.includes('Rule 2'), + expected: true + }); + } finally { + rmSync(testDir, { recursive: true, force: true }); + } + }); + + test('resolves import paths relative to projectRoot, not test file location', async () => { + const testDir = createTempDir(); + + try { + mkdirSync(testDir, { 
recursive: true }); + const nestedDir = join(testDir, 'nested', 'deep'); + mkdirSync(nestedDir, { recursive: true }); + + writeFileSync(join(testDir, 'root-prompt.mdc'), 'Root level prompt'); + + const extractedData = { + userPrompt: 'Test', + importPaths: ['root-prompt.mdc'], + assertions: [{ id: 1, requirement: 'Given test, should pass' }] + }; + + const mockAgentConfig = { + command: 'node', + args: ['-e', `console.log(JSON.stringify(${JSON.stringify(extractedData)}))`] + }; + + const result = await extractTests({ + testContent: 'import "root-prompt.mdc"\n\n- Given test, should pass', + testFilePath: join(nestedDir, 'test.sudo'), + agentConfig: mockAgentConfig, + timeout: 5000, + projectRoot: testDir + }); + + assert({ + given: 'import path relative to project root', + should: 'resolve and read file from project root, not test file directory', + actual: result.promptUnderTest, + expected: 'Root level prompt' + }); + } finally { + rmSync(testDir, { recursive: true, force: true }); + } + }); + + test('allows imports from paths outside project root', async () => { + const testDir = createTempDir(); + + try { + mkdirSync(testDir, { recursive: true }); + + const externalDir = join(testDir, 'external'); + mkdirSync(externalDir, { recursive: true }); + writeFileSync(join(externalDir, 'shared-prompt.mdc'), 'External shared prompt content'); + + const projectDir = join(testDir, 'project'); + mkdirSync(projectDir, { recursive: true }); + + const extractedData = { + userPrompt: 'test', + importPaths: ['../external/shared-prompt.mdc'], + assertions: [{ id: 1, requirement: 'Given test, should pass' }] + }; + + const mockAgentConfig = { + command: 'node', + args: ['-e', `console.log(JSON.stringify(${JSON.stringify(extractedData)}))`] + }; + + const result = await extractTests({ + testContent: 'import "../external/shared-prompt.mdc"\n\n- Given test, should pass', + testFilePath: join(projectDir, 'test.sudo'), + agentConfig: mockAgentConfig, + timeout: 5000, + projectRoot: 
projectDir + }); + + assert({ + given: 'import path traversing outside project root', + should: 'resolve and read the external file without error', + actual: result.promptUnderTest, + expected: 'External shared prompt content' + }); + } finally { + rmSync(testDir, { recursive: true, force: true }); + } + }); + + test('throws ValidationError when promptUnderTest import is missing', async () => { + const extractedData = { + userPrompt: 'What is 2+2?', + importPaths: [], + assertions: [{ id: 1, requirement: 'Given a test, should pass' }] + }; + + const mockAgentConfig = { + command: 'node', + args: ['-e', `console.log(JSON.stringify(${JSON.stringify(extractedData)}))`] + }; + + const error = await Try(extractTests, { + testContent: '- Given test, should pass', + testFilePath: '/test/test.sudo', + agentConfig: mockAgentConfig, + timeout: 5000 + }); + + const invoked = []; + handleAIErrors({ ...allNoop, ValidationError: () => invoked.push('ValidationError') })(error); + + assert({ + given: 'no import paths (no promptUnderTest)', + should: 'throw an error that routes to the ValidationError handler', + actual: invoked, + expected: ['ValidationError'] + }); + + assert({ + given: 'no promptUnderTest import declared', + should: 'include MISSING_PROMPT_UNDER_TEST code in error', + actual: error?.cause?.code, + expected: 'MISSING_PROMPT_UNDER_TEST' + }); + }); + + test('throws ValidationError when userPrompt is empty', async () => { + const extractedData = { + userPrompt: '', + importPaths: ['package.json'], + assertions: [{ id: 1, requirement: 'Given a test, should pass' }] + }; + + const mockAgentConfig = { + command: 'node', + args: ['-e', `console.log(JSON.stringify(${JSON.stringify(extractedData)}))`] + }; + + const error = await Try(extractTests, { + testContent: 'import "package.json"\n\n- Given test, should pass', + testFilePath: '/test/test.sudo', + agentConfig: mockAgentConfig, + timeout: 5000 + }); + + const invoked = []; + handleAIErrors({ ...allNoop, 
ValidationError: () => invoked.push('ValidationError') })(error); + + assert({ + given: 'empty userPrompt in extraction result', + should: 'throw an error that routes to the ValidationError handler', + actual: invoked, + expected: ['ValidationError'] + }); + + assert({ + given: 'empty userPrompt', + should: 'include MISSING_USER_PROMPT code in error', + actual: error?.cause?.code, + expected: 'MISSING_USER_PROMPT' + }); + }); + + test('throws ValidationError when no assertions found', async () => { + const extractedData = { + userPrompt: 'test prompt', + importPaths: ['package.json'], + assertions: [] + }; + + const mockAgentConfig = { + command: 'node', + args: ['-e', `console.log(JSON.stringify(${JSON.stringify(extractedData)}))`] + }; + + const error = await Try(extractTests, { + testContent: 'import "package.json"\n\nuserPrompt = """test"""', + testFilePath: '/test/test.sudo', + agentConfig: mockAgentConfig, + timeout: 5000 + }); + + const invoked = []; + handleAIErrors({ ...allNoop, ValidationError: () => invoked.push('ValidationError') })(error); + + assert({ + given: 'empty assertions array', + should: 'throw an error that routes to the ValidationError handler', + actual: invoked, + expected: ['ValidationError'] + }); + + assert({ + given: 'empty assertions array', + should: 'include NO_ASSERTIONS_FOUND code in error', + actual: error?.cause?.code, + expected: 'NO_ASSERTIONS_FOUND' + }); + }); + + test('throws ValidationError with PROMPT_READ_FAILED when import file does not exist', async () => { + const testDir = createTempDir(); + + try { + mkdirSync(testDir, { recursive: true }); + + const extractedData = { + userPrompt: 'Test', + importPaths: ['nonexistent.mdc'], + assertions: [{ id: 1, requirement: 'Given test, should pass' }] + }; + + const mockAgentConfig = { + command: 'node', + args: ['-e', `console.log(JSON.stringify(${JSON.stringify(extractedData)}))`] + }; + + const error = await Try(extractTests, { + testContent: 'import "nonexistent.mdc"\n\n- 
Given test, should pass', + testFilePath: join(testDir, 'test.sudo'), + agentConfig: mockAgentConfig, + timeout: 5000, + projectRoot: testDir + }); + + const invoked = []; + handleAIErrors({ ...allNoop, ValidationError: () => invoked.push('ValidationError') })(error); + + assert({ + given: 'missing import file on disk', + should: 'throw an error that routes to the ValidationError handler', + actual: invoked, + expected: ['ValidationError'] + }); + + assert({ + given: 'missing import file on disk', + should: 'include PROMPT_READ_FAILED code in error', + actual: error?.cause?.code, + expected: 'PROMPT_READ_FAILED' + }); + + assert({ + given: 'missing import file on disk', + should: 'preserve original ENOENT error as cause', + actual: error?.cause?.cause?.code, + expected: 'ENOENT' + }); + } finally { + rmSync(testDir, { recursive: true, force: true }); + } + }); +});