diff --git a/package-lock.json b/package-lock.json
index 6e703490..0f664d5a 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -9,6 +9,7 @@
       "version": "9.0.0",
       "license": "MIT",
       "dependencies": {
+        "@paralleldrive/cuid2": "^3.3.0",
         "cheerio": "1.2.0",
         "dotignore": "^0.1.2",
         "error-causes": "^3.0.2",
@@ -1162,6 +1163,18 @@
         "node": ">= 0.4"
       }
     },
+    "node_modules/@noble/hashes": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/@noble/hashes/-/hashes-2.0.1.tgz",
+      "integrity": "sha512-XlOlEbQcE9fmuXxrVTXCTlG2nlRXa9Rj3rr5Ue/+tX+nmkgbX720YHh0VR3hBF9xDvwnb8D2shVGOwNx+ulArw==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 20.19.0"
+      },
+      "funding": {
+        "url": "https://paulmillr.com/funding/"
+      }
+    },
     "node_modules/@nodeutils/defaults-deep": {
       "version": "1.1.0",
       "resolved": "https://registry.npmjs.org/@nodeutils/defaults-deep/-/defaults-deep-1.1.0.tgz",
@@ -1338,6 +1351,20 @@
         "@octokit/openapi-types": "^27.0.0"
       }
     },
+    "node_modules/@paralleldrive/cuid2": {
+      "version": "3.3.0",
+      "resolved": "https://registry.npmjs.org/@paralleldrive/cuid2/-/cuid2-3.3.0.tgz",
+      "integrity": "sha512-OqiFvSOF0dBSesELYY2CAMa4YINvlLpvKOz/rv6NeZEqiyttlHgv98Juwv4Ch+GrEV7IZ8jfI2VcEoYUjXXCjw==",
+      "license": "MIT",
+      "dependencies": {
+        "@noble/hashes": "^2.0.1",
+        "bignumber.js": "^9.3.1",
+        "error-causes": "^3.0.2"
+      },
+      "bin": {
+        "cuid2": "bin/cuid2.js"
+      }
+    },
     "node_modules/@phun-ky/typeof": {
       "version": "2.0.3",
       "resolved": "https://registry.npmjs.org/@phun-ky/typeof/-/typeof-2.0.3.tgz",
@@ -2127,6 +2154,15 @@
       "dev": true,
       "license": "Apache-2.0"
     },
+    "node_modules/bignumber.js": {
+      "version": "9.3.1",
+      "resolved": "https://registry.npmjs.org/bignumber.js/-/bignumber.js-9.3.1.tgz",
+      "integrity": "sha512-Ko0uX15oIUS7wJ3Rb30Fs6SkVbLmPBAKdlm7q9+ak9bbIeFf0MwuBsQV6z7+X768/cHsfg+WlysDWJcmthjsjQ==",
+      "license": "MIT",
+      "engines": {
+        "node": "*"
+      }
+    },
     "node_modules/boolbase": {
       "version": "1.0.0",
       "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
diff --git a/package.json b/package.json
index c8386f8d..7b2039f7 100644
--- a/package.json
+++ b/package.json
@@ -104,6 +104,7 @@
     "watch": "^1.0.2"
   },
   "dependencies": {
+    "@paralleldrive/cuid2": "^3.3.0",
     "cheerio": "1.2.0",
     "dotignore": "^0.1.2",
     "error-causes": "^3.0.2",
diff --git a/source/agent-config.js b/source/agent-config.js
index f49a18d5..2c771f1d 100644
--- a/source/agent-config.js
+++ b/source/agent-config.js
@@ -1,21 +1,9 @@
 import { readFile } from 'fs/promises';
 import { z } from 'zod';
 import { createError } from 'error-causes';
-import { ValidationError } from './ai-errors.js';
+import { ValidationError, AgentConfigReadError, AgentConfigParseError, AgentConfigValidationError, formatZodError } from './ai-errors.js';
 import { parseOpenCodeNDJSON } from './agent-parser.js';
 
-/**
- * Format Zod validation errors into a human-readable message.
- * @param {any} zodError - Zod validation error
- * @returns {string} Formatted error message
- */
-export const formatZodError = (zodError) => {
-  const issues = zodError.issues || zodError.errors;
-  return issues
-    ? issues.map(e => `${e.path.join('.')}: ${e.message}`).join('; ')
-    : zodError.message || 'Validation failed';
-};
-
 /**
  * Get agent configuration based on agent name.
  * Supports 'claude', 'opencode', and 'cursor' agents.
@@ -62,9 +50,8 @@ const readAgentConfigFile = async ({ configPath }) => {
     return await readFile(configPath, 'utf-8');
   } catch (err) {
     throw createError({
-      ...ValidationError,
+      ...AgentConfigReadError,
       message: `Failed to read agent config file: ${configPath}`,
-      code: 'AGENT_CONFIG_READ_ERROR',
       cause: err
     });
   }
@@ -75,9 +62,8 @@ const parseJson = ({ configPath, raw }) => {
     return JSON.parse(raw);
   } catch (err) {
     throw createError({
-      ...ValidationError,
+      ...AgentConfigParseError,
       message: `Agent config file is not valid JSON: ${configPath}`,
-      code: 'AGENT_CONFIG_PARSE_ERROR',
       cause: err
     });
   }
@@ -88,9 +74,8 @@ const validateAgentConfig = (parsed) => {
     return agentConfigFileSchema.parse(parsed);
   } catch (zodError) {
     throw createError({
-      ...ValidationError,
+      ...AgentConfigValidationError,
       message: `Invalid agent config: ${formatZodError(zodError)}`,
-      code: 'AGENT_CONFIG_VALIDATION_ERROR',
       cause: zodError
     });
   }
diff --git a/source/agent-config.test.js b/source/agent-config.test.js
index 964f9ded..c6d453dc 100644
--- a/source/agent-config.test.js
+++ b/source/agent-config.test.js
@@ -1,7 +1,8 @@
 import { describe, test } from 'vitest';
 import { assert } from './vitest.js';
 import { Try } from './riteway.js';
-import { formatZodError, getAgentConfig, loadAgentConfig } from './agent-config.js';
+import { handleAIErrors, allNoop, formatZodError } from './ai-errors.js';
+import { getAgentConfig, loadAgentConfig } from './agent-config.js';
 
 describe('formatZodError()', () => {
   test('formats a single issue', () => {
@@ -162,18 +163,14 @@ describe('getAgentConfig()', () => {
   test('throws ValidationError for invalid agent name', () => {
     const error = Try(getAgentConfig, 'invalid-agent');
 
-    assert({
-      given: 'invalid agent name',
-      should: 'throw Error with cause',
-      actual: error instanceof Error && error.cause !== undefined,
-      expected: true
-    });
+    const invoked = [];
+    handleAIErrors({ ...allNoop, ValidationError: () => invoked.push('ValidationError') })(error);
 
     assert({
       given: 'invalid agent name',
-      should: 'have ValidationError name in cause',
-      actual: error?.cause?.name,
-      expected: 'ValidationError'
+      should: 'throw an error that routes to the ValidationError handler',
+      actual: invoked,
+      expected: ['ValidationError']
     });
 
     assert({
@@ -218,78 +215,45 @@ describe('loadAgentConfig()', () => {
     });
   });
 
-  test('throws ValidationError with AGENT_CONFIG_PARSE_ERROR for invalid JSON', async () => {
+  test('throws AgentConfigParseError for invalid JSON', async () => {
     const error = await Try(loadAgentConfig, './source/fixtures/invalid-agent-config.txt');
 
-    assert({
-      given: 'invalid JSON file',
-      should: 'throw Error with cause',
-      actual: error instanceof Error && error.cause !== undefined,
-      expected: true
-    });
-
-    assert({
-      given: 'invalid JSON file',
-      should: 'have ValidationError name in cause',
-      actual: error?.cause?.name,
-      expected: 'ValidationError'
-    });
+    const invoked = [];
+    handleAIErrors({ ...allNoop, AgentConfigParseError: () => invoked.push('AgentConfigParseError') })(error);
 
     assert({
       given: 'invalid JSON file',
-      should: 'have AGENT_CONFIG_PARSE_ERROR code in cause',
-      actual: error?.cause?.code,
-      expected: 'AGENT_CONFIG_PARSE_ERROR'
+      should: 'throw an error that routes to the AgentConfigParseError handler',
+      actual: invoked,
+      expected: ['AgentConfigParseError']
     });
   });
 
-  test('throws ValidationError with AGENT_CONFIG_VALIDATION_ERROR when command field missing', async () => {
+  test('throws AgentConfigValidationError when command field missing', async () => {
     const error = await Try(loadAgentConfig, './source/fixtures/no-command-agent-config.json');
 
-    assert({
-      given: 'config file missing command field',
-      should: 'throw Error with cause',
-      actual: error instanceof Error && error.cause !== undefined,
-      expected: true
-    });
-
-    assert({
-      given: 'config file missing command field',
-      should: 'have ValidationError name in cause',
-      actual: error?.cause?.name,
-      expected: 'ValidationError'
-    });
+    const invoked = [];
+    handleAIErrors({ ...allNoop, AgentConfigValidationError: () => invoked.push('AgentConfigValidationError') })(error);
 
     assert({
       given: 'config file missing command field',
-      should: 'have AGENT_CONFIG_VALIDATION_ERROR code in cause',
-      actual: error?.cause?.code,
-      expected: 'AGENT_CONFIG_VALIDATION_ERROR'
+      should: 'throw an error that routes to the AgentConfigValidationError handler',
+      actual: invoked,
+      expected: ['AgentConfigValidationError']
     });
   });
 
-  test('throws ValidationError with AGENT_CONFIG_READ_ERROR for nonexistent file', async () => {
+  test('throws AgentConfigReadError for nonexistent file', async () => {
     const error = await Try(loadAgentConfig, './nonexistent/path.json');
 
-    assert({
-      given: 'nonexistent file path',
-      should: 'throw Error with cause',
-      actual: error instanceof Error && error.cause !== undefined,
-      expected: true
-    });
-
-    assert({
-      given: 'nonexistent file path',
-      should: 'have ValidationError name in cause',
-      actual: error?.cause?.name,
-      expected: 'ValidationError'
-    });
+    const invoked = [];
+    handleAIErrors({ ...allNoop, AgentConfigReadError: () => invoked.push('AgentConfigReadError') })(error);
 
     assert({
       given: 'nonexistent file path',
-      should: 'have AGENT_CONFIG_READ_ERROR code in cause',
-      actual: error?.cause?.code,
-      expected: 'AGENT_CONFIG_READ_ERROR'
+      should: 'throw an error that routes to the AgentConfigReadError handler',
+      actual: invoked,
+      expected: ['AgentConfigReadError']
     });
   });
 });
diff --git a/source/ai-errors.js b/source/ai-errors.js
index 6a214b8a..cff7a4a0 100644
--- a/source/ai-errors.js
+++ b/source/ai-errors.js
@@ -10,7 +10,10 @@ export const [aiErrors, handleAIErrors] = errorCauses({
   AITestError: { code: 'AI_TEST_ERROR', message: 'AI test execution failed' },
   OutputError: { code: 'OUTPUT_ERROR', message: 'Test output recording failed' },
   ExtractionParseError: { code: 'EXTRACTION_PARSE_FAILURE', message: 'Failed to parse extraction result' },
-  ExtractionValidationError: { code: 'EXTRACTION_VALIDATION_FAILURE', message: 'Invalid extraction result' }
+  ExtractionValidationError: { code: 'EXTRACTION_VALIDATION_FAILURE', message: 'Invalid extraction result' },
+  AgentConfigReadError: { code: 'AGENT_CONFIG_READ_ERROR', message: 'Failed to read agent config file' },
+  AgentConfigParseError: { code: 'AGENT_CONFIG_PARSE_ERROR', message: 'Agent config file is not valid JSON' },
+  AgentConfigValidationError: { code: 'AGENT_CONFIG_VALIDATION_ERROR', message: 'Invalid agent config' }
 });
 
 // handleAIErrors is exhaustive — every registered type must have a handler.
@@ -26,5 +29,22 @@ export const {
   AITestError,
   OutputError,
   ExtractionParseError,
-  ExtractionValidationError
+  ExtractionValidationError,
+  AgentConfigReadError,
+  AgentConfigParseError,
+  AgentConfigValidationError
 } = aiErrors;
+
+/**
+ * Format Zod validation errors into a human-readable message.
+ * Lives beside the error definitions so any module can format Zod errors
+ * without importing from agent-config.js (wrong dependency direction).
+ * @param {any} zodError - Zod error exposing `issues` (or `errors`), or any object with `message`
+ * @returns {string} Issues as `path: message` (bare message at the root) joined by '; ', else the fallback message
+ */
+export const formatZodError = (zodError) => {
+  const issues = zodError.issues || zodError.errors;
+  return issues?.length
+    ? issues.map(e => (e.path?.length ? `${e.path.join('.')}: ${e.message}` : e.message)).join('; ')
+    : zodError.message || 'Validation failed';
+};
diff --git a/source/ai-runner.js b/source/ai-runner.js
new file mode 100644
index 00000000..53e190d9
--- /dev/null
+++ b/source/ai-runner.js
@@ -0,0 +1,280 @@
+import { readFile } from 'fs/promises';
+import { executeAgent } from './execute-agent.js';
+import { extractTests, buildResultPrompt, buildJudgePrompt } from './test-extractor.js';
+import { createDebugLogger } from './debug-logger.js';
+import { limitConcurrency } from './limit-concurrency.js';
+import { normalizeJudgment, aggregatePerAssertionResults } from './aggregation.js';
+import { parseTAPYAML } from './tap-yaml.js';
+import { verifyAgentAuthentication as verifyAuth } from './validation.js';
+
+/**
+ * Read a test file as UTF-8 text.
+ * @param {string} filePath - Path to the test file
+ * @returns {Promise<string>} File contents
+ */
+export const readTestFile = (filePath) => readFile(filePath, 'utf-8');
+
+/**
+ * Verify agent authentication via the validation module, supplying this
+ * module's executeAgent (any `executeAgent` key in `options` is overridden).
+ * @param {Object} options - Options forwarded to the validation module
+ * @returns {*} Whatever the validation module's check returns
+ */
+export const verifyAgentAuthentication = (options) => verifyAuth({ ...options, executeAgent });
+
+/**
+ * Extract structured test data from the test file and build the result prompt.
+ * @param {Object} options - Forwarded unchanged to extractTests; `logger` is also used here
+ * @returns {Promise<Object>} `{ userPrompt, promptUnderTest, assertions, resultPrompt }`
+ */
+const extractStructuredTests = async ({
+  testContent,
+  testFilePath,
+  agentConfig,
+  timeout,
+  debug,
+  projectRoot,
+  logger
+}) => {
+  logger.log(`\nExtracting tests from: ${testFilePath}`);
+  logger.log(`Test content length: ${testContent.length} characters`);
+
+  const { userPrompt, promptUnderTest, assertions } = await extractTests({
+    testContent,
+    testFilePath,
+    agentConfig,
+    timeout,
+    debug,
+    projectRoot,
+    logger
+  });
+
+  logger.log(`Extracted ${assertions.length} assertions`);
+
+  const resultPrompt = buildResultPrompt({ userPrompt, promptUnderTest });
+
+  return { userPrompt, promptUnderTest, assertions, resultPrompt };
+};
+
+/**
+ * Judge one assertion against one run's result: build the judge prompt, run
+ * the judge agent with raw output, parse the reply with parseTAPYAML, and normalize it.
+ * @param {Object} options - Assertion, run result, prompt context and agent settings
+ * @returns {Promise<*>} The value returned by normalizeJudgment
+ */
+const judgeAssertion = async ({
+  assertion,
+  result,
+  userPrompt,
+  promptUnderTest,
+  runIndex,
+  assertionIndex,
+  totalAssertions,
+  agentConfig,
+  timeout,
+  debug,
+  logFile,
+  logger
+}) => {
+  const judgePrompt = buildJudgePrompt({
+    userPrompt,
+    promptUnderTest,
+    result,
+    requirement: assertion.requirement
+  });
+
+  logger.log(`  Assertion ${assertionIndex + 1}/${totalAssertions}: ${assertion.requirement}`);
+
+  const judgeOutput = await executeAgent({
+    agentConfig,
+    prompt: judgePrompt,
+    timeout,
+    debug,
+    logFile,
+    rawOutput: true
+  });
+
+  const parsed = parseTAPYAML(judgeOutput);
+  return normalizeJudgment({
+    judgeResponse: parsed,
+    requirement: assertion.requirement,
+    runIndex,
+    logger
+  });
+};
+
+/**
+ * Execute one run: call the result agent once, then judge every assertion
+ * against that result. Judge calls within a run are all started in parallel.
+ * @param {Object} options - Run index, extracted test data and agent settings
+ * @returns {Promise<Array>} Judgments in the same order as `extracted.assertions`
+ */
+const executeSingleRun = async ({
+  runIndex,
+  extracted,
+  resultPrompt,
+  runs,
+  agentConfig,
+  timeout,
+  debug,
+  logFile,
+  logger
+}) => {
+  const { userPrompt, promptUnderTest, assertions } = extracted;
+
+  logger.log(`\nRun ${runIndex + 1}/${runs}: Calling result agent...`);
+
+  const result = await executeAgent({
+    agentConfig,
+    prompt: resultPrompt,
+    timeout,
+    debug,
+    logFile,
+    rawOutput: true
+  });
+
+  logger.log(`Result obtained (${result.length} chars). Judging ${assertions.length} assertions...`);
+
+  const judgments = await Promise.all(
+    assertions.map((assertion, assertionIndex) =>
+      judgeAssertion({
+        assertion,
+        result,
+        userPrompt,
+        promptUnderTest,
+        runIndex,
+        assertionIndex,
+        totalAssertions: assertions.length,
+        agentConfig,
+        timeout,
+        debug,
+        logFile,
+        logger
+      })
+    )
+  );
+
+  return judgments;
+};
+
+/**
+ * Build one deferred task per run and hand the tasks to limitConcurrency, so
+ * `concurrency` applies to whole runs, not to the judge calls inside a run.
+ * @param {Object} options - Extracted test data, run count, concurrency and agent settings
+ * @returns {*} Whatever limitConcurrency returns; runAITests awaits it as one judgments array per run
+ */
+const executeRuns = ({
+  extracted,
+  resultPrompt,
+  runs,
+  concurrency,
+  agentConfig,
+  timeout,
+  debug,
+  logFile,
+  logger
+}) => {
+  const runTasks = Array.from({ length: runs }, (_, runIndex) => async () =>
+    executeSingleRun({
+      runIndex,
+      extracted,
+      resultPrompt,
+      runs,
+      agentConfig,
+      timeout,
+      debug,
+      logFile,
+      logger
+    })
+  );
+
+  return limitConcurrency(runTasks, concurrency);
+};
+
+/**
+ * Regroup run-major judgments (one array per run) by assertion, then aggregate.
+ * @param {Object} options
+ * @param {Array<Object>} options.assertions - Extracted assertions (each has `requirement`)
+ * @param {Array<Array>} options.allRunJudgments - Judgments per run, indexed by assertion
+ * @param {number} options.threshold - Forwarded to aggregatePerAssertionResults
+ * @param {number} options.runs - Forwarded to aggregatePerAssertionResults
+ * @returns {*} Result of aggregatePerAssertionResults
+ */
+const aggregateResults = ({ assertions, allRunJudgments, threshold, runs }) => {
+  const perAssertionResults = assertions.map(({ requirement }, assertionIndex) => ({
+    requirement,
+    runResults: allRunJudgments.map(runJudgments => runJudgments[assertionIndex])
+  }));
+
+  return aggregatePerAssertionResults({ perAssertionResults, threshold, runs });
+};
+
+/**
+ * Run AI tests with two-agent pattern: result agent + judge agent.
+ * Pipeline: readTestFile → extractTests → result agent (once per run) → judge agents (per assertion, parallel) → aggregation.
+ * The debug logger is flushed on both success and failure.
+ *
+ * @param {Object} options
+ * @param {string} options.filePath - Path to test file
+ * @param {number} [options.runs=4] - Number of test runs per assertion
+ * @param {number} [options.threshold=75] - Required pass percentage (0-100)
+ * @param {Object} options.agentConfig - Agent CLI configuration
+ * @param {string} options.agentConfig.command - Command to execute
+ * @param {Array<string>} [options.agentConfig.args=[]] - Command arguments
+ * @param {number} [options.timeout=300000] - Timeout in milliseconds (default: 5 minutes)
+ * @param {number} [options.concurrency=4] - Maximum concurrent runs
+ * @param {boolean} [options.debug=false] - Enable debug logging
+ * @param {string} [options.logFile] - Optional log file path for debug output
+ * @param {string} [options.projectRoot=process.cwd()] - Project root directory for resolving import paths
+ * @returns {Promise<Object>} Aggregated per-assertion test results
+ */
+export const runAITests = async ({
+  filePath,
+  runs = 4,
+  threshold = 75,
+  timeout = 300000,
+  concurrency = 4,
+  debug = false,
+  logFile,
+  projectRoot = process.cwd(),
+  agentConfig = {
+    command: 'claude',
+    args: ['-p', '--output-format', 'json', '--no-session-persistence']
+  }
+}) => {
+  const logger = createDebugLogger({ debug, logFile });
+
+  // Flush in `finally` so debug output is not lost when reading the file,
+  // extraction, or any run rejects.
+  try {
+    const testContent = await readTestFile(filePath);
+
+    const extracted = await extractStructuredTests({
+      testContent,
+      testFilePath: filePath,
+      agentConfig,
+      timeout,
+      debug,
+      projectRoot,
+      logger
+    });
+
+    const { resultPrompt, assertions } = extracted;
+
+    const allRunJudgments = await executeRuns({
+      extracted,
+      resultPrompt,
+      runs,
+      concurrency,
+      agentConfig,
+      timeout,
+      debug,
+      logFile,
+      logger
+    });
+
+    return aggregateResults({ assertions, allRunJudgments, threshold, runs });
+  } finally {
+    logger.flush();
+  }
+};
diff --git a/source/ai-runner.test.js b/source/ai-runner.test.js
new file mode 100644
index 00000000..f0d65fe6
--- /dev/null
+++ b/source/ai-runner.test.js
@@ -0,0 +1,244 @@
+import { describe, test } from 'vitest';
+import { assert } from './vitest.js';
+import { writeFileSync, mkdirSync, rmSync } from 'fs';
+import { join } from 'path';
+import { tmpdir } from 'os';
+import { init } from '@paralleldrive/cuid2';
+import { readTestFile, runAITests } from './ai-runner.js';
+
+const createSlug = init({ length: 5 });
+
+/**
+ * Run a test body against a fresh, uniquely named temp directory and remove
+ * the directory afterwards, even when the body throws or rejects.
+ * @param {(testDir: string) => Promise<void>} run - Test body
+ * @returns {Promise<void>}
+ */
+const withTestDir = async (run) => {
+  const testDir = join(tmpdir(), `riteway-test-${createSlug()}`);
+
+  try {
+    mkdirSync(testDir, { recursive: true });
+    await run(testDir);
+  } finally {
+    rmSync(testDir, { recursive: true, force: true });
+  }
+};
+
+// Mock agent for two-agent pattern:
+// - Extraction calls (containing '<test-file-contents>') return extraction result (JSON)
+// - Result agent calls (containing 'CONTEXT (Prompt Under Test)') return plain text
+// - Judge agent calls (containing 'ACTUAL RESULT TO EVALUATE') return TAP YAML
+const createTwoAgentMockArgs = ({
+  extractedTests,
+  importPaths = ['prompt.mdc'],
+  resultText = 'Mock result from agent',
+  judgmentPassed = true,
+  judgmentScore = 85
+} = {}) => {
+  const extractionResult = {
+    userPrompt: 'What is 2+2?',
+    importPaths,
+    assertions: extractedTests
+  };
+  const tapYAML = `---
+passed: ${judgmentPassed}
+actual: "Mock actual output"
+expected: "Mock expected output"
+score: ${judgmentScore}
+---`;
+
+  return [
+    '-e',
+    `const prompt = process.argv[process.argv.length - 1];
+    if (prompt.includes('<test-file-contents>')) {
+      console.log(JSON.stringify(${JSON.stringify(extractionResult)}));
+    } else if (prompt.includes('ACTUAL RESULT TO EVALUATE')) {
+      console.log(\`${tapYAML}\`);
+    } else if (prompt.includes('CONTEXT (Prompt Under Test)')) {
+      console.log(${JSON.stringify(resultText)});
+    }`
+  ];
+};
+
+describe('readTestFile()', () => {
+  test('reads file contents from path', () =>
+    withTestDir(async (testDir) => {
+      const testFile = join(testDir, 'test.sudo');
+      const contents = 'describe("test", { requirements: ["should work"] })';
+      writeFileSync(testFile, contents);
+
+      assert({
+        given: 'a test file path',
+        should: 'return the file contents',
+        actual: await readTestFile(testFile),
+        expected: contents
+      });
+    }));
+
+  test('reads any file extension', () =>
+    withTestDir(async (testDir) => {
+      const testFile = join(testDir, 'test.md');
+      const contents = '# My Test\n\nSome markdown content';
+      writeFileSync(testFile, contents);
+
+      assert({
+        given: 'a markdown file path',
+        should: 'return the file contents regardless of extension',
+        actual: await readTestFile(testFile),
+        expected: contents
+      });
+    }));
+});
+
+describe('runAITests()', () => {
+  test('extracts tests and returns per-assertion results', () =>
+    withTestDir(async (testDir) => {
+      writeFileSync(join(testDir, 'prompt.mdc'), 'Test prompt context');
+      const testFile = join(testDir, 'test.sudo');
+      writeFileSync(testFile, '- Given addition, should add\n- Given format, should output JSON');
+
+      const extractedTests = [
+        { id: 1, requirement: 'Given addition, should add' },
+        { id: 2, requirement: 'Given format, should output JSON' }
+      ];
+
+      const result = await runAITests({
+        filePath: testFile,
+        runs: 2,
+        threshold: 50,
+        projectRoot: testDir,
+        agentConfig: {
+          command: 'node',
+          args: createTwoAgentMockArgs({ extractedTests })
+        }
+      });
+
+      assert({
+        given: 'multi-assertion test file with all runs passing at 50% threshold',
+        should: 'return passed: true',
+        actual: result.passed,
+        expected: true
+      });
+
+      assert({
+        given: 'two extracted assertions',
+        should: 'return assertions array of length 2',
+        actual: result.assertions.length,
+        expected: 2
+      });
+
+      assert({
+        given: 'first extracted assertion',
+        should: 'preserve the requirement text',
+        actual: result.assertions[0].requirement,
+        expected: 'Given addition, should add'
+      });
+    }));
+
+  test('tracks pass count across N runs for each assertion', () =>
+    withTestDir(async (testDir) => {
+      writeFileSync(join(testDir, 'prompt.mdc'), 'Test prompt context');
+      const testFile = join(testDir, 'test.sudo');
+      writeFileSync(testFile, '- Given a test, should pass');
+
+      const extractedTests = [{ id: 1, requirement: 'Given a test, should pass' }];
+
+      const result = await runAITests({
+        filePath: testFile,
+        runs: 3,
+        threshold: 75,
+        projectRoot: testDir,
+        agentConfig: {
+          command: 'node',
+          args: createTwoAgentMockArgs({ extractedTests })
+        }
+      });
+
+      assert({
+        given: 'runs: 3 with one assertion',
+        should: 'execute 3 runs for the assertion',
+        actual: result.assertions[0].totalRuns,
+        expected: 3
+      });
+
+      assert({
+        given: 'all 3 runs passing',
+        should: 'have passCount 3',
+        actual: result.assertions[0].passCount,
+        expected: 3
+      });
+    }));
+
+  test('fails when assertion does not meet threshold', () =>
+    withTestDir(async (testDir) => {
+      writeFileSync(join(testDir, 'prompt.mdc'), 'Test prompt context');
+      const testFile = join(testDir, 'test.sudo');
+      writeFileSync(testFile, '- Given a test, should fail');
+
+      const extractedTests = [{ id: 1, requirement: 'Given a test, should fail' }];
+
+      const result = await runAITests({
+        filePath: testFile,
+        runs: 2,
+        threshold: 75,
+        projectRoot: testDir,
+        agentConfig: {
+          command: 'node',
+          args: createTwoAgentMockArgs({
+            extractedTests,
+            judgmentPassed: false,
+            judgmentScore: 25
+          })
+        }
+      });
+
+      assert({
+        given: 'all runs failing at 75% threshold',
+        should: 'return passed: false',
+        actual: result.passed,
+        expected: false
+      });
+
+      assert({
+        given: 'the failing assertion',
+        should: 'have passCount 0',
+        actual: result.assertions[0].passCount,
+        expected: 0
+      });
+    }));
+
+  test('includes averageScore across all runs for each assertion', () =>
+    withTestDir(async (testDir) => {
+      writeFileSync(join(testDir, 'prompt.mdc'), 'Test prompt context');
+      const testFile = join(testDir, 'test.sudo');
+      writeFileSync(testFile, '- Given a test, should pass');
+
+      const extractedTests = [{ id: 1, requirement: 'Given a test, should pass' }];
+
+      const result = await runAITests({
+        filePath: testFile,
+        runs: 2,
+        threshold: 50,
+        projectRoot: testDir,
+        agentConfig: {
+          command: 'node',
+          args: createTwoAgentMockArgs({ extractedTests, judgmentScore: 85 })
+        }
+      });
+
+      assert({
+        given: 'judgment score of 85 on both runs',
+        should: 'include averageScore as a number',
+        actual: typeof result.assertions[0].averageScore,
+        expected: 'number'
+      });
+
+      assert({
+        given: 'judgment score of 85 on both runs',
+        should: 'calculate correct average score',
+        actual: result.assertions[0].averageScore,
+        expected: 85
+      });
+    }));
+});
diff --git a/source/execute-agent.js b/source/execute-agent.js
index ceac994f..e25776df 100644
--- a/source/execute-agent.js
+++ b/source/execute-agent.js
@@ -6,6 +6,9 @@ import { unwrapEnvelope, unwrapAgentResult } from './agent-parser.js';
 
 const maxOutputPreviewLength = 500;
 
+const truncateOutput = (str) =>
+  str.length <= maxOutputPreviewLength ? str : `${str.slice(0, maxOutputPreviewLength)}...`;
+
 const withTimeout = (promise, ms, errorFactory) =>
   Promise.race([
     promise,
@@ -97,7 +100,7 @@ const processAgentOutput = ({ agentConfig, rawOutput, logger }) => ({ stdout })
     logger.flush();
     return result;
   } catch (err) {
-    const truncatedStdout = stdout.length > maxOutputPreviewLength ? `${stdout.slice(0, maxOutputPreviewLength)}...` : stdout;
+    const truncatedStdout = truncateOutput(stdout);
     logger.log('JSON parsing failed:', err.message);
     logger.flush();
 
@@ -133,8 +136,8 @@ const runAgentProcess = async ({ agentConfig, prompt, timeout, logger }) => {
   logger.log(`Stderr length: ${stderr.length} characters`);
 
   if (code !== 0) {
-    const truncatedStdout = stdout.length > maxOutputPreviewLength ? `${stdout.slice(0, maxOutputPreviewLength)}...` : stdout;
-    const truncatedStderr = stderr.length > maxOutputPreviewLength ? `${stderr.slice(0, maxOutputPreviewLength)}...` : stderr;
+    const truncatedStdout = truncateOutput(stdout);
+    const truncatedStderr = truncateOutput(stderr);
 
     logger.log('Process failed with non-zero exit code');
     logger.flush();
diff --git a/source/extraction-parser.js b/source/extraction-parser.js
index 8a6ef970..42fc7799 100644
--- a/source/extraction-parser.js
+++ b/source/extraction-parser.js
@@ -12,17 +12,17 @@ const assertionRequiredFields = ['id', 'requirement'];
  * This allows legitimate cross-project imports (e.g., shared prompt libraries).
  * Test authors are responsible for not importing sensitive files (.env, credentials).
  * See PR #394 remediation epic (Wave 1, Task 2) for design rationale.
+ *
+ * @param {string[]} importPaths - Paths to resolve relative to projectRoot
+ * @param {string} projectRoot - Root directory for resolving relative paths
+ * @param {Object} logger - Debug logger instance (injected by test-extractor.js)
  */
-export const resolveImportPaths = async (importPaths, projectRoot, debug) => {
-  if (debug) {
-    console.error(`[DEBUG] Found ${importPaths.length} imports to resolve`);
-  }
+export const resolveImportPaths = async (importPaths, projectRoot, logger) => {
+  logger.log(`Found ${importPaths.length} imports to resolve`);
   const importedContents = await Promise.all(
     importPaths.map(async importPath => {
       const resolvedPath = resolve(projectRoot, importPath);
-      if (debug) {
-        console.error(`[DEBUG] Reading import: ${importPath} -> ${resolvedPath}`);
-      }
+      logger.log(`Reading import: ${importPath} -> ${resolvedPath}`);
       try {
         return await readFile(resolvedPath, 'utf-8');
       } catch (originalError) {
@@ -38,9 +38,7 @@ export const resolveImportPaths = async (importPaths, projectRoot, debug) => {
     })
   );
   const result = importedContents.join('\n\n');
-  if (debug) {
-    console.error(`[DEBUG] Imported content length: ${result.length} characters`);
-  }
+  logger.log(`Imported content length: ${result.length} characters`);
   return result;
 };
 
diff --git a/source/extraction-parser.test.js b/source/extraction-parser.test.js
index 95b3d0b9..eb5d819d 100644
--- a/source/extraction-parser.test.js
+++ b/source/extraction-parser.test.js
@@ -172,10 +172,12 @@ describe('parseExtractionResult()', () => {
 });
 
 describe('resolveImportPaths()', () => {
+  const noopLogger = { log: () => {} };
+
   test('resolves and joins file contents for valid import paths', async () => {
     readFile.mockResolvedValueOnce('content of file A').mockResolvedValueOnce('content of file B');
 
-    const result = await resolveImportPaths(['a.mdc', 'b.mdc'], '/project', false);
+    const result = await resolveImportPaths(['a.mdc', 'b.mdc'], '/project', noopLogger);
 
     assert({
       given: 'two readable import paths',
@@ -188,7 +190,7 @@ describe('resolveImportPaths()', () => {
   test('throws ValidationError when a file cannot be read', async () => {
     readFile.mockRejectedValueOnce(new Error('ENOENT: no such file or directory'));
 
-    const error = await Try(resolveImportPaths, ['missing.mdc'], '/project', false);
+    const error = await Try(resolveImportPaths, ['missing.mdc'], '/project', noopLogger);
 
     const invoked = [];
     handleAIErrors({ ...allNoop, ValidationError: () => invoked.push('ValidationError') })(error);
diff --git a/source/test-extractor.js b/source/test-extractor.js
new file mode 100644
index 00000000..9a1a4088
--- /dev/null
+++ b/source/test-extractor.js
@@ -0,0 +1,193 @@
+import { createError } from 'error-causes';
+import { ValidationError } from './ai-errors.js';
+import { executeAgent } from './execute-agent.js';
+import { parseExtractionResult, resolveImportPaths } from './extraction-parser.js';
+
+/**
+ * Build the prompt that asks an LLM agent to extract the individual
+ * assertions from a multi-assertion test file.
+ *
+ * The agent is asked for STRUCTURED DATA (JSON), never for executable prompts.
+ * This is a deliberate architectural decision:
+ * 1. Reliability: extraction agents may create prompts in inconsistent formats
+ * 2. Format control: the judge reply format (TAP YAML: passed/actual/expected/score)
+ *    is fixed by the template in buildJudgePrompt, not by the extraction agent
+ * 3. Testability: template-based prompts are deterministic and testable
+ * 4. Debugging: structured data lets us inspect exactly what was extracted
+ *
+ * Phase 1 (this function): extract { userPrompt, importPaths, assertions[{ id, requirement }] }
+ * Phase 2: buildResultPrompt / buildJudgePrompt turn that metadata into executable prompts
+ * @param {string} testContent - Raw test file text, embedded verbatim between <test-file-contents> tags
+ * @returns {string} The complete extraction prompt
+ */
+export const buildExtractionPrompt = (testContent) =>
+  `You are a test extraction agent. Analyze the following test file and extract structured information.
+
+For each assertion or requirement in the test file (these may be formatted as
+"Given X, should Y", bullet points, YAML entries, natural language sentences,
+SudoLang expressions, or any other format):
+
+1. Identify the userPrompt (the prompt to be tested)
+2. Extract the specific requirement from the assertion
+3. Identify any import file paths (e.g., import 'path/to/file.mdc')
+
+Return a JSON object with:
+- "userPrompt": the test prompt to execute (string)
+- "importPaths": array of import file paths found in the test file (e.g., ["ai/rules/ui.mdc"])
+- "assertions": array of assertion objects, each with:
+  - "id": sequential integer starting at 1
+  - "requirement": the assertion text (e.g., "Given X, should Y")
+
+Return ONLY valid JSON. No markdown fences, no explanation.
+
+<test-file-contents>
+${testContent}
+</test-file-contents>`;
+
+/**
+ * Build the result-agent prompt: execute the user prompt and reply in plain
+ * text (no JSON, no evaluation). Evaluation is left to the judge prompt (buildJudgePrompt).
+ * @param {Object} options
+ * @param {string} options.userPrompt - Prompt to execute, placed under "USER PROMPT:"
+ * @param {string} [options.promptUnderTest] - Context text; when falsy the CONTEXT section is omitted
+ * @returns {string} The complete result prompt
+ */
+export const buildResultPrompt = ({ userPrompt, promptUnderTest }) => {
+  const contextSection = promptUnderTest
+    ? `CONTEXT (Prompt Under Test):\n${promptUnderTest}\n\n`
+    : '';
+
+  return `You are an AI assistant. Execute the following prompt and return your response.
+
+${contextSection}USER PROMPT:
+${userPrompt}
+
+INSTRUCTIONS:
+1. Execute the user prompt above${promptUnderTest ? ', following the guidance in the prompt under test' : ''}
+2. Return your complete response as plain text
+
+Respond naturally. Do NOT wrap your response in JSON, markdown fences, or any other structure.
+Your entire output IS the result.`;
+};
+
+/**
+ * Build the judge-agent prompt: evaluate one result against ONE requirement and
+ * reply with a TAP YAML diagnostic block (passed, actual, expected, score 0-100).
+ * NOTE(review): promptUnderTest is interpolated unconditionally (unlike buildResultPrompt),
+ * so an omitted value renders as the literal text "undefined" - callers must supply it.
+ * @param {{ userPrompt: string, promptUnderTest: string, result: string, requirement: string }} options
+ * @returns {string} The complete judge prompt
+ */
+export const buildJudgePrompt = ({ userPrompt, promptUnderTest, result, requirement }) =>
+  `You are an AI judge. Evaluate whether a given result satisfies a specific requirement.
+
+CONTEXT (Prompt Under Test):
+${promptUnderTest}
+
+ORIGINAL USER PROMPT:
+${userPrompt}
+
+ACTUAL RESULT TO EVALUATE:
+${result}
+
+REQUIREMENT:
+${requirement}
+
+INSTRUCTIONS:
+1. Read the actual result above
+2. Determine whether it satisfies the requirement
+3. Summarize what was actually produced (actual) vs what was expected (expected)
+4. Assign a quality score from 0 (completely fails) to 100 (perfectly satisfies)
+
+Return your judgment as a TAP YAML diagnostic block:
+---
+passed: true
+actual: "summary of what was produced"
+expected: "what was expected"
+score: 85
+---
+
+CRITICAL: Return ONLY the TAP YAML block. Start with --- on its own line,
+end with --- on its own line. No markdown fences, no explanation outside the block.`;
+
+/**
+ * Extract individual test assertions from a multi-assertion test file
+ * by calling an LLM agent with a specialized extraction prompt.
+ *
+ * Pipeline:
+ * Phase 1: Extraction agent parses test file → {userPrompt, importPaths, assertions}
+ * Phase 1.5: Read agent-identified import files (resolved against projectRoot) → promptUnderTest string
+ * Phase 2: Return validated structured data for two-agent execution
+ *
+ * Validation (imports are read first, so PROMPT_READ_FAILED precedes the checks below):
+ * - Missing import file → ValidationError PROMPT_READ_FAILED (with cause)
+ * - Missing userPrompt → ValidationError MISSING_USER_PROMPT
+ * - Missing promptUnderTest → ValidationError MISSING_PROMPT_UNDER_TEST
+ *   (also raised when testFilePath is omitted, because imports are then never read)
+ * - No assertions → ValidationError NO_ASSERTIONS_FOUND
+ * @param {Object} options
+ * @param {string} options.testContent - Raw contents of the test file
+ * @param {string} [options.testFilePath] - Test file path; gates import reading and is attached to validation errors as testFile
+ * @param {Object} options.agentConfig - Agent CLI configuration
+ * @param {number} [options.timeout=300000] - Timeout in milliseconds
+ * @param {boolean} [options.debug=false] - Forwarded to executeAgent; messages in this function go through logger
+ * @param {string} [options.projectRoot=process.cwd()] - Project root for resolving import paths
+ * @param {Object} [options.logger={ log: () => {} }] - Debug logger instance (owned by caller; defaults to noop)
+ * @returns {Promise<{ userPrompt: string, promptUnderTest: string, assertions: Array<{ id: number, requirement: string }> }>}
+ */
+export const extractTests = async ({
+  testContent,
+  testFilePath,
+  agentConfig,
+  timeout = 300000,
+  debug = false,
+  projectRoot = process.cwd(),
+  logger = { log: () => {} }
+}) => {
+  logger.log('\nCalling extraction agent...');
+
+  const extractionPrompt = buildExtractionPrompt(testContent);
+  const result = await executeAgent({ agentConfig, prompt: extractionPrompt, timeout, debug });
+  const extracted = parseExtractionResult(result);
+
+  logger.log(`Extraction complete. Found ${extracted.assertions.length} assertions`);
+
+  const promptUnderTest = testFilePath && extracted.importPaths.length > 0
+    ? await resolveImportPaths(extracted.importPaths, projectRoot, logger)
+    : '';
+
+  const { userPrompt, assertions } = extracted;
+
+  if (!userPrompt || userPrompt.trim() === '') {
+    throw createError({
+      ...ValidationError,
+      message: 'Test file does not define a userPrompt. Every test file must include a user prompt (inline or imported).',
+      code: 'MISSING_USER_PROMPT',
+      testFile: testFilePath
+    });
+  }
+
+  if (!promptUnderTest || promptUnderTest.trim() === '') {
+    throw createError({
+      ...ValidationError,
+      message: 'Test file does not declare a promptUnderTest import. Every test file must import the prompt under test.',
+      code: 'MISSING_PROMPT_UNDER_TEST',
+      testFile: testFilePath
+    });
+  }
+
+  if (!assertions || assertions.length === 0) {
+    throw createError({
+      ...ValidationError,
+      message: 'Test file does not contain any assertions. Every test file must include at least one assertion (e.g., "Given X, should Y").',
+      code: 'NO_ASSERTIONS_FOUND',
+      testFile: testFilePath
+    });
+  }
+
+  return {
+    userPrompt,
+    promptUnderTest,
+    assertions: assertions.map(({ id, requirement }) => ({ id, requirement }))
+  };
+};
diff --git a/source/test-extractor.test.js b/source/test-extractor.test.js
new file mode 100644
index 00000000..2b42bab6
--- /dev/null
+++ b/source/test-extractor.test.js
@@ -0,0 +1,524 @@
+import { describe, test } from 'vitest';
+import { assert } from './vitest.js';
+import { Try } from './riteway.js';
+import { handleAIErrors, allNoop } from './ai-errors.js';
+import {
+  buildExtractionPrompt,
+  buildResultPrompt,
+  buildJudgePrompt,
+  extractTests
+} from './test-extractor.js';
+import { mkdirSync, writeFileSync, rmSync } from 'fs';
+import { join } from 'path';
+import { tmpdir } from 'os';
+import { init } from '@paralleldrive/cuid2';
+
+const createSlug = init({ length: 10 }); // cuid2 id generator configured with length 10
+
+const createTempDir = () => { // only builds a path; each test calls mkdirSync/rmSync on it itself
+  const slug = createSlug();
+  return join(tmpdir(), `riteway-extractor-test-${slug}`); // per-call slug under the OS temp dir
+};
+
+describe('buildExtractionPrompt()', () => {
+  test('returns complete extraction prompt with test content embedded in delimiters', () => {
+    const testContent = `import @promptUnderTest
+
+userPrompt = """
+  What is 2 + 2?
+"""
+
+- Given simple addition, should add correctly
+- Given format, should output JSON`;
+
+    const result = buildExtractionPrompt(testContent);
+
+    const expected = `You are a test extraction agent. Analyze the following test file and extract structured information.
+
+For each assertion or requirement in the test file (these may be formatted as
+"Given X, should Y", bullet points, YAML entries, natural language sentences,
+SudoLang expressions, or any other format):
+
+1. Identify the userPrompt (the prompt to be tested)
+2. Extract the specific requirement from the assertion
+3. Identify any import file paths (e.g., import 'path/to/file.mdc')
+
+Return a JSON object with:
+- "userPrompt": the test prompt to execute (string)
+- "importPaths": array of import file paths found in the test file (e.g., ["ai/rules/ui.mdc"])
+- "assertions": array of assertion objects, each with:
+  - "id": sequential integer starting at 1
+  - "requirement": the assertion text (e.g., "Given X, should Y")
+
+Return ONLY valid JSON. No markdown fences, no explanation.
+
+<test-file-contents>
+${testContent}
+</test-file-contents>`;
+
+    assert({
+      given: 'test content with assertions',
+      should: 'return complete extraction prompt with test content wrapped in delimiters',
+      actual: result,
+      expected
+    });
+  });
+});
+
+describe('buildResultPrompt()', () => {
+  test('returns complete result prompt with context section when promptUnderTest provided', () => {
+    const userPrompt = 'What is 2 + 2?';
+    const promptUnderTest = 'You are a math helper.';
+
+    const result = buildResultPrompt({ userPrompt, promptUnderTest });
+
+    const expected = `You are an AI assistant. Execute the following prompt and return your response.
+
+CONTEXT (Prompt Under Test):
+You are a math helper.
+
+USER PROMPT:
+What is 2 + 2?
+
+INSTRUCTIONS:
+1. Execute the user prompt above, following the guidance in the prompt under test
+2. Return your complete response as plain text
+
+Respond naturally. Do NOT wrap your response in JSON, markdown fences, or any other structure.
+Your entire output IS the result.`;
+
+    assert({
+      given: 'userPrompt and promptUnderTest',
+      should: 'return complete result prompt with context section',
+      actual: result,
+      expected
+    });
+  });
+
+  test('omits context section when promptUnderTest is not provided', () => {
+    const userPrompt = 'What is 2 + 2?';
+
+    const result = buildResultPrompt({ userPrompt });
+
+    const expected = `You are an AI assistant. Execute the following prompt and return your response.
+
+USER PROMPT:
+What is 2 + 2?
+
+INSTRUCTIONS:
+1. Execute the user prompt above
+2. Return your complete response as plain text
+
+Respond naturally. Do NOT wrap your response in JSON, markdown fences, or any other structure.
+Your entire output IS the result.`;
+
+    assert({
+      given: 'userPrompt without promptUnderTest',
+      should: 'return prompt without context section',
+      actual: result,
+      expected
+    });
+  });
+});
+
+describe('buildJudgePrompt()', () => {
+  test('returns complete judge prompt with all sections', () => {
+    const userPrompt = 'What is 2 + 2?';
+    const promptUnderTest = 'You are a math helper.';
+    const result = 'The answer is 4.';
+    const requirement = 'Given simple addition, should return 4';
+
+    const judgePrompt = buildJudgePrompt({ userPrompt, promptUnderTest, result, requirement });
+
+    const expected = `You are an AI judge. Evaluate whether a given result satisfies a specific requirement.
+
+CONTEXT (Prompt Under Test):
+You are a math helper.
+
+ORIGINAL USER PROMPT:
+What is 2 + 2?
+
+ACTUAL RESULT TO EVALUATE:
+The answer is 4.
+
+REQUIREMENT:
+Given simple addition, should return 4
+
+INSTRUCTIONS:
+1. Read the actual result above
+2. Determine whether it satisfies the requirement
+3. Summarize what was actually produced (actual) vs what was expected (expected)
+4. Assign a quality score from 0 (completely fails) to 100 (perfectly satisfies)
+
+Return your judgment as a TAP YAML diagnostic block:
+---
+passed: true
+actual: "summary of what was produced"
+expected: "what was expected"
+score: 85
+---
+
+CRITICAL: Return ONLY the TAP YAML block. Start with --- on its own line,
+end with --- on its own line. No markdown fences, no explanation outside the block.`;
+
+    assert({
+      given: 'all required fields',
+      should: 'return complete judge prompt with all sections',
+      actual: judgePrompt,
+      expected
+    });
+  });
+});
+
+describe('extractTests()', () => {
+  test('extracts and returns validated test structure from agent output', async () => {
+    const testDir = createTempDir();
+
+    try {
+      mkdirSync(testDir, { recursive: true });
+
+      const promptFile = join(testDir, 'prompt.mdc');
+      writeFileSync(promptFile, 'You are a math helper.');
+
+      const testFile = join(testDir, 'test.sudo');
+      writeFileSync(testFile, 'import "prompt.mdc"\n\n- Given addition, should add correctly');
+
+      const extractedData = {
+        userPrompt: 'What is 2+2?',
+        importPaths: ['prompt.mdc'],
+        assertions: [
+          { id: 1, requirement: 'Given addition, should add correctly' }
+        ]
+      };
+
+      const mockAgentConfig = {
+        command: 'node',
+        args: ['-e', `console.log(JSON.stringify(${JSON.stringify(extractedData)}))`]
+      };
+
+      const result = await extractTests({
+        testContent: 'import "prompt.mdc"\n\n- Given addition, should add correctly',
+        testFilePath: testFile,
+        agentConfig: mockAgentConfig,
+        timeout: 5000,
+        projectRoot: testDir
+      });
+
+      assert({
+        given: 'valid extraction output with import file on disk',
+        should: 'return extracted userPrompt',
+        actual: result.userPrompt,
+        expected: 'What is 2+2?'
+      });
+
+      assert({
+        given: 'valid extraction output with import file on disk',
+        should: 'return promptUnderTest from resolved import',
+        actual: result.promptUnderTest,
+        expected: 'You are a math helper.'
+      });
+
+      assert({
+        given: 'valid extraction output with one assertion',
+        should: 'return assertions array of length 1',
+        actual: result.assertions.length,
+        expected: 1
+      });
+
+      assert({
+        given: 'valid extraction output',
+        should: 'preserve the requirement text in assertions',
+        actual: result.assertions[0].requirement,
+        expected: 'Given addition, should add correctly'
+      });
+    } finally {
+      rmSync(testDir, { recursive: true, force: true });
+    }
+  });
+
+  test('concatenates multiple imported prompt files', async () => {
+    const testDir = createTempDir();
+
+    try {
+      mkdirSync(testDir, { recursive: true });
+
+      writeFileSync(join(testDir, 'rules1.mdc'), 'Rule 1: Be concise');
+      writeFileSync(join(testDir, 'rules2.mdc'), 'Rule 2: Be accurate');
+
+      const extractedData = {
+        userPrompt: 'Test prompt',
+        importPaths: ['rules1.mdc', 'rules2.mdc'],
+        assertions: [{ id: 1, requirement: 'Given rules, should follow' }]
+      };
+
+      const mockAgentConfig = {
+        command: 'node',
+        args: ['-e', `console.log(JSON.stringify(${JSON.stringify(extractedData)}))`]
+      };
+
+      const result = await extractTests({
+        testContent: 'import "rules1.mdc"\nimport "rules2.mdc"\n\n- Given rules, should follow',
+        testFilePath: join(testDir, 'test.sudo'),
+        agentConfig: mockAgentConfig,
+        timeout: 5000,
+        projectRoot: testDir
+      });
+
+      assert({
+        given: 'two import files',
+        should: 'concatenate both file contents into promptUnderTest',
+        actual: result.promptUnderTest.includes('Rule 1') && result.promptUnderTest.includes('Rule 2'),
+        expected: true
+      });
+    } finally {
+      rmSync(testDir, { recursive: true, force: true });
+    }
+  });
+
+  test('resolves import paths relative to projectRoot, not test file location', async () => {
+    const testDir = createTempDir();
+
+    try {
+      mkdirSync(testDir, { recursive: true });
+      const nestedDir = join(testDir, 'nested', 'deep');
+      mkdirSync(nestedDir, { recursive: true });
+
+      writeFileSync(join(testDir, 'root-prompt.mdc'), 'Root level prompt');
+
+      const extractedData = {
+        userPrompt: 'Test',
+        importPaths: ['root-prompt.mdc'],
+        assertions: [{ id: 1, requirement: 'Given test, should pass' }]
+      };
+
+      const mockAgentConfig = {
+        command: 'node',
+        args: ['-e', `console.log(JSON.stringify(${JSON.stringify(extractedData)}))`]
+      };
+
+      const result = await extractTests({
+        testContent: 'import "root-prompt.mdc"\n\n- Given test, should pass',
+        testFilePath: join(nestedDir, 'test.sudo'),
+        agentConfig: mockAgentConfig,
+        timeout: 5000,
+        projectRoot: testDir
+      });
+
+      assert({
+        given: 'import path relative to project root',
+        should: 'resolve and read file from project root, not test file directory',
+        actual: result.promptUnderTest,
+        expected: 'Root level prompt'
+      });
+    } finally {
+      rmSync(testDir, { recursive: true, force: true });
+    }
+  });
+
+  test('allows imports from paths outside project root', async () => {
+    const testDir = createTempDir();
+
+    try {
+      mkdirSync(testDir, { recursive: true });
+
+      const externalDir = join(testDir, 'external');
+      mkdirSync(externalDir, { recursive: true });
+      writeFileSync(join(externalDir, 'shared-prompt.mdc'), 'External shared prompt content');
+
+      const projectDir = join(testDir, 'project');
+      mkdirSync(projectDir, { recursive: true });
+
+      const extractedData = {
+        userPrompt: 'test',
+        importPaths: ['../external/shared-prompt.mdc'],
+        assertions: [{ id: 1, requirement: 'Given test, should pass' }]
+      };
+
+      const mockAgentConfig = {
+        command: 'node',
+        args: ['-e', `console.log(JSON.stringify(${JSON.stringify(extractedData)}))`]
+      };
+
+      const result = await extractTests({
+        testContent: 'import "../external/shared-prompt.mdc"\n\n- Given test, should pass',
+        testFilePath: join(projectDir, 'test.sudo'),
+        agentConfig: mockAgentConfig,
+        timeout: 5000,
+        projectRoot: projectDir
+      });
+
+      assert({
+        given: 'import path traversing outside project root',
+        should: 'resolve and read the external file without error',
+        actual: result.promptUnderTest,
+        expected: 'External shared prompt content'
+      });
+    } finally {
+      rmSync(testDir, { recursive: true, force: true });
+    }
+  });
+
+  test('throws ValidationError when promptUnderTest import is missing', async () => {
+    const extractedData = {
+      userPrompt: 'What is 2+2?',
+      importPaths: [],
+      assertions: [{ id: 1, requirement: 'Given a test, should pass' }]
+    };
+
+    const mockAgentConfig = {
+      command: 'node',
+      args: ['-e', `console.log(JSON.stringify(${JSON.stringify(extractedData)}))`]
+    };
+
+    const error = await Try(extractTests, {
+      testContent: '- Given test, should pass',
+      testFilePath: '/test/test.sudo',
+      agentConfig: mockAgentConfig,
+      timeout: 5000
+    });
+
+    const invoked = [];
+    handleAIErrors({ ...allNoop, ValidationError: () => invoked.push('ValidationError') })(error);
+
+    assert({
+      given: 'no import paths (no promptUnderTest)',
+      should: 'throw an error that routes to the ValidationError handler',
+      actual: invoked,
+      expected: ['ValidationError']
+    });
+
+    assert({
+      given: 'no promptUnderTest import declared',
+      should: 'include MISSING_PROMPT_UNDER_TEST code in error',
+      actual: error?.cause?.code,
+      expected: 'MISSING_PROMPT_UNDER_TEST'
+    });
+  });
+
+  test('throws ValidationError when userPrompt is empty', async () => {
+    const extractedData = {
+      userPrompt: '',
+      importPaths: ['package.json'],
+      assertions: [{ id: 1, requirement: 'Given a test, should pass' }]
+    };
+
+    const mockAgentConfig = {
+      command: 'node',
+      args: ['-e', `console.log(JSON.stringify(${JSON.stringify(extractedData)}))`]
+    };
+
+    const error = await Try(extractTests, {
+      testContent: 'import "package.json"\n\n- Given test, should pass',
+      testFilePath: '/test/test.sudo',
+      agentConfig: mockAgentConfig,
+      timeout: 5000
+    });
+
+    const invoked = [];
+    handleAIErrors({ ...allNoop, ValidationError: () => invoked.push('ValidationError') })(error);
+
+    assert({
+      given: 'empty userPrompt in extraction result',
+      should: 'throw an error that routes to the ValidationError handler',
+      actual: invoked,
+      expected: ['ValidationError']
+    });
+
+    assert({
+      given: 'empty userPrompt',
+      should: 'include MISSING_USER_PROMPT code in error',
+      actual: error?.cause?.code,
+      expected: 'MISSING_USER_PROMPT'
+    });
+  });
+
+  test('throws ValidationError when no assertions found', async () => {
+    const extractedData = {
+      userPrompt: 'test prompt',
+      importPaths: ['package.json'],
+      assertions: []
+    };
+
+    const mockAgentConfig = {
+      command: 'node',
+      args: ['-e', `console.log(JSON.stringify(${JSON.stringify(extractedData)}))`]
+    };
+
+    const error = await Try(extractTests, {
+      testContent: 'import "package.json"\n\nuserPrompt = """test"""',
+      testFilePath: '/test/test.sudo',
+      agentConfig: mockAgentConfig,
+      timeout: 5000
+    });
+
+    const invoked = [];
+    handleAIErrors({ ...allNoop, ValidationError: () => invoked.push('ValidationError') })(error);
+
+    assert({
+      given: 'empty assertions array',
+      should: 'throw an error that routes to the ValidationError handler',
+      actual: invoked,
+      expected: ['ValidationError']
+    });
+
+    assert({
+      given: 'empty assertions array',
+      should: 'include NO_ASSERTIONS_FOUND code in error',
+      actual: error?.cause?.code,
+      expected: 'NO_ASSERTIONS_FOUND'
+    });
+  });
+
+  test('throws ValidationError with PROMPT_READ_FAILED when import file does not exist', async () => {
+    const testDir = createTempDir();
+
+    try {
+      mkdirSync(testDir, { recursive: true });
+
+      const extractedData = {
+        userPrompt: 'Test',
+        importPaths: ['nonexistent.mdc'],
+        assertions: [{ id: 1, requirement: 'Given test, should pass' }]
+      };
+
+      const mockAgentConfig = {
+        command: 'node',
+        args: ['-e', `console.log(JSON.stringify(${JSON.stringify(extractedData)}))`]
+      };
+
+      const error = await Try(extractTests, {
+        testContent: 'import "nonexistent.mdc"\n\n- Given test, should pass',
+        testFilePath: join(testDir, 'test.sudo'),
+        agentConfig: mockAgentConfig,
+        timeout: 5000,
+        projectRoot: testDir
+      });
+
+      const invoked = [];
+      handleAIErrors({ ...allNoop, ValidationError: () => invoked.push('ValidationError') })(error);
+
+      assert({
+        given: 'missing import file on disk',
+        should: 'throw an error that routes to the ValidationError handler',
+        actual: invoked,
+        expected: ['ValidationError']
+      });
+
+      assert({
+        given: 'missing import file on disk',
+        should: 'include PROMPT_READ_FAILED code in error',
+        actual: error?.cause?.code,
+        expected: 'PROMPT_READ_FAILED'
+      });
+
+      assert({
+        given: 'missing import file on disk',
+        should: 'preserve original ENOENT error as cause',
+        actual: error?.cause?.cause?.code,
+        expected: 'ENOENT'
+      });
+    } finally {
+      rmSync(testDir, { recursive: true, force: true });
+    }
+  });
+});