paralleldrive
diff --git a/‎.eslintrc.json‎
Lines changed: 2 additions & 1 deletion b/‎.eslintrc.json‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎README.md‎
Lines changed: 94 additions & 0 deletions b/‎README.md‎
Lines changed: 94 additions & 0 deletions
diff --git a/‎bin/riteway.js‎
Lines changed: 39 additions & 13 deletions b/‎bin/riteway.js‎
Lines changed: 39 additions & 13 deletions
diff --git a/‎source/agent-config.js‎
Lines changed: 105 additions & 27 deletions b/‎source/agent-config.js‎
Lines changed: 105 additions & 27 deletions
@@ -2,7 +2,8 @@
   "env": {
     "browser": true,
     "commonjs": true,
-    "es6": true
+    "es6": true,
+    "node": true
   },
   "extends": [
     "eslint:recommended",
 
@@ -69,6 +69,100 @@ In this case, we're using [nyc](https://www.npmjs.com/package/nyc), which genera
 Riteway requires Node.js 16+ and uses native ES modules. Add `"type": "module"` to your package.json to enable ESM support. For JSX component testing, you'll need a build tool that can transpile JSX (see [JSX Setup](#jsx-setup) below).
 
 
+## `riteway ai` — AI Prompt Evaluations
+
+The `riteway ai` CLI runs your AI agent prompt evaluations against a configurable pass-rate threshold. Write a `.sudo` test file, run it through any supported AI agent, and get a TAP-formatted report with per-assertion pass rates across multiple runs.
+
+### Authentication
+
+All agents use OAuth authentication — no API keys needed. Authenticate once before running evals:
+
+| Agent | Command | Docs |
+|-------|---------|------|
+| Claude | `claude setup-token` | [Claude Code docs](https://docs.anthropic.com/en/docs/claude-code) |
+| Cursor | `agent login` | [Cursor docs](https://docs.cursor.com/context/rules-for-ai) |
+| OpenCode | See docs | [opencode.ai/docs/cli](https://opencode.ai/docs/cli/) |
+
+### Writing a test file
+
+AI evals are written in `.sudo` files using [SudoLang](https://github.com/paralleldrive/sudolang) syntax:
+
+```
+# my-feature-test.sudo
+
+import 'path/to/spec.mdc'   # optional: the prompt-under-test (shared spec or task)
+
+userPrompt = """
+Implement the sum function as described.
+"""
+
+- Given the spec, should name the function sum
+- Given the spec, should accept two parameters named a and b
+- Given the spec, should return the correct sum of the two parameters
+```
+
+Each `- Given ..., should ...` line becomes an independently judged assertion. The agent is asked to respond to the `userPrompt` (with any imported spec as context), and a judge agent scores each assertion across all runs.
+
+### Running an eval
+
+```shell
+riteway ai path/to/my-feature-test.sudo
+```
+
+By default this runs **4 passes**, requires a **75% pass rate**, uses the **claude** agent, and runs up to **4 tests concurrently**.
+
+```shell
+# Specify runs, threshold, and agent
+riteway ai path/to/test.sudo --runs 10 --threshold 80 --agent opencode
+
+# Use a Cursor agent with color output
+riteway ai path/to/test.sudo --agent cursor --color
+
+# Use a custom agent config file (mutually exclusive with --agent)
+riteway ai path/to/test.sudo --agent-config ./my-agent.json
+```
+
+### Options
+
+| Flag | Default | Description |
+|------|---------|-------------|
+| `--runs N` | `4` | Number of passes per assertion |
+| `--threshold P` | `75` | Required pass percentage (0–100) |
+| `--agent NAME` | `claude` | Agent: `claude`, `opencode`, `cursor`, or a custom name from `riteway.agent-config.json` |
+| `--agent-config FILE` | — | Path to a flat single-agent JSON config `{"command","args","outputFormat"}` — mutually exclusive with `--agent` |
+| `--concurrency N` | `4` | Max concurrent test executions |
+| `--color` | off | Enable ANSI color output |
+
+Results are written as a TAP markdown file under `ai-evals/` in the project root.
+
+### Custom agent configuration
+
+`riteway ai init` writes all built-in agent configs to `riteway.agent-config.json` in your project root, so you can add custom agents or tweak existing flags:
+
+```shell
+riteway ai init           # create riteway.agent-config.json
+riteway ai init --force   # overwrite existing file
+```
+
+The generated file is a keyed registry. Add a custom agent entry and use it with `--agent`:
+
+```json
+{
+  "claude":   { "command": "claude",   "args": ["-p", "--output-format", "json", "--no-session-persistence"], "outputFormat": "json"  },
+  "opencode": { "command": "opencode", "args": ["run", "--format", "json"],                                   "outputFormat": "ndjson" },
+  "cursor":   { "command": "agent",    "args": ["--print", "--output-format", "json", "--trust"],             "outputFormat": "json"  },
+  "my-agent": { "command": "my-tool",  "args": ["--json"],                                                    "outputFormat": "json"  }
+}
+```
+
+```shell
+riteway ai path/to/test.sudo --agent my-agent
+```
+
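+Once `riteway.agent-config.json` exists, it is the only source consulted for `--agent` names: an entry defined in it supersedes the library's built-in defaults for that agent, and a name that is missing from it is reported as an error rather than falling back to the built-ins. Use `--agent-config` to bypass the registry with a single flat config file.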
+
+---
+
 ## Example Usage
 
 ```js
 
@@ -7,7 +7,10 @@ import minimist from 'minimist';
 import { globSync } from 'glob';
 import dotignore from 'dotignore';
 import { handleAIErrors } from '../source/ai-errors.js';
-import { parseAIArgs, runAICommand, defaults } from '../source/ai-command.js';
+import { parseAIArgs, runAICommand } from '../source/ai-command.js';
+import { defaults } from '../source/constants.js';
+import { initAgentRegistry } from '../source/ai-init.js';
+import { registryFileName } from '../source/agent-config.js';
 
 const resolveModule = resolve.sync;
 const createMatcher = dotignore.createMatcher;
@@ -84,8 +87,8 @@ const handleAIError = handleAIErrors({
     console.error('\nUsage: riteway ai <file> [--runs N] [--threshold P] [--agent NAME | --agent-config FILE] [--color]');
     console.error(`  --runs N               Number of test runs per assertion (default: ${defaults.runs})`);
     console.error(`  --threshold P          Required pass percentage 0-100 (default: ${defaults.threshold})`);
-    console.error(`  --agent NAME           AI agent: claude, opencode, cursor (default: ${defaults.agent})`);
-    console.error('  --agent-config FILE    Path to custom agent config JSON (mutually exclusive with --agent)');
+    console.error(`  --agent NAME           Agent: claude, opencode, cursor, or custom from ${registryFileName} (default: ${defaults.agent})`);
+    console.error(`  --agent-config FILE    Path to a flat single-agent config JSON (mutually exclusive with --agent)`);
     console.error(`  --color                Enable ANSI color codes in terminal output (default: ${defaults.color ? 'enabled' : 'disabled'})`);
     console.error('\nAuthentication: Run agent-specific OAuth setup:');
     console.error("  Claude:  'claude setup-token'");
@@ -163,7 +166,7 @@ const handleAIError = handleAIErrors({
   },
   AgentConfigValidationError: ({ message }) => {
     console.error(`❌ Agent config validation failed: ${message}`);
-    console.error('💡 Config must be a JSON object with "command" (string) and optional "args" (string[]).');
+    console.error('💡 Each agent entry must have "command" (string), optional "args" (string[]), and optional "outputFormat" ("json"|"ndjson"|"text", default "json").');
     process.exit(1);
   }
 });
@@ -173,8 +176,8 @@ const main = async (argv) => {
     console.log(`
 Usage:
   riteway <patterns...> [options]       Run test files
-  riteway ai <file> [options]           Run AI prompt tests
-    --runs N --threshold P --agent NAME [--concurrency N] [--color] [--agent-config FILE]
+  riteway ai <file> [options]           Run AI prompt evaluations
+  riteway ai init [--force]             Write agent config registry to ${registryFileName}
 
 Test Runner Options:
   -r, --require <module>    Require module before running tests
@@ -183,11 +186,14 @@ Test Runner Options:
 AI Test Options:
   --runs N                  Number of test runs per assertion (default: ${defaults.runs})
   --threshold P             Required pass percentage 0-100 (default: ${defaults.threshold})
-  --agent NAME              AI agent to use: claude, opencode, cursor (default: ${defaults.agent})
-  --agent-config FILE       Path to custom agent config JSON {"command","args"} (mutually exclusive with --agent)
+  --agent NAME              Agent: claude, opencode, cursor, or custom from ${registryFileName} (default: ${defaults.agent})
+  --agent-config FILE       Path to a flat single-agent config JSON {"command","args","outputFormat"} (mutually exclusive with --agent)
   --concurrency N           Max concurrent test executions (default: ${defaults.concurrency})
   --color                   Enable ANSI color codes in terminal output
 
+AI Init Options:
+  --force                   Overwrite existing ${registryFileName}
+
 Authentication:
   All agents use OAuth authentication (no API keys required):
     Claude:  Run 'claude setup-token' - https://docs.anthropic.com/en/docs/claude-code
@@ -201,17 +207,37 @@ Examples:
   riteway ai prompts/test.sudo --agent opencode --runs 5
   riteway ai prompts/test.sudo --color
   riteway ai prompts/test.sudo --agent-config ./my-agent.json
+  riteway ai init
+  riteway ai init --force
     `);
     process.exit(0);
   }
 
   if (argv[0] === 'ai') {
-    try {
-      await mainAIRunner(argv.slice(1));
-      process.exit(0);
-    } catch (error) {
-      handleAIError(error);
+    if (argv[1] === 'init') {
+      try {
+        const force = argv.slice(2).includes('--force');
+        const outputPath = await initAgentRegistry({ force, cwd: process.cwd() });
+        console.log(`Wrote ${outputPath}`);
+        console.log('');
+        console.log("⚠️  You now own your agent configuration. The library's built-in agent configs");
+        console.log('    are bypassed for any agent defined in this file. Edit freely.');
+        console.log('');
+        console.log('    To use a custom agent:    riteway ai <file> --agent <name>');
+        console.log('    To use a specific config:  riteway ai <file> --agent-config <path>');
+        process.exit(0);
+      } catch (error) {
+        handleAIError(error);
+      }
+    } else {
+      try {
+        await mainAIRunner(argv.slice(1));
+        process.exit(0);
+      } catch (error) {
+        handleAIError(error);
+      }
     }
+    return;
   }
 
   return mainTestRunner(argv);
 
@@ -1,34 +1,38 @@
 import { readFile } from 'fs/promises';
+import { join } from 'path';
 import { z } from 'zod';
 import { createError } from 'error-causes';
 import { ValidationError, AgentConfigReadError, AgentConfigParseError, AgentConfigValidationError } from './ai-errors.js';
 
+export const registryFileName = 'riteway.agent-config.json';
+export const builtInAgentNames = ['claude', 'opencode', 'cursor'];
+
+const agentConfigs = {
+  claude: {
+    command: 'claude',
+    args: ['-p', '--output-format', 'json', '--no-session-persistence'],
+    outputFormat: 'json'
+  },
+  opencode: {
+    command: 'opencode',
+    args: ['run', '--format', 'json'],
+    outputFormat: 'ndjson'
+  },
+  cursor: {
+    command: 'agent',
+    args: ['--print', '--output-format', 'json', '--trust'],
+    outputFormat: 'json'
+  }
+};
+
 /**
  * Get agent configuration based on agent name.
  * Supports 'claude', 'opencode', and 'cursor' agents.
  * All agents use their standard OAuth authentication flows.
  * @param {string} agentName - Name of the agent ('claude', 'opencode', 'cursor')
- * @returns {Object} Agent configuration with command and args
+ * @returns {Object} Agent configuration with command, args, and outputFormat
  */
 export const getAgentConfig = (agentName = 'claude') => {
-  const agentConfigs = {
-    claude: {
-      command: 'claude',
-      args: ['-p', '--output-format', 'json', '--no-session-persistence'],
-      outputFormat: 'json'
-    },
-    opencode: {
-      command: 'opencode',
-      args: ['run', '--format', 'json'],
-      outputFormat: 'ndjson'
-    },
-    cursor: {
-      command: 'agent',
-      args: ['--print', '--output-format', 'json', '--trust'],
-      outputFormat: 'json'
-    }
-  };
-
   const config = agentConfigs[agentName.toLowerCase()];
   if (!config) {
     throw createError({
@@ -46,6 +50,7 @@ const agentConfigFileSchema = z.object({
   outputFormat: z.enum(['json', 'ndjson', 'text']).default('json')
 });
 
+// Throws AgentConfigReadError on any read failure, including ENOENT.
 const readAgentConfigFile = async ({ configPath }) => {
   try {
     return await readFile(configPath, 'utf-8');
@@ -58,24 +63,38 @@ const readAgentConfigFile = async ({ configPath }) => {
   }
 };
 
-const parseJson = ({ configPath, raw }) => {
+// Returns null on ENOENT; throws AgentConfigReadError on other failures.
+const readFileOrNull = async (filePath) => {
+  try {
+    return await readFile(filePath, 'utf-8');
+  } catch (err) {
+    if (err.code === 'ENOENT') return null;
+    throw createError({
+      ...AgentConfigReadError,
+      message: `Failed to read file: ${filePath}`,
+      cause: err
+    });
+  }
+};
+
+// Parses `raw` as JSON and returns the result. On a parse failure, throws
+// AgentConfigParseError whose message names `path` and whose `cause` is the original error.
+const parseJsonContent = ({ path, raw }) => {
   try {
     return JSON.parse(raw);
   } catch (err) {
     throw createError({
       ...AgentConfigParseError,
-      message: `Agent config file is not valid JSON: ${configPath}`,
+      message: `Not valid JSON: ${path}`,
       cause: err
     });
   }
 };
 
-const validateAgentConfig = (parsed) => {
-  const result = agentConfigFileSchema.safeParse(parsed);
+const validateWithSchema = (schema, label, parsed) => {
+  const result = schema.safeParse(parsed);
   if (!result.success) {
     throw createError({
       ...AgentConfigValidationError,
-      message: `Invalid agent config: ${z.prettifyError(result.error)}`,
+      message: `Invalid ${label}: ${z.prettifyError(result.error)}`,
       cause: result.error
     });
   }
@@ -90,10 +109,69 @@ const validateAgentConfig = (parsed) => {
  * Never pass a path derived from untrusted user input.
  *
  * @param {string} configPath - Path to the JSON config file
- * @returns {Promise<Object>} Validated agent config with command and args
+ * @returns {Promise<Object>} Validated agent config with command, args, and outputFormat
  */
 export const loadAgentConfig = async (configPath) => {
+  // Read, parse, then validate against the flat single-agent schema; each stage
+  // throws its own AgentConfig*Error (read / parse / validation).
   const raw = await readAgentConfigFile({ configPath });
-  const parsed = parseJson({ configPath, raw });
-  return validateAgentConfig(parsed);
+  const parsed = parseJsonContent({ path: configPath, raw });
+  return validateWithSchema(agentConfigFileSchema, 'agent config', parsed);
+};
+
+const agentRegistrySchema = z.record(z.string().min(1), agentConfigFileSchema);
+
+/**
+ * Load and validate a riteway.agent-config.json registry from a directory.
+ * Returns null when the file is not found — callers decide the fallback behavior.
+ * Throws on read permission errors, invalid JSON, or schema violations so
+ * misconfigured registries surface immediately rather than silently falling through.
+ *
+ * Trust boundary: registry entries are developer-controlled. The `command` field in
+ * each entry is executed as a subprocess without whitelist validation.
+ *
+ * @param {string} cwd - Directory to look for riteway.agent-config.json
+ * @returns {Promise<Object|null>} Registry map keyed by agent name, or null if not found
+ */
+export const loadAgentRegistry = async (cwd) => {
+  const registryPath = join(cwd, registryFileName);
+  const raw = await readFileOrNull(registryPath);
+  if (raw === null) return null;
+  const parsed = parseJsonContent({ path: registryPath, raw });
+  return validateWithSchema(agentRegistrySchema, 'agent registry', parsed);
+};
+
+/**
+ * Resolve agent configuration using a three-level priority chain:
+ * 1. `agentConfigPath` — explicit flat config file (highest priority)
+ * 2. `riteway.agent-config.json` in `cwd` — project registry (if present)
+ * 3. Built-in `getAgentConfig(agent)` — library defaults (fallback)
+ *
+ * Trust boundary: all config sources ultimately produce a `command` executed as a
+ * subprocess without whitelist validation. All paths must be developer-controlled.
+ *
+ * @param {Object} options
+ * @param {string} options.agent - Agent name
+ * @param {string} [options.agentConfigPath] - Path to a flat single-agent config file
+ * @param {string} options.cwd - Working directory to search for the project registry
+ * @returns {Promise<Object>} Resolved agent configuration
+ */
+export const resolveAgentConfig = async ({ agent, agentConfigPath, cwd }) => {
+  if (agentConfigPath) {
+    return loadAgentConfig(agentConfigPath);
+  }
+
+  const registry = await loadAgentRegistry(cwd);
+
+  if (registry !== null) {
+    const config = registry[agent];
+    if (!config) {
+      throw createError({
+        ...ValidationError,
+        code: 'AGENT_NOT_IN_REGISTRY',
+        message: `Agent "${agent}" not found in riteway.agent-config.json. Add it to the registry or use --agent-config.`
+      });
+    }
+    return config;
+  }
+
+  return getAgentConfig(agent);
 };