diff --git a/.claude/skills/load-audit/SKILL.md b/.claude/skills/load-audit/SKILL.md new file mode 100644 index 000000000..01b60f27b --- /dev/null +++ b/.claude/skills/load-audit/SKILL.md @@ -0,0 +1,58 @@ +--- +name: load-audit +description: Run a frontend load testing audit. Seeds data, tests all pages via Chrome DevTools MCP, records network calls, TanStack queries, DOM sizes, and generates a timestamped report. +user_invocable: true +--- + +# Frontend Load Testing Audit + +**Testing plan:** `frontend/docs/load-testing-plan.md` +**Previous reports:** `frontend/docs/audits/` + +--- + +## Agent Instructions + +When the user invokes `/load-audit`, perform a full frontend load testing audit: + +### 1. Setup + +1. Read the testing plan at `frontend/docs/load-testing-plan.md` for the full methodology +2. Seed data: `bun backend/scripts/seed-stress-test.ts --tier medium` +3. Verify nginx is responding on `localhost` (not the direct Vite port) + +### 2. Discover Routes + +1. Read `frontend/src/App.tsx` and extract all `<Route>` entries +2. Compare against the **Known Pages** table in the testing plan +3. Add any new routes to the audit list; skip removed routes and note them in the report +4. For parameterized routes (e.g., `/workflows/:id`), use real entities from seeded data + +### 3. Audit Every Page + +Follow the discovered route list (Known Pages + any new routes). For each page: + +1. Navigate via `localhost` (nginx reverse proxy) — **never use the direct Vite dev server port** +2. Wait for data to load (spinners gone, tables populated) +3. Take a screenshot and save to `.context/` +4. Record all fetch/XHR network requests with timing +5. Extract TanStack Query cache state using the snippet in the testing plan +6. Measure DOM element count: `document.querySelectorAll('*').length` +7. Note anomalies: ghost queries, duplicate calls, missing pagination, large payloads + +For Page 1 (Workflow List), also run a Chrome performance trace to get LCP, CLS, TTFB. + +### 4. 
Generate Report + +1. Compile all per-page data into a report following the format of previous reports in `frontend/docs/audits/` +2. Include cross-page analysis: DOM comparison, API call counts, shared queries, pagination gaps +3. Summarize findings with severity ratings (MEDIUM/LOW/INFO) +4. Save the report as `frontend/docs/audits/load-audit-<datetime>.md` using the current datetime +5. If a previous report exists, note any improvements or regressions compared to the most recent one + +### 5. Important Rules + +- This is an **audit only** — do NOT make any code changes or fixes +- All testing must go through `localhost` (nginx), not the direct Vite dev server port +- Use Chrome DevTools MCP tools for all browser interaction +- Save screenshots to `.context/` directory diff --git a/backend/scripts/seed-stress-test.ts b/backend/scripts/seed-stress-test.ts new file mode 100644 index 000000000..12ba5307d --- /dev/null +++ b/backend/scripts/seed-stress-test.ts @@ -0,0 +1,2194 @@ +import { Pool, PoolClient } from 'pg'; +import { randomUUID } from 'crypto'; +import { SecretEncryption, parseMasterKey } from '@shipsec/shared'; +import * as bcrypt from 'bcryptjs'; + +// ─── Configuration ─────────────────────────────────────────────────────────── + +const ORG_ID = process.env.SEED_ORG_ID || 'stress-test'; + +interface TierConfig { + workflows: number; + workflowVersionsRange: [number, number]; + workflowRuns: number; + tracesPerRunRange: [number, number]; + nodeIoPerRunRange: [number, number]; + schedules: number; + webhookConfigs: number; + webhookDeliveries: number; + humanInputRequests: number; + artifactsAndFiles: number; + mcpGroups: number; + mcpServers: number; + mcpToolsPerServerRange: [number, number]; + agentTraceEventsPerRun: [number, number]; + secrets: number; + apiKeys: number; +} + +const TIERS: Record = { + small: { + workflows: 10, + workflowVersionsRange: [1, 3], + workflowRuns: 100, + tracesPerRunRange: [5, 15], + nodeIoPerRunRange: [3, 10], + schedules: 5, + 
webhookConfigs: 5, + webhookDeliveries: 20, + humanInputRequests: 10, + artifactsAndFiles: 10, + mcpGroups: 3, + mcpServers: 6, + mcpToolsPerServerRange: [3, 8], + agentTraceEventsPerRun: [10, 50], + secrets: 5, + apiKeys: 3, + }, + medium: { + workflows: 50, + workflowVersionsRange: [1, 5], + workflowRuns: 2000, + tracesPerRunRange: [5, 50], + nodeIoPerRunRange: [3, 20], + schedules: 30, + webhookConfigs: 25, + webhookDeliveries: 200, + humanInputRequests: 100, + artifactsAndFiles: 100, + mcpGroups: 10, + mcpServers: 30, + mcpToolsPerServerRange: [3, 15], + agentTraceEventsPerRun: [10, 100], + secrets: 20, + apiKeys: 10, + }, + large: { + workflows: 200, + workflowVersionsRange: [1, 10], + workflowRuns: 20000, + tracesPerRunRange: [5, 500], + nodeIoPerRunRange: [3, 50], + schedules: 150, + webhookConfigs: 100, + webhookDeliveries: 2000, + humanInputRequests: 1000, + artifactsAndFiles: 500, + mcpGroups: 30, + mcpServers: 100, + mcpToolsPerServerRange: [3, 30], + agentTraceEventsPerRun: [10, 500], + secrets: 50, + apiKeys: 25, + }, +}; + +// Status/trigger distributions +const RUN_STATUS_DIST: [string, number][] = [ + ['COMPLETED', 0.6], + ['FAILED', 0.15], + ['RUNNING', 0.05], + ['CANCELLED', 0.05], + ['TIMED_OUT', 0.03], + ['QUEUED', 0.02], + ['TERMINATED', 0.02], + ['AWAITING_INPUT', 0.05], + ['STALE', 0.03], +]; + +const TRIGGER_DIST: [string, number][] = [ + ['manual', 0.4], + ['schedule', 0.3], + ['api', 0.2], + ['webhook', 0.1], +]; + +const SCHEDULE_STATUS_DIST: [string, number][] = [ + ['active', 0.7], + ['paused', 0.2], + ['error', 0.1], +]; + +const DELIVERY_STATUS_DIST: [string, number][] = [ + ['delivered', 0.7], + ['processing', 0.1], + ['failed', 0.2], +]; + +const HUMAN_INPUT_STATUS_DIST: [string, number][] = [ + ['pending', 0.3], + ['resolved', 0.5], + ['expired', 0.15], + ['cancelled', 0.05], +]; + +const HUMAN_INPUT_TYPE_DIST: [string, number][] = [ + ['approval', 0.4], + ['form', 0.25], + ['selection', 0.15], + ['review', 0.15], + ['acknowledge', 
0.05], +]; + +// Real registered component IDs from the worker component registry +const NODE_TYPES = [ + 'core.workflow.entrypoint', + 'core.http.request', + 'core.ai.agent', + 'core.logic.script', + 'core.ai.generate-text', + 'core.workflow.call', + 'core.file.writer', + 'core.artifact.writer', + 'core.notification.slack', + 'core.text.splitter', + 'core.text.joiner', + 'core.array.pack', + 'core.array.pick', + 'core.file.loader', + 'core.secret.fetch', + 'core.manual_action.approval', + 'core.manual_action.form', + 'core.manual_action.selection', + 'core.provider.openai', + 'core.provider.gemini', + 'core.destination.artifact', + 'core.destination.s3', + 'core.credentials.aws', + 'core.analytics.sink', +]; + +const CRON_EXPRESSIONS = [ + '0 */6 * * *', + '0 9 * * 1-5', + '*/15 * * * *', + '0 0 * * *', + '30 8 * * 1', + '0 */2 * * *', + '0 12 * * *', + '*/30 * * * *', +]; + +const TIMEZONES = ['UTC', 'America/New_York', 'Europe/London', 'Asia/Tokyo']; + +const WORKFLOW_NAMES = [ + 'Data Pipeline', + 'Customer Onboarding', + 'Report Generator', + 'Notification Sender', + 'Data Sync', + 'Invoice Processor', + 'Lead Scorer', + 'Content Publisher', + 'Inventory Check', + 'Order Fulfillment', + 'User Provisioning', + 'Backup Automation', + 'Health Monitor', + 'ETL Process', + 'API Gateway', + 'Log Aggregator', + 'Deployment Pipeline', + 'Testing Suite', + 'Security Scanner', + 'Cost Optimizer', +]; + +const SECRET_NAMES = [ + 'stripe_api_key', + 'openai_token', + 'aws_access_key', + 'aws_secret_key', + 'slack_webhook_url', + 'github_pat', + 'postgres_password', + 'redis_password', + 'sendgrid_key', + 'twilio_auth_token', + 'gcp_service_account', + 'datadog_api_key', + 'sentry_dsn', + 'cloudflare_token', + 'jwt_signing_secret', + 'smtp_password', + 'mongo_connection_string', + 'elasticsearch_api_key', + 'pagerduty_token', + 'vercel_token', + 'docker_registry_password', + 'npm_auth_token', + 'azure_client_secret', + 'firebase_admin_key', + 'algolia_api_key', + 
'mixpanel_token', + 'segment_write_key', + 'intercom_access_token', + 'hubspot_api_key', + 'salesforce_client_secret', + 'jira_api_token', + 'confluence_token', + 'linear_api_key', + 'notion_integration_secret', + 'airtable_api_key', + 'google_maps_api_key', + 'mapbox_access_token', + 'plaid_secret', + 'braintree_private_key', + 'coinbase_api_secret', + 'anthropic_api_key', + 'cohere_api_key', + 'pinecone_api_key', + 'weaviate_api_key', + 'replicate_api_token', + 'huggingface_token', + 'stability_api_key', + 'deepgram_api_key', + 'assemblyai_api_key', + 'eleven_labs_api_key', +]; + +const SECRET_TAGS: string[][] = [ + ['payment', 'stripe'], + ['ai', 'llm'], + ['cloud', 'aws'], + ['cloud', 'aws'], + ['messaging', 'slack'], + ['ci-cd', 'github'], + ['database', 'postgres'], + ['database', 'redis'], + ['email'], + ['messaging', 'twilio'], + ['cloud', 'gcp'], + ['monitoring'], + ['monitoring', 'sentry'], + ['cdn', 'cloudflare'], + ['auth'], + ['email', 'smtp'], + ['database', 'mongo'], + ['search'], + ['monitoring', 'pagerduty'], + ['deployment'], +]; + +const API_KEY_NAMES = [ + 'Production API', + 'Staging API', + 'CI/CD Pipeline', + 'Monitoring Service', + 'Partner Integration', + 'Mobile App', + 'Internal Dashboard', + 'Data Pipeline', + 'Webhook Processor', + 'Testing Automation', + 'Analytics Service', + 'Customer Portal', + 'Batch Processor', + 'Admin Console', + 'Third-Party Integration', + 'Load Balancer Health', + 'CDN Purge Service', + 'Log Collector', + 'Alerting System', + 'Backup Service', + 'Migration Script', + 'Sandbox Environment', + 'Demo App', + 'Developer Portal', + 'Support Tool', +]; + +const API_KEY_PERMISSION_PRESETS: { + workflows: { run: boolean; list: boolean; read: boolean }; + runs: { read: boolean; cancel: boolean }; +}[] = [ + // Full access + { workflows: { run: true, list: true, read: true }, runs: { read: true, cancel: true } }, + // Read-only + { workflows: { run: false, list: true, read: true }, runs: { read: true, cancel: false } 
}, + // Run-only + { workflows: { run: true, list: true, read: false }, runs: { read: false, cancel: false } }, + // Run + monitor + { workflows: { run: true, list: true, read: true }, runs: { read: true, cancel: false } }, + // List-only + { workflows: { run: false, list: true, read: false }, runs: { read: false, cancel: false } }, +]; + +const UNICODE_NAMES = [ + '数据管道', + 'ワークフロー処理', + '🚀 Rocket Pipeline', + 'الأتمتة الذكية', + '데이터 동기화', + 'Ünîcödé Wörkflöw', +]; + +const AGENT_PART_TYPES = [ + 'text', + 'tool-call', + 'tool-result', + 'step-start', + 'source-url', + 'reasoning', + 'file', + 'error', +]; + +// ─── Utilities ─────────────────────────────────────────────────────────────── + +function randInt(min: number, max: number): number { + return Math.floor(Math.random() * (max - min + 1)) + min; +} + +function pick(arr: T[]): T { + return arr[Math.floor(Math.random() * arr.length)]; +} + +function pickWeighted(dist: [string, number][]): string { + const r = Math.random(); + let cumulative = 0; + for (const [value, weight] of dist) { + cumulative += weight; + if (r <= cumulative) return value; + } + return dist[dist.length - 1][0]; +} + +function randomDate(daysBack: number): Date { + const now = Date.now(); + return new Date(now - Math.random() * daysBack * 24 * 60 * 60 * 1000); +} + +function shortUUID(): string { + return randomUUID().split('-')[0]; +} + +function escapeLiteral(val: string): string { + return val.replace(/'/g, "''"); +} + +function sqlVal(v: unknown): string { + if (v === null || v === undefined) return 'NULL'; + if (typeof v === 'number' || typeof v === 'bigint') return String(v); + if (typeof v === 'boolean') return v ? 
'TRUE' : 'FALSE'; + if (v instanceof Date) return `'${escapeLiteral(v.toISOString())}'`; + if (typeof v === 'object') return `'${escapeLiteral(JSON.stringify(v))}'::jsonb`; + return `'${escapeLiteral(String(v))}'`; +} + +function generateLongName(): string { + const base = pick(WORKFLOW_NAMES); + return ( + base + ' - ' + Array.from({ length: 20 }, () => pick(WORKFLOW_NAMES).split(' ')[0]).join(' ') + ); +} + +// ─── Graph Generators ──────────────────────────────────────────────────────── + +type TemplateType = 'simple_http' | 'ai_agent' | 'complex_branching' | 'subflow' | 'large_pipeline'; + +const TEMPLATES: TemplateType[] = [ + 'simple_http', + 'ai_agent', + 'complex_branching', + 'subflow', + 'large_pipeline', +]; + +function nodeCountForTemplate(template: TemplateType, isLarge: boolean): number { + switch (template) { + case 'simple_http': + return randInt(3, 5); + case 'ai_agent': + return randInt(5, 8); + case 'complex_branching': + return randInt(10, 20); + case 'subflow': + return randInt(8, 15); + case 'large_pipeline': + return isLarge ? 
randInt(20, 50) : randInt(15, 25); + } +} + +function generateWorkflowGraph( + name: string, + template: TemplateType, + nodeCount: number, + secretNames: string[] = [], +) { + const nodes: { + id: string; + type: string; + position: { x: number; y: number }; + data: { + label: string; + config: { params: Record; inputOverrides: Record }; + }; + }[] = []; + const edges: { + id: string; + source: string; + target: string; + sourceHandle?: string; + targetHandle?: string; + }[] = []; + + // Always start with entrypoint + const entryId = `node_${shortUUID()}`; + nodes.push({ + id: entryId, + type: 'core.workflow.entrypoint', + position: { x: 0, y: 0 }, + data: { + label: 'Entry Point', + config: { params: {}, inputOverrides: {} }, + }, + }); + + // Optionally inject a secret-fetch node (~30% of workflows when secrets exist) + let prevId = entryId; + if (secretNames.length > 0 && Math.random() < 0.3) { + const secretNodeId = `node_${shortUUID()}`; + const chosenSecret = pick(secretNames); + nodes.push({ + id: secretNodeId, + type: 'core.secret.fetch', + position: { x: 250, y: 0 }, + data: { + label: 'Fetch Secret', + config: { + params: { secretId: chosenSecret, outputFormat: 'raw' }, + inputOverrides: {}, + }, + }, + }); + edges.push({ + id: `edge_${shortUUID()}`, + source: entryId, + target: secretNodeId, + }); + prevId = secretNodeId; + } + + // Generate remaining nodes + const templateNodeTypes: Record = { + simple_http: [ + 'core.http.request', + 'core.text.splitter', + 'core.text.joiner', + 'core.file.writer', + ], + ai_agent: [ + 'core.ai.agent', + 'core.ai.generate-text', + 'core.provider.openai', + 'core.http.request', + 'core.text.joiner', + ], + complex_branching: [ + 'core.http.request', + 'core.logic.script', + 'core.manual_action.approval', + 'core.artifact.writer', + 'core.text.splitter', + 'core.array.pick', + ], + subflow: [ + 'core.http.request', + 'core.workflow.call', + 'core.logic.script', + 'core.file.loader', + 'core.text.joiner', + ], + 
large_pipeline: NODE_TYPES.slice(1), // all except entrypoint + }; + + const availableTypes = templateNodeTypes[template]; + + for (let i = 1; i < nodeCount; i++) { + const nodeId = `node_${shortUUID()}`; + const nodeType = pick(availableTypes); + const layer = Math.floor(i / 3); + const posInLayer = i % 3; + + nodes.push({ + id: nodeId, + type: nodeType, + position: { x: (layer + 1) * 250, y: posInLayer * 150 }, + data: { + label: `${nodeType.split('.').pop()} ${i}`, + config: { params: {}, inputOverrides: {} }, + }, + }); + + // Connect to previous node (simple chain with some branching for complex) + if (template === 'complex_branching' && i > 3 && Math.random() < 0.3) { + // Connect to a random earlier node instead + const randomEarlier = nodes[randInt(1, Math.max(1, i - 2))]; + edges.push({ + id: `edge_${shortUUID()}`, + source: randomEarlier.id, + target: nodeId, + }); + } else { + edges.push({ + id: `edge_${shortUUID()}`, + source: prevId, + target: nodeId, + }); + } + + prevId = nodeId; + } + + return { + name, + description: + Math.random() < 0.3 ? 
null : `Auto-generated ${template} workflow for stress testing`, + nodes, + edges, + viewport: { x: 0, y: 0, zoom: 1 }, + }; +} + +function generateCompiledDefinition( + graph: ReturnType, +): Record { + const actions = graph.nodes.map((n) => ({ + ref: n.id, + componentId: n.type, + params: {}, + inputOverrides: {}, + dependsOn: [] as string[], + inputMappings: {}, + })); + + // Set up dependsOn from edges + for (const edge of graph.edges) { + const action = actions.find((a) => a.ref === edge.target); + if (action && !action.dependsOn.includes(edge.source)) { + action.dependsOn.push(edge.source); + } + } + + const depCounts: Record = {}; + for (const a of actions) { + depCounts[a.ref] = a.dependsOn.length; + } + + return { + version: 2, + title: graph.name, + description: graph.description, + entrypoint: { ref: graph.nodes[0].id }, + nodes: Object.fromEntries( + graph.nodes.map((n) => [n.id, { ref: n.id, label: n.data.label, mode: 'normal' }]), + ), + edges: graph.edges.map((e) => ({ + id: e.id, + sourceRef: e.source, + targetRef: e.target, + kind: 'success', + })), + dependencyCounts: depCounts, + actions, + config: { environment: 'default', timeoutSeconds: 0 }, + }; +} + +// ─── Trace Generator ───────────────────────────────────────────────────────── + +function generateTraceSequence( + nodeRefs: string[], + runStatus: string, + maxTraces: number, +): { + type: string; + nodeRef: string; + message: string | null; + error: unknown; + outputSummary: unknown; + level: string; + data: unknown; +}[] { + const traces: { + type: string; + nodeRef: string; + message: string | null; + error: unknown; + outputSummary: unknown; + level: string; + data: unknown; + }[] = []; + + const maxPerNode = Math.max(2, Math.floor(maxTraces / Math.max(1, nodeRefs.length))); + const isFailed = runStatus === 'FAILED'; + const isRunning = runStatus === 'RUNNING' || runStatus === 'QUEUED'; + // For running runs, only process a subset of nodes (the run is still in progress) + const 
activeNodeCount = isRunning + ? Math.max(1, Math.floor(nodeRefs.length * (0.3 + Math.random() * 0.5))) + : nodeRefs.length; + + for (let ni = 0; ni < activeNodeCount && traces.length < maxTraces; ni++) { + const nodeRef = nodeRefs[ni]; + const isLastNode = ni === nodeRefs.length - 1; + const isLastActiveNode = ni === activeNodeCount - 1; + + // NODE_STARTED + traces.push({ + type: 'NODE_STARTED', + nodeRef, + message: null, + error: null, + outputSummary: null, + level: 'info', + data: null, + }); + + // Optional progress events + const progressCount = randInt(0, Math.min(3, maxPerNode - 2)); + for (let p = 0; p < progressCount && traces.length < maxTraces; p++) { + traces.push({ + type: 'NODE_PROGRESS', + nodeRef, + message: `Processing step ${p + 1}`, + error: null, + outputSummary: null, + level: 'info', + data: null, + }); + } + + // Optional HTTP events + if (Math.random() < 0.3 && traces.length + 2 <= maxTraces) { + const corrId = randomUUID(); + traces.push({ + type: 'HTTP_REQUEST_SENT', + nodeRef, + message: null, + error: null, + outputSummary: null, + level: 'info', + data: { + correlationId: corrId, + request: { method: 'GET', url: 'https://api.example.com/data' }, + }, + }); + traces.push({ + type: 'HTTP_RESPONSE_RECEIVED', + nodeRef, + message: null, + error: null, + outputSummary: null, + level: 'info', + data: { correlationId: corrId, har: { status: 200 } }, + }); + } + + // Completion + if (isFailed && isLastNode) { + traces.push({ + type: 'NODE_FAILED', + nodeRef, + message: 'Connection timeout', + error: { message: 'Connection timeout', code: 'ETIMEDOUT' }, + outputSummary: null, + level: 'error', + data: null, + }); + } else if (runStatus === 'CANCELLED' && isLastNode) { + traces.push({ + type: 'NODE_SKIPPED', + nodeRef, + message: 'Run cancelled', + error: null, + outputSummary: null, + level: 'warn', + data: null, + }); + } else if (runStatus === 'TIMED_OUT' && isLastNode) { + // Fix: TIMED_OUT runs should end with a timeout error, not 
NODE_COMPLETED + traces.push({ + type: 'NODE_FAILED', + nodeRef, + message: 'Execution timed out', + error: { message: 'Execution timed out', code: 'DEADLINE_EXCEEDED' }, + outputSummary: null, + level: 'error', + data: null, + }); + } else if ((runStatus === 'TERMINATED' || runStatus === 'STALE') && isLastNode) { + // Fix: TERMINATED/STALE runs should not show all nodes completed + traces.push({ + type: 'NODE_FAILED', + nodeRef, + message: runStatus === 'TERMINATED' ? 'Run terminated by system' : 'Run became stale', + error: { + message: runStatus === 'TERMINATED' ? 'Run terminated by system' : 'Run became stale', + code: runStatus === 'TERMINATED' ? 'TERMINATED' : 'STALE', + }, + outputSummary: null, + level: 'error', + data: null, + }); + } else if (isRunning && isLastActiveNode) { + // Last active node in a running run — still executing, no completion event + } else { + traces.push({ + type: 'NODE_COMPLETED', + nodeRef, + message: null, + error: null, + outputSummary: { result: 'ok' }, + level: 'info', + data: null, + }); + } + } + + return traces; +} + +// ─── Batch Insert Helper ───────────────────────────────────────────────────── + +async function batchInsert( + client: PoolClient, + table: string, + columns: string[], + rows: string[][], + batchSize = 500, +): Promise { + let inserted = 0; + for (let i = 0; i < rows.length; i += batchSize) { + const batch = rows.slice(i, i + batchSize); + const values = batch.map((row) => `(${row.join(', ')})`).join(',\n'); + const sql = `INSERT INTO ${table} (${columns.join(', ')}) VALUES ${values} ON CONFLICT DO NOTHING`; + const result = await client.query(sql); + inserted += result.rowCount ?? 0; + if (inserted % 1000 === 0 && inserted > 0) { + console.log(` ... 
${table}: ${inserted}/${rows.length} rows`); + } + } + return inserted; +} + +// ─── Seeders ───────────────────────────────────────────────────────────────── + +interface WorkflowData { + id: string; + name: string; + graph: ReturnType; + nodeRefs: string[]; + createdAt: Date; +} + +interface VersionData { + id: string; + workflowId: string; + version: number; +} + +interface RunData { + runId: string; + workflowId: string; + versionId: string | null; + version: number | null; + status: string; + nodeRefs: string[]; + createdAt: Date; + isAgentRun: boolean; +} + +interface SeededSecret { + id: string; + name: string; +} + +async function seedSecrets(client: PoolClient, count: number): Promise { + const masterKeyRaw = process.env.SECRET_STORE_MASTER_KEY; + if (!masterKeyRaw) { + console.log('\n⚠ SECRET_STORE_MASTER_KEY not set — skipping secrets seeding'); + return []; + } + + console.log(`\nSeeding ${count} secrets with encrypted values...`); + + const masterKey = parseMasterKey(masterKeyRaw); + const encryptor = new SecretEncryption(masterKey); + + const seeded: SeededSecret[] = []; + const secretRows: string[][] = []; + const versionRows: string[][] = []; + + const secretCols = [ + 'id', + 'name', + 'description', + 'tags', + 'organization_id', + 'created_at', + 'updated_at', + ]; + const versionCols = [ + 'id', + 'secret_id', + 'version', + 'encrypted_value', + 'iv', + 'auth_tag', + 'encryption_key_id', + 'created_at', + 'created_by', + 'organization_id', + 'is_active', + ]; + + for (let i = 0; i < count; i++) { + const secretId = randomUUID(); + const baseName = SECRET_NAMES[i % SECRET_NAMES.length]; + const name = `${baseName}_${shortUUID()}`; + const tags = SECRET_TAGS[i % SECRET_TAGS.length] ?? 
[]; + const createdAt = randomDate(90); + + seeded.push({ id: secretId, name }); + + secretRows.push([ + sqlVal(secretId), + sqlVal(name), + sqlVal(`Seed secret for ${baseName.replace(/_/g, ' ')}`), + sqlVal(tags), + sqlVal(ORG_ID), + sqlVal(createdAt), + sqlVal(new Date(createdAt.getTime() + randInt(0, 7 * 24 * 60 * 60 * 1000))), + ]); + + // Encrypt a realistic fake value + const fakeValue = `sk_test_${randomUUID().replace(/-/g, '')}`; + const material = await encryptor.encrypt(fakeValue); + + const versionId = randomUUID(); + versionRows.push([ + sqlVal(versionId), + sqlVal(secretId), + sqlVal(1), + sqlVal(material.ciphertext), + sqlVal(material.iv), + sqlVal(material.authTag), + sqlVal(material.keyId), + sqlVal(createdAt), + sqlVal('seed-script'), + sqlVal(ORG_ID), + sqlVal(true), + ]); + + // Fix: ~30% of secrets get a second version — version 1 is the older inactive one, + // version 2 is the current active one (version numbers should increase with time) + if (Math.random() < 0.3) { + // Demote the existing entry to version 1 (older, inactive) by re-dating it + const olderDate = new Date(createdAt.getTime() - randInt(1, 30) * 24 * 60 * 60 * 1000); + secretRows[secretRows.length - 1][5] = sqlVal(olderDate); // shift secret created_at back + + // Re-assign existing version row to be version 1 (older, inactive) + versionRows[versionRows.length - 1][2] = sqlVal(1); // version = 1 + versionRows[versionRows.length - 1][7] = sqlVal(olderDate); // created_at + versionRows[versionRows.length - 1][10] = sqlVal(false); // is_active = false + + // New version 2 is the current active one + const newValue = `sk_test_v2_${randomUUID().replace(/-/g, '')}`; + const newMaterial = await encryptor.encrypt(newValue); + + versionRows.push([ + sqlVal(randomUUID()), + sqlVal(secretId), + sqlVal(2), + sqlVal(newMaterial.ciphertext), + sqlVal(newMaterial.iv), + sqlVal(newMaterial.authTag), + sqlVal(newMaterial.keyId), + sqlVal(createdAt), // version 2 created at the secret's main 
created_at + sqlVal('seed-script'), + sqlVal(ORG_ID), + sqlVal(true), // active + ]); + } + } + + const insertedSecrets = await batchInsert(client, 'secrets', secretCols, secretRows); + const insertedVersions = await batchInsert(client, 'secret_versions', versionCols, versionRows); + console.log(` Inserted ${insertedSecrets} secrets, ${insertedVersions} secret versions`); + return seeded; +} + +async function seedApiKeys(client: PoolClient, count: number): Promise { + console.log(`\nSeeding ${count} API keys...`); + + const rows: string[][] = []; + const columns = [ + 'id', + 'name', + 'description', + 'key_hash', + 'key_prefix', + 'key_hint', + 'permissions', + 'scopes', + 'organization_id', + 'created_by', + 'is_active', + 'expires_at', + 'last_used_at', + 'usage_count', + 'rate_limit', + 'created_at', + 'updated_at', + ]; + + for (let i = 0; i < count; i++) { + const id = randomUUID(); + const name = `${API_KEY_NAMES[i % API_KEY_NAMES.length]} #${i + 1}`; + + // Generate a realistic key and hash it + const keyId = randomUUID().replace(/-/g, '').slice(0, 8); + const keySecret = randomUUID().replace(/-/g, '') + randomUUID().replace(/-/g, '').slice(0, 8); + const plainKey = `sk_live_${keyId}_${keySecret}`; + const keyHash = await bcrypt.hash(plainKey, 10); + const keyHint = plainKey.slice(-8); + + const permissions = pick(API_KEY_PERMISSION_PRESETS); + const isActive = Math.random() < 0.8; + const createdAt = randomDate(90); + const hasExpiry = Math.random() < 0.4; + const expiresAt = hasExpiry + ? new Date(Date.now() + randInt(1, 365) * 24 * 60 * 60 * 1000) + : null; + const wasUsed = Math.random() < 0.7; + const lastUsedAt = wasUsed ? randomDate(7) : null; + const usageCount = wasUsed ? randInt(1, 10000) : 0; + const rateLimit = Math.random() < 0.5 ? 
pick([60, 120, 300, 600, 1000]) : null; + + rows.push([ + sqlVal(id), + sqlVal(name), + sqlVal(`API key for ${name.toLowerCase()}`), + sqlVal(keyHash), + sqlVal('sk_live_'), + sqlVal(keyHint), + sqlVal(permissions), + sqlVal([]), + sqlVal(ORG_ID), + sqlVal('seed-script'), + sqlVal(isActive), + sqlVal(expiresAt), + sqlVal(lastUsedAt), + sqlVal(usageCount), + sqlVal(rateLimit), + sqlVal(createdAt), + sqlVal(new Date(createdAt.getTime() + randInt(0, 7 * 24 * 60 * 60 * 1000))), + ]); + } + + const inserted = await batchInsert(client, 'api_keys', columns, rows); + console.log(` Inserted ${inserted} API keys`); +} + +async function seedFiles(client: PoolClient, count: number): Promise { + console.log(`\nSeeding ${count} files...`); + const fileIds: string[] = []; + const rows: string[][] = []; + const columns = [ + 'id', + 'file_name', + 'mime_type', + 'size', + 'storage_key', + 'organization_id', + 'uploaded_at', + ]; + + for (let i = 0; i < count; i++) { + const id = randomUUID(); + fileIds.push(id); + const ext = pick(['json', 'csv', 'txt', 'pdf', 'png']); + rows.push([ + sqlVal(id), + sqlVal(`stress-test-file-${i}.${ext}`), + sqlVal(ext === 'png' ? 'image/png' : ext === 'pdf' ? 
'application/pdf' : `text/${ext}`), + sqlVal(randInt(100, 500000)), + sqlVal(`stress-test/${id}/${i}.${ext}`), + sqlVal(ORG_ID), + sqlVal(randomDate(90)), + ]); + } + + const inserted = await batchInsert(client, 'files', columns, rows); + console.log(` Inserted ${inserted} files`); + return fileIds; +} + +async function seedWorkflows( + client: PoolClient, + count: number, + tierName: string, + secretNames: string[] = [], +): Promise { + console.log(`\nSeeding ${count} workflows...`); + const workflows: WorkflowData[] = []; + const rows: string[][] = []; + const columns = [ + 'id', + 'name', + 'description', + 'graph', + 'organization_id', + 'compiled_definition', + 'last_run', + 'run_count', + 'created_at', + 'updated_at', + ]; + + for (let i = 0; i < count; i++) { + const id = randomUUID(); + const template = pick(TEMPLATES); + const isLarge = tierName === 'large'; + const nodeCount = nodeCountForTemplate(template, isLarge); + + // 5% long names, 5% unicode + let name: string; + const nameRoll = Math.random(); + if (nameRoll < 0.05) { + name = generateLongName(); + } else if (nameRoll < 0.1) { + name = pick(UNICODE_NAMES); + } else { + name = `${pick(WORKFLOW_NAMES)} #${i + 1}`; + } + + const graph = generateWorkflowGraph(name, template, nodeCount, secretNames); + const compiled = generateCompiledDefinition(graph); + const nodeRefs = graph.nodes.map((n) => n.id); + const createdAt = randomDate(90); + + workflows.push({ id, name, graph, nodeRefs, createdAt }); + + rows.push([ + sqlVal(id), + sqlVal(name), + sqlVal(graph.description), + sqlVal(graph), + sqlVal(ORG_ID), + sqlVal(compiled), + // Fix: last_run must be after workflow created_at (derived, not independent random) + sqlVal( + Math.random() < 0.8 + ? 
new Date( + createdAt.getTime() + randInt(60000, Date.now() - createdAt.getTime() || 60000), + ) + : null, + ), + // Fix: run_count set to 0 initially — updated after runs are seeded via SQL + sqlVal(0), + sqlVal(createdAt), + sqlVal(new Date(createdAt.getTime() + randInt(0, 7 * 24 * 60 * 60 * 1000))), + ]); + } + + const inserted = await batchInsert(client, 'workflows', columns, rows); + console.log(` Inserted ${inserted} workflows`); + return workflows; +} + +async function seedVersions( + client: PoolClient, + workflows: WorkflowData[], + versionRange: [number, number], +): Promise { + console.log(`\nSeeding workflow versions...`); + const versions: VersionData[] = []; + const rows: string[][] = []; + const columns = [ + 'id', + 'workflow_id', + 'version', + 'graph', + 'organization_id', + 'compiled_definition', + 'created_at', + ]; + + for (const wf of workflows) { + const vCount = randInt(versionRange[0], versionRange[1]); + for (let v = 1; v <= vCount; v++) { + const id = randomUUID(); + // Fix: version created_at must be after the workflow's created_at + const versionCreatedAt = new Date( + wf.createdAt.getTime() + v * randInt(60000, 7 * 24 * 60 * 60 * 1000), + ); + versions.push({ id, workflowId: wf.id, version: v }); + rows.push([ + sqlVal(id), + sqlVal(wf.id), + sqlVal(v), + sqlVal(wf.graph), + sqlVal(ORG_ID), + sqlVal(generateCompiledDefinition(wf.graph)), + sqlVal(versionCreatedAt), + ]); + } + } + + const inserted = await batchInsert(client, 'workflow_versions', columns, rows); + console.log(` Inserted ${inserted} workflow versions`); + return versions; +} + +async function seedMcpData( + client: PoolClient, + groupCount: number, + serverCount: number, + toolsRange: [number, number], +): Promise<{ groupIds: string[]; serverIds: string[] }> { + console.log(`\nSeeding MCP data (${groupCount} groups, ${serverCount} servers)...`); + + // Groups + const groupIds: string[] = []; + const groupRows: string[][] = []; + const groupCols = [ + 'id', + 'slug', + 
'name', + 'description', + 'credential_contract_name', + 'enabled', + 'created_at', + 'updated_at', + ]; + + for (let i = 0; i < groupCount; i++) { + const id = randomUUID(); + groupIds.push(id); + // Fix: updated_at must be after created_at + const groupCreatedAt = randomDate(60); + groupRows.push([ + sqlVal(id), + sqlVal(`stress-test-group-${i}`), + sqlVal(`Stress Test Group ${i}`), + sqlVal(`MCP group for stress testing #${i}`), + sqlVal('none'), + sqlVal(true), + sqlVal(groupCreatedAt), + sqlVal(new Date(groupCreatedAt.getTime() + randInt(0, 30 * 24 * 60 * 60 * 1000))), + ]); + } + + await batchInsert(client, 'mcp_groups', groupCols, groupRows); + console.log(` Inserted ${groupCount} MCP groups`); + + // Servers + const serverIds: string[] = []; + const serverRows: string[][] = []; + const serverCols = [ + 'id', + 'name', + 'description', + 'transport_type', + 'endpoint', + 'command', + 'enabled', + 'health_check_url', + 'last_health_status', + 'group_id', + 'organization_id', + 'created_at', + 'updated_at', + ]; + + for (let i = 0; i < serverCount; i++) { + const id = randomUUID(); + serverIds.push(id); + const isHttp = Math.random() < 0.6; + const groupId = pick(groupIds); + // Fix: updated_at must be after created_at + const serverCreatedAt = randomDate(60); + serverRows.push([ + sqlVal(id), + sqlVal(`stress-test-server-${i}`), + sqlVal(`MCP server #${i} for stress testing`), + sqlVal(isHttp ? 'http' : 'stdio'), + sqlVal(isHttp ? `http://localhost:${3000 + i}/mcp` : null), + sqlVal(isHttp ? null : 'npx'), + sqlVal(Math.random() < 0.9), + sqlVal(isHttp ? 
`http://localhost:${3000 + i}/health` : null), + sqlVal(pick(['healthy', 'unhealthy', 'unknown', null])), + sqlVal(groupId), + sqlVal(ORG_ID), + sqlVal(serverCreatedAt), + sqlVal(new Date(serverCreatedAt.getTime() + randInt(0, 30 * 24 * 60 * 60 * 1000))), + ]); + } + + await batchInsert(client, 'mcp_servers', serverCols, serverRows); + console.log(` Inserted ${serverCount} MCP servers`); + + // Group-server junction + const junctionRows: string[][] = []; + const junctionCols = ['group_id', 'server_id', 'recommended', 'default_selected', 'created_at']; + + for (const sid of serverIds) { + const gid = pick(groupIds); + junctionRows.push([ + sqlVal(gid), + sqlVal(sid), + sqlVal(Math.random() < 0.3), + sqlVal(Math.random() < 0.7), + sqlVal(randomDate(60)), + ]); + } + + await batchInsert(client, 'mcp_group_servers', junctionCols, junctionRows); + + // Tools + const toolRows: string[][] = []; + const toolCols = [ + 'id', + 'server_id', + 'tool_name', + 'description', + 'input_schema', + 'enabled', + 'discovered_at', + ]; + + for (const sid of serverIds) { + const toolCount = randInt(toolsRange[0], toolsRange[1]); + for (let t = 0; t < toolCount; t++) { + toolRows.push([ + sqlVal(randomUUID()), + sqlVal(sid), + sqlVal(`tool_${shortUUID()}_${t}`), + sqlVal(`Auto-discovered tool #${t}`), + sqlVal({ type: 'object', properties: { input: { type: 'string' } } }), + sqlVal(Math.random() < 0.85), + sqlVal(randomDate(30)), + ]); + } + } + + const toolsInserted = await batchInsert(client, 'mcp_server_tools', toolCols, toolRows); + console.log(` Inserted ${toolsInserted} MCP server tools`); + + return { groupIds, serverIds }; +} + +async function seedRuns( + client: PoolClient, + workflows: WorkflowData[], + versions: VersionData[], + count: number, +): Promise { + console.log(`\nSeeding ${count} workflow runs...`); + const runs: RunData[] = []; + const rows: string[][] = []; + const columns = [ + 'run_id', + 'workflow_id', + 'workflow_version_id', + 'workflow_version', + 
'temporal_run_id', + 'parent_run_id', + 'parent_node_ref', + 'total_actions', + 'inputs', + 'trigger_type', + 'trigger_source', + 'trigger_label', + 'input_preview', + 'organization_id', + 'status', + 'close_time', + 'created_at', + 'updated_at', + ]; + + const versionsByWorkflow = new Map(); + for (const v of versions) { + const arr = versionsByWorkflow.get(v.workflowId) || []; + arr.push(v); + versionsByWorkflow.set(v.workflowId, arr); + } + + // First pass: create runs + for (let i = 0; i < count; i++) { + const wf = pick(workflows); + const wfVersions = versionsByWorkflow.get(wf.id) || []; + const version = wfVersions.length > 0 ? pick(wfVersions) : null; + const status = pickWeighted(RUN_STATUS_DIST); + const trigger = pickWeighted(TRIGGER_DIST); + const runId = `wfr_${randomUUID()}`; + const isOpenStatus = ['RUNNING', 'QUEUED', 'AWAITING_INPUT'].includes(status); + // Fix: open runs use recent timestamps for realistic durations; + // closed runs use a random time after the workflow was created (not before it) + const createdAt = isOpenStatus + ? randomDate(0.1) + : new Date( + wf.createdAt.getTime() + + randInt(60000, Math.max(60001, Date.now() - wf.createdAt.getTime())), + ); + const isAgentRun = wf.graph.nodes.some((n) => n.type === 'core.ai.agent'); + + runs.push({ + runId, + workflowId: wf.id, + versionId: version?.id ?? null, + version: version?.version ?? null, + status, + nodeRefs: wf.nodeRefs, + createdAt, + isAgentRun, + }); + + const triggerLabels: Record = { + manual: 'Manual run', + schedule: 'Scheduled run', + api: 'API trigger', + webhook: 'Webhook trigger', + }; + + rows.push([ + sqlVal(runId), + sqlVal(wf.id), + sqlVal(version?.id ?? null), + sqlVal(version?.version ?? null), + sqlVal(`temporal-${shortUUID()}`), + sqlVal(null), // parentRunId set in second pass + sqlVal(null), + sqlVal(wf.nodeRefs.length), + sqlVal({}), + sqlVal(trigger), + sqlVal(trigger === 'webhook' ? 
`/hooks/${shortUUID()}` : null), + sqlVal(triggerLabels[trigger] || 'Manual run'), + sqlVal({ runtimeInputs: {}, nodeOverrides: {} }), + sqlVal(ORG_ID), + sqlVal(status), + sqlVal(isOpenStatus ? null : new Date(createdAt.getTime() + randInt(1000, 300000))), + sqlVal(createdAt), + sqlVal(new Date(createdAt.getTime() + randInt(1000, 300000))), + ]); + } + + const inserted = await batchInsert(client, 'workflow_runs', columns, rows); + console.log(` Inserted ${inserted} workflow runs`); + + // Second pass: set parent-child relationships (~10% are child runs, up to 3-4 deep chains) + const parentCandidates = runs.filter((r) => r.status === 'COMPLETED'); + const childCount = Math.floor(runs.length * 0.1); + + if (parentCandidates.length > 0 && childCount > 0) { + console.log(` Setting ${childCount} parent-child relationships...`); + const shuffled = [...runs].sort(() => Math.random() - 0.5).slice(0, childCount); + for (const child of shuffled) { + // Fix: only assign parent if child was created after parent (child runs are spawned during parent execution) + const validParents = parentCandidates.filter( + (p) => p.runId !== child.runId && p.createdAt.getTime() < child.createdAt.getTime(), + ); + if (validParents.length === 0) continue; + const parent = pick(validParents); + await client.query( + `UPDATE workflow_runs SET parent_run_id = $1, parent_node_ref = $2 WHERE run_id = $3`, + [parent.runId, pick(parent.nodeRefs), child.runId], + ); + } + } + + return runs; +} + +async function seedTraces( + client: PoolClient, + runs: RunData[], + tracesRange: [number, number], +): Promise { + console.log(`\nSeeding workflow traces...`); + const columns = [ + 'run_id', + 'workflow_id', + 'organization_id', + 'type', + 'node_ref', + 'timestamp', + 'message', + 'error', + 'output_summary', + 'level', + 'data', + 'sequence', + 'created_at', + ]; + + let totalInserted = 0; + const batchRows: string[][] = []; + + // 2% of runs have zero traces + const runsWithTraces = runs.filter(() => 
Math.random() > 0.02); + + for (const run of runsWithTraces) { + const maxTraces = randInt(tracesRange[0], tracesRange[1]); + const traceEvents = generateTraceSequence(run.nodeRefs, run.status, maxTraces); + const baseTime = run.createdAt.getTime(); + + // Fix: use cumulative offset so timestamps are always monotonically increasing + let cumulativeOffset = 0; + for (let seq = 0; seq < traceEvents.length; seq++) { + const evt = traceEvents[seq]; + cumulativeOffset += randInt(100, 5000); + const ts = new Date(baseTime + cumulativeOffset); + + batchRows.push([ + sqlVal(run.runId), + sqlVal(run.workflowId), + sqlVal(ORG_ID), + sqlVal(evt.type), + sqlVal(evt.nodeRef), + sqlVal(ts), + sqlVal(evt.message), + sqlVal(evt.error), + sqlVal(evt.outputSummary), + sqlVal(evt.level), + sqlVal(evt.data), + sqlVal(seq), + sqlVal(ts), + ]); + } + + // Flush periodically + if (batchRows.length >= 1000) { + totalInserted += await batchInsert(client, 'workflow_traces', columns, batchRows, 500); + batchRows.length = 0; + } + } + + if (batchRows.length > 0) { + totalInserted += await batchInsert(client, 'workflow_traces', columns, batchRows, 500); + } + + console.log(` Inserted ${totalInserted} workflow traces`); +} + +async function seedNodeIO( + client: PoolClient, + runs: RunData[], + ioRange: [number, number], +): Promise { + console.log(`\nSeeding node I/O...`); + const columns = [ + 'run_id', + 'node_ref', + 'workflow_id', + 'organization_id', + 'component_id', + 'inputs', + 'inputs_size', + 'inputs_spilled', + 'outputs', + 'outputs_size', + 'outputs_spilled', + 'started_at', + 'completed_at', + 'duration_ms', + 'status', + 'error_message', + 'created_at', + 'updated_at', + ]; + + let totalInserted = 0; + const batchRows: string[][] = []; + const seenKeys = new Set(); + + for (const run of runs) { + const ioCount = Math.min(randInt(ioRange[0], ioRange[1]), run.nodeRefs.length); + const selectedNodes = run.nodeRefs.slice(0, ioCount); + + // Fix: stagger node start times sequentially 
so later nodes start after earlier ones + let nodeTimeOffset = 0; + for (const nodeRef of selectedNodes) { + const key = `${run.runId}:${nodeRef}`; + if (seenKeys.has(key)) continue; + seenKeys.add(key); + + const isFailed = + run.status === 'FAILED' && nodeRef === selectedNodes[selectedNodes.length - 1]; + const isRunning = ['RUNNING', 'QUEUED'].includes(run.status); + const isLastSelectedNode = nodeRef === selectedNodes[selectedNodes.length - 1]; + // Only the last node in a running run is still executing; earlier nodes completed + const nodeStatus = isFailed + ? 'failed' + : isRunning && isLastSelectedNode + ? 'running' + : 'completed'; + nodeTimeOffset += randInt(100, 5000); + const startedAt = new Date(run.createdAt.getTime() + nodeTimeOffset); + const durationMs = randInt(50, 30000); + nodeTimeOffset += durationMs; // next node starts after this one completes + + // 5% large outputs (50-100KB) + const isLargeOutput = Math.random() < 0.05; + const outputData = isLargeOutput + ? { data: 'x'.repeat(randInt(50000, 100000)) } + : { result: 'ok', value: randInt(1, 1000) }; + const outputJson = JSON.stringify(outputData); + + batchRows.push([ + sqlVal(run.runId), + sqlVal(nodeRef), + sqlVal(run.workflowId), + sqlVal(ORG_ID), + sqlVal(pick(NODE_TYPES)), + sqlVal({ input1: 'value1' }), + sqlVal(20), + sqlVal(false), + sqlVal(outputData), + sqlVal(outputJson.length), + sqlVal(isLargeOutput), + sqlVal(startedAt), + sqlVal(nodeStatus === 'running' ? null : new Date(startedAt.getTime() + durationMs)), + sqlVal(nodeStatus === 'running' ? null : durationMs), + sqlVal(nodeStatus), + sqlVal(isFailed ? 'Connection timeout' : null), + sqlVal(startedAt), + // Fix: running nodes haven't completed yet, so updated_at should be startedAt + sqlVal(nodeStatus === 'running' ? 
startedAt : new Date(startedAt.getTime() + durationMs)), + ]); + } + + if (batchRows.length >= 1000) { + totalInserted += await batchInsert(client, 'node_io', columns, batchRows, 500); + batchRows.length = 0; + } + } + + if (batchRows.length > 0) { + totalInserted += await batchInsert(client, 'node_io', columns, batchRows, 500); + } + + console.log(` Inserted ${totalInserted} node I/O records`); +} + +async function seedSchedules( + client: PoolClient, + workflows: WorkflowData[], + versions: VersionData[], + count: number, +): Promise { + console.log(`\nSeeding ${count} workflow schedules...`); + const columns = [ + 'id', + 'workflow_id', + 'workflow_version_id', + 'workflow_version', + 'name', + 'description', + 'cron_expression', + 'timezone', + 'human_label', + 'overlap_policy', + 'status', + 'last_run_at', + 'next_run_at', + 'input_payload', + 'temporal_schedule_id', + 'temporal_snapshot', + 'organization_id', + 'created_at', + 'updated_at', + ]; + + const versionsByWorkflow = new Map(); + for (const v of versions) { + const arr = versionsByWorkflow.get(v.workflowId) || []; + arr.push(v); + versionsByWorkflow.set(v.workflowId, arr); + } + + const rows: string[][] = []; + + for (let i = 0; i < count; i++) { + const wf = pick(workflows); + const wfVersions = versionsByWorkflow.get(wf.id) || []; + const version = wfVersions.length > 0 ? pick(wfVersions) : null; + const status = pickWeighted(SCHEDULE_STATUS_DIST); + const cron = pick(CRON_EXPRESSIONS); + const tz = pick(TIMEZONES); + // Fix: updated_at must be after created_at (derive, don't use independent random) + const schedCreatedAt = randomDate(60); + + rows.push([ + sqlVal(randomUUID()), + sqlVal(wf.id), + sqlVal(version?.id ?? null), + sqlVal(version?.version ?? null), + sqlVal(`Schedule for ${wf.name.substring(0, 50)} #${i}`), + sqlVal(Math.random() < 0.3 ? 
null : `Runs ${cron} in ${tz}`), + sqlVal(cron), + sqlVal(tz), + sqlVal(`Every ${pick(['6 hours', 'weekday at 9am', '15 minutes', 'day at midnight'])}`), + sqlVal(pick(['skip', 'buffer', 'allow'])), + sqlVal(status), + sqlVal(status !== 'error' ? randomDate(7) : null), + sqlVal(status === 'active' ? new Date(Date.now() + randInt(60000, 86400000)) : null), + sqlVal({ runtimeInputs: {}, nodeOverrides: {} }), + sqlVal(`temporal-sched-${shortUUID()}`), + sqlVal({}), + sqlVal(ORG_ID), + sqlVal(schedCreatedAt), + sqlVal(new Date(schedCreatedAt.getTime() + randInt(0, 30 * 24 * 60 * 60 * 1000))), + ]); + } + + const inserted = await batchInsert(client, 'workflow_schedules', columns, rows); + console.log(` Inserted ${inserted} workflow schedules`); +} + +async function seedWebhooks( + client: PoolClient, + workflows: WorkflowData[], + versions: VersionData[], + runs: RunData[], + configCount: number, + deliveryCount: number, +): Promise { + console.log(`\nSeeding ${configCount} webhook configs, ${deliveryCount} deliveries...`); + + // Webhook configurations + const configCols = [ + 'id', + 'workflow_id', + 'workflow_version_id', + 'workflow_version', + 'name', + 'description', + 'webhook_path', + 'parsing_script', + 'expected_inputs', + 'status', + 'organization_id', + 'created_at', + 'updated_at', + ]; + + const versionsByWorkflow = new Map(); + for (const v of versions) { + const arr = versionsByWorkflow.get(v.workflowId) || []; + arr.push(v); + versionsByWorkflow.set(v.workflowId, arr); + } + + const webhookIds: string[] = []; + const configRows: string[][] = []; + + for (let i = 0; i < configCount; i++) { + const id = randomUUID(); + webhookIds.push(id); + const wf = pick(workflows); + const wfVersions = versionsByWorkflow.get(wf.id) || []; + const version = wfVersions.length > 0 ? pick(wfVersions) : null; + + // Fix: updated_at must be after created_at + const whConfigCreatedAt = randomDate(60); + configRows.push([ + sqlVal(id), + sqlVal(wf.id), + sqlVal(version?.id ?? 
null), + sqlVal(version?.version ?? null), + sqlVal(`Webhook ${wf.name.substring(0, 40)} #${i}`), + sqlVal(Math.random() < 0.3 ? null : 'Auto-generated webhook'), + sqlVal(`wh_${shortUUID()}_${i}`), + sqlVal('return { ...body };'), + sqlVal([{ id: 'input1', label: 'Input 1', type: 'text', required: true }]), + sqlVal(Math.random() < 0.8 ? 'active' : 'inactive'), + sqlVal(ORG_ID), + sqlVal(whConfigCreatedAt), + sqlVal(new Date(whConfigCreatedAt.getTime() + randInt(0, 30 * 24 * 60 * 60 * 1000))), + ]); + } + + const configsInserted = await batchInsert( + client, + 'webhook_configurations', + configCols, + configRows, + ); + console.log(` Inserted ${configsInserted} webhook configurations`); + + // Webhook deliveries + const deliveryCols = [ + 'id', + 'webhook_id', + 'workflow_run_id', + 'status', + 'payload', + 'headers', + 'parsed_data', + 'error_message', + 'created_at', + 'completed_at', + ]; + + const deliveryRows: string[][] = []; + + // Fix: link deliveries to actual webhook-triggered runs and ensure completed_at >= created_at + const webhookRuns = runs.filter((r) => r.status === 'COMPLETED' || r.status === 'FAILED'); + for (let i = 0; i < deliveryCount; i++) { + const webhookId = pick(webhookIds); + const status = pickWeighted(DELIVERY_STATUS_DIST); + const deliveryCreatedAt = randomDate(30); + // Fix: link to an actual run when delivered (not always null) + const linkedRun = status === 'delivered' && webhookRuns.length > 0 ? pick(webhookRuns) : null; + + deliveryRows.push([ + sqlVal(randomUUID()), + sqlVal(webhookId), + sqlVal(linkedRun?.runId ?? null), + sqlVal(status), + sqlVal({ event: 'test', data: { index: i } }), + sqlVal({ 'content-type': 'application/json' }), + sqlVal(status === 'delivered' ? { parsed: true } : null), + sqlVal(status === 'failed' ? 'Webhook processing failed' : null), + sqlVal(deliveryCreatedAt), + // Fix: completed_at must be after created_at (not independent random) + sqlVal( + status !== 'processing' + ? 
new Date(deliveryCreatedAt.getTime() + randInt(100, 30000)) + : null, + ), + ]); + } + + const deliveriesInserted = await batchInsert( + client, + 'webhook_deliveries', + deliveryCols, + deliveryRows, + ); + console.log(` Inserted ${deliveriesInserted} webhook deliveries`); +} + +async function seedHumanInputRequests( + client: PoolClient, + runs: RunData[], + count: number, +): Promise { + console.log(`\nSeeding ${count} human input requests...`); + const columns = [ + 'id', + 'run_id', + 'workflow_id', + 'node_ref', + 'status', + 'input_type', + 'input_schema', + 'title', + 'description', + 'context', + 'resolve_token', + 'timeout_at', + 'response_data', + 'responded_at', + 'responded_by', + 'organization_id', + 'created_at', + 'updated_at', + ]; + + const rows: string[][] = []; + + // Fix: ensure every AWAITING_INPUT run gets at least one pending human input request + const awaitingRuns = runs.filter((r) => r.status === 'AWAITING_INPUT'); + const guaranteedRows: { run: RunData; status: string }[] = awaitingRuns.map((r) => ({ + run: r, + status: 'pending', + })); + + // Fill remaining slots with random assignments + const remainingCount = Math.max(0, count - guaranteedRows.length); + const randomRows: { run: RunData; status: string }[] = []; + for (let i = 0; i < remainingCount; i++) { + randomRows.push({ + run: pick(runs), + status: pickWeighted(HUMAN_INPUT_STATUS_DIST), + }); + } + + const allInputRows = [...guaranteedRows, ...randomRows]; + + for (let i = 0; i < allInputRows.length; i++) { + const { run, status } = allInputRows[i]; + const inputType = pickWeighted(HUMAN_INPUT_TYPE_DIST); + // Fix: created_at should be within the run's time window, not an independent random date + const runDuration = Math.max(60000, Date.now() - run.createdAt.getTime()); + const createdAt = new Date( + run.createdAt.getTime() + randInt(1000, Math.min(runDuration, 24 * 60 * 60 * 1000)), + ); + + rows.push([ + sqlVal(randomUUID()), + sqlVal(run.runId), + sqlVal(run.workflowId), + 
sqlVal(pick(run.nodeRefs)), + sqlVal(status), + sqlVal(inputType), + sqlVal({ type: 'object', properties: {} }), + sqlVal(`${inputType.charAt(0).toUpperCase() + inputType.slice(1)} Request #${i}`), + sqlVal(Math.random() < 0.3 ? null : `Please ${inputType} this action`), + sqlVal({ workflow: run.workflowId }), + sqlVal(`token_${randomUUID()}`), + sqlVal(status === 'expired' ? new Date(createdAt.getTime() + 3600000) : null), + sqlVal(status === 'resolved' ? { approved: true } : null), + sqlVal( + status === 'resolved' ? new Date(createdAt.getTime() + randInt(60000, 3600000)) : null, + ), + sqlVal(status === 'resolved' ? 'stress-test-user' : null), + sqlVal(ORG_ID), + sqlVal(createdAt), + sqlVal( + status !== 'pending' ? new Date(createdAt.getTime() + randInt(60000, 3600000)) : createdAt, + ), + ]); + } + + const inserted = await batchInsert(client, 'human_input_requests', columns, rows); + console.log(` Inserted ${inserted} human input requests`); +} + +async function seedArtifacts( + client: PoolClient, + runs: RunData[], + fileIds: string[], + count: number, +): Promise { + console.log(`\nSeeding ${count} artifacts...`); + const columns = [ + 'id', + 'run_id', + 'workflow_id', + 'workflow_version_id', + 'component_id', + 'component_ref', + 'file_id', + 'name', + 'mime_type', + 'size', + 'destinations', + 'metadata', + 'organization_id', + 'created_at', + ]; + + const rows: string[][] = []; + + for (let i = 0; i < count; i++) { + const run = pick(runs); + const fileId = pick(fileIds); + const ext = pick(['json', 'csv', 'txt', 'pdf', 'png']); + // Fix: artifact created_at should be within the run's execution window + const artifactCreatedAt = new Date(run.createdAt.getTime() + randInt(1000, 300000)); + + rows.push([ + sqlVal(randomUUID()), + sqlVal(run.runId), + sqlVal(run.workflowId), + sqlVal(run.versionId), + sqlVal(pick(NODE_TYPES)), + sqlVal(pick(run.nodeRefs)), + sqlVal(fileId), + sqlVal(`artifact-${i}.${ext}`), + sqlVal(ext === 'png' ? 
'image/png' : ext === 'pdf' ? 'application/pdf' : `text/${ext}`), + sqlVal(randInt(100, 500000)), + sqlVal(pick([['run'], ['library'], ['run', 'library']])), + sqlVal(Math.random() < 0.5 ? { generated: true, index: i } : null), + sqlVal(ORG_ID), + sqlVal(artifactCreatedAt), + ]); + } + + const inserted = await batchInsert(client, 'artifacts', columns, rows); + console.log(` Inserted ${inserted} artifacts`); +} + +async function seedAgentTraceEvents( + client: PoolClient, + runs: RunData[], + eventsRange: [number, number], +): Promise { + const agentRuns = runs.filter((r) => r.isAgentRun); + if (agentRuns.length === 0) { + console.log(`\nNo agent runs to seed trace events for.`); + return; + } + + console.log(`\nSeeding agent trace events for ${agentRuns.length} agent runs...`); + const columns = [ + 'agent_run_id', + 'workflow_run_id', + 'node_ref', + 'sequence', + 'timestamp', + 'part_type', + 'payload', + 'created_at', + ]; + + let totalInserted = 0; + const batchRows: string[][] = []; + + for (const run of agentRuns) { + const agentNodeRefs = run.nodeRefs.slice(0, Math.max(1, Math.floor(run.nodeRefs.length / 3))); + const agentRunId = `agent_${shortUUID()}`; + // Fix: all events in a single agent run belong to the same agent node + const agentNodeRef = pick(agentNodeRefs); + const eventCount = randInt(eventsRange[0], eventsRange[1]); + const baseTime = run.createdAt.getTime(); + // Fix: use cumulative offset for monotonically increasing timestamps + let agentCumulativeOffset = 0; + + for (let seq = 0; seq < eventCount; seq++) { + const partType = pick(AGENT_PART_TYPES); + agentCumulativeOffset += randInt(500, 3000); + const ts = new Date(baseTime + agentCumulativeOffset); + + let payload: unknown; + switch (partType) { + case 'text': + payload = { text: `Agent response step ${seq}` }; + break; + case 'tool-call': + payload = { toolName: `tool_${shortUUID()}`, args: { input: 'test' } }; + break; + case 'tool-result': + payload = { result: { output: 'success' } }; 
+ break; + case 'step-start': + payload = { step: seq }; + break; + case 'reasoning': + payload = { reasoning: `Thinking about step ${seq}...` }; + break; + case 'error': + payload = { error: 'Something went wrong', code: 'ERR_UNKNOWN' }; + break; + default: + payload = { data: partType }; + } + + batchRows.push([ + sqlVal(agentRunId), + sqlVal(run.runId), + sqlVal(agentNodeRef), + sqlVal(seq), + sqlVal(ts), + sqlVal(partType), + sqlVal(payload), + sqlVal(ts), + ]); + } + + if (batchRows.length >= 1000) { + totalInserted += await batchInsert(client, 'agent_trace_events', columns, batchRows, 500); + batchRows.length = 0; + } + } + + if (batchRows.length > 0) { + totalInserted += await batchInsert(client, 'agent_trace_events', columns, batchRows, 500); + } + + console.log(` Inserted ${totalInserted} agent trace events`); +} + +// ─── Cleanup ───────────────────────────────────────────────────────────────── + +async function cleanup(client: PoolClient): Promise { + console.log('\nCleaning up stress-test data...\n'); + + // Delete in reverse FK order + const deletions = [ + `DELETE FROM agent_trace_events WHERE workflow_run_id IN (SELECT run_id FROM workflow_runs WHERE organization_id = '${ORG_ID}')`, + `DELETE FROM artifacts WHERE organization_id = '${ORG_ID}'`, + `DELETE FROM human_input_requests WHERE organization_id = '${ORG_ID}'`, + `DELETE FROM webhook_deliveries WHERE webhook_id IN (SELECT id FROM webhook_configurations WHERE organization_id = '${ORG_ID}')`, + `DELETE FROM webhook_configurations WHERE organization_id = '${ORG_ID}'`, + `DELETE FROM workflow_schedules WHERE organization_id = '${ORG_ID}'`, + `DELETE FROM node_io WHERE organization_id = '${ORG_ID}'`, + `DELETE FROM workflow_traces WHERE organization_id = '${ORG_ID}'`, + `DELETE FROM workflow_runs WHERE organization_id = '${ORG_ID}'`, + `DELETE FROM mcp_server_tools WHERE server_id IN (SELECT id FROM mcp_servers WHERE organization_id = '${ORG_ID}')`, + `DELETE FROM mcp_group_servers WHERE server_id 
IN (SELECT id FROM mcp_servers WHERE organization_id = '${ORG_ID}')`, + `DELETE FROM mcp_servers WHERE organization_id = '${ORG_ID}'`, + `DELETE FROM mcp_groups WHERE slug LIKE 'stress-test-group-%'`, + `DELETE FROM api_keys WHERE organization_id = '${ORG_ID}'`, + `DELETE FROM secret_versions WHERE organization_id = '${ORG_ID}'`, + `DELETE FROM secrets WHERE organization_id = '${ORG_ID}'`, + `DELETE FROM workflow_versions WHERE organization_id = '${ORG_ID}'`, + `DELETE FROM workflows WHERE organization_id = '${ORG_ID}'`, + `DELETE FROM files WHERE organization_id = '${ORG_ID}'`, + ]; + + for (const sql of deletions) { + const table = sql.match(/FROM (\S+)/)?.[1] || 'unknown'; + const result = await client.query(sql); + console.log(` Deleted ${result.rowCount} rows from ${table}`); + } + + console.log('\nCleanup complete.'); +} + +// ─── Main ──────────────────────────────────────────────────────────────────── + +function parseArgs(): { tier: string; clean: boolean } { + const args = process.argv.slice(2); + let tier = 'small'; + let clean = false; + + for (let i = 0; i < args.length; i++) { + if (args[i] === '--tier' && args[i + 1]) { + tier = args[i + 1]; + i++; + } + if (args[i] === '--clean') { + clean = true; + } + } + + if (!TIERS[tier]) { + console.error(`Invalid tier: ${tier}. 
Choose from: ${Object.keys(TIERS).join(', ')}`); + process.exit(1); + } + + return { tier, clean }; +} + +async function main() { + const { tier, clean } = parseArgs(); + const config = TIERS[tier]; + + const connectionString = + process.env.DATABASE_URL || 'postgresql://shipsec:shipsec@localhost:5433/shipsec'; + + const pool = new Pool({ connectionString }); + const client = await pool.connect(); + + try { + if (clean) { + await client.query('BEGIN'); + await cleanup(client); + await client.query('COMMIT'); + return; + } + + console.log(`\nStress Test Seed - Tier: ${tier.toUpperCase()}`); + console.log('='.repeat(50)); + + // Auto-clean existing seed data before re-seeding to prevent accumulation + await client.query('BEGIN'); + await cleanup(client); + await client.query('COMMIT'); + + const startTime = Date.now(); + + // Seed in FK-safe order, one transaction per entity type + await client.query('BEGIN'); + const fileIds = await seedFiles(client, config.artifactsAndFiles); + await client.query('COMMIT'); + + await client.query('BEGIN'); + const seededSecrets = await seedSecrets(client, config.secrets); + await client.query('COMMIT'); + + const secretNames = seededSecrets.map((s) => s.name); + + await client.query('BEGIN'); + const workflows = await seedWorkflows(client, config.workflows, tier, secretNames); + await client.query('COMMIT'); + + await client.query('BEGIN'); + const versions = await seedVersions(client, workflows, config.workflowVersionsRange); + await client.query('COMMIT'); + + await client.query('BEGIN'); + await seedMcpData(client, config.mcpGroups, config.mcpServers, config.mcpToolsPerServerRange); + await client.query('COMMIT'); + + await client.query('BEGIN'); + const runs = await seedRuns(client, workflows, versions, config.workflowRuns); + await client.query('COMMIT'); + + // Fix: update run_count to match actual seeded runs per workflow + await client.query('BEGIN'); + await client.query(` + UPDATE workflows SET run_count = sub.cnt + FROM 
(SELECT workflow_id, COUNT(*) AS cnt FROM workflow_runs WHERE organization_id = '${ORG_ID}' GROUP BY workflow_id) sub + WHERE workflows.id = sub.workflow_id AND workflows.organization_id = '${ORG_ID}' + `); + await client.query('COMMIT'); + + await client.query('BEGIN'); + await seedTraces(client, runs, config.tracesPerRunRange); + await client.query('COMMIT'); + + await client.query('BEGIN'); + await seedNodeIO(client, runs, config.nodeIoPerRunRange); + await client.query('COMMIT'); + + await client.query('BEGIN'); + await seedSchedules(client, workflows, versions, config.schedules); + await client.query('COMMIT'); + + await client.query('BEGIN'); + await seedWebhooks( + client, + workflows, + versions, + runs, + config.webhookConfigs, + config.webhookDeliveries, + ); + await client.query('COMMIT'); + + await client.query('BEGIN'); + await seedHumanInputRequests(client, runs, config.humanInputRequests); + await client.query('COMMIT'); + + await client.query('BEGIN'); + await seedArtifacts(client, runs, fileIds, config.artifactsAndFiles); + await client.query('COMMIT'); + + await client.query('BEGIN'); + await seedAgentTraceEvents(client, runs, config.agentTraceEventsPerRun); + await client.query('COMMIT'); + + await client.query('BEGIN'); + await seedApiKeys(client, config.apiKeys); + await client.query('COMMIT'); + + const elapsed = ((Date.now() - startTime) / 1000).toFixed(1); + + console.log('\n' + '='.repeat(50)); + console.log(`Seed complete in ${elapsed}s`); + console.log(`Tier: ${tier} | Organization: ${ORG_ID}`); + console.log(`Run --clean to remove all stress-test data.`); + } catch (error) { + await client.query('ROLLBACK'); + console.error('\nSeed failed:'); + console.error(error); + process.exitCode = 1; + } finally { + client.release(); + await pool.end(); + } +} + +main().catch((error) => { + console.error('Script encountered an unexpected error'); + console.error(error); + process.exit(1); +}); diff --git 
a/frontend/docs/audits/load-audit-2026-02-18T00-00.md b/frontend/docs/audits/load-audit-2026-02-18T00-00.md new file mode 100644 index 000000000..e7b39ee7d --- /dev/null +++ b/frontend/docs/audits/load-audit-2026-02-18T00-00.md @@ -0,0 +1,467 @@ +# Frontend Load Testing Audit Report + +**Date:** 2026-02-18 +**Branch:** `LuD1161/tanstack-query-migrate` +**Seed Tier:** Medium (50 workflows, ~2000 runs, ~45K traces, ~17K node I/O, ~45K agent events) +**Environment:** Nginx reverse proxy (localhost:80 → Vite dev server), no CPU/network throttling +**Seed Duration:** 20.3s +**Testing Plan:** See `frontend/docs/load-testing-plan.md` + +--- + +## 1. Workflow List Page (`/`) + +### 1.1 Performance Trace Summary (Lighthouse-style) + +| Metric | Value | Rating | +| -------- | ------------ | ------ | +| **LCP** | **4,893 ms** | POOR | +| **CLS** | 0.00 | GOOD | +| **TTFB** | **54 ms** | GOOD | + +### 1.2 LCP Breakdown + +- **TTFB:** 54 ms (1.1% of LCP) +- **Render Delay:** 4,839 ms (98.9% of LCP) — the main bottleneck +- Max critical path latency: **4,577 ms** — dominated by Vite module chain loading +- LCP element: `
<text node — exact element tag lost during report extraction; re-capture from the original performance trace>
` (text, not network-fetched) + +> **Note:** The 4.9s LCP is dominated by Vite's unbundled ESM module chain. The critical path goes through `main.tsx → App.tsx → AppLayout.tsx → prefetch-routes.ts → SchedulesPage.tsx` etc. A production build would dramatically reduce this. The previous audit via Vite direct showed 9.7s LCP, so nginx proxy already improved it by ~2x. + +### 1.3 API Request Timings + +| # | Endpoint | Status | +| --- | --------------------------- | ------ | +| 1 | `/api/v1/workflows/summary` | 200 OK | +| 2 | `/api/v1/components` | 200 OK | + +**Observations:** + +- Only **2 API calls** on the list page — efficient! +- All **50 workflows** are loaded in a single request — no pagination +- Both responses use gzip compression via nginx + +### 1.4 TanStack Query Cache (on list page) + +| Query Key | Status | Stale Time | Is Stale | Data | +| ---------------------------------- | ------- | ------------- | -------- | -------- | +| `["components","local-dev"]` | success | 600s (10 min) | false | object | +| `["workflowsSummary","local-dev"]` | success | 60s (1 min) | false | 50 items | + +**Observations:** + +- Components cache is well-configured at 10 min stale time (rarely changes) +- Workflows summary has 60s stale time — refreshes frequently on tab focus, good for live data +- Prefetch is working correctly via `usePrefetchOnIdle` +- **Clean cache — no ghost queries on this page** + +### 1.5 DOM Size: **1,529 elements** + +### 1.6 Third-Party Impact + +Only **Google Fonts** (48.4 KB). Minimal third-party overhead. 
+ +### 1.7 Critical Path Analysis + +The network dependency tree shows the critical chain: + +``` +localhost/ (66ms) + └─ main.tsx (267ms) + └─ react_jsx-dev-runtime (268ms) + └─ chunk-UV2NTXO5 (272ms) + └─ WorkflowList.tsx (4,569ms) ← bottleneck + └─ statusBadgeStyles.ts (4,577ms) +``` + +The `prefetch-routes.ts` module eagerly loads ALL page modules (SchedulesPage, McpLibraryPage, ArtifactLibrary, WebhooksPage, etc.), adding significant module chain depth. This is a Vite dev-mode issue — production bundles would chunk differently. + +--- + +## 2. Workflow Detail Page — Design Tab (`/workflows/:id`) + +### 2.1 API Requests on SPA Navigation + +| # | Endpoint | Notes | +| --- | ---------------------------------- | ----------------------------------------------- | +| 1 | `/api/v1/workflows/summary` | **Served from TanStack cache** (not re-fetched) | +| 2 | `/api/v1/components` | **Served from TanStack cache** (not re-fetched) | +| 3 | `/api/v1/secrets` | New fetch — 0 items | +| 4 | `/api/v1/schedules` | New fetch — 30 items | +| 5 | `/api/v1/workflows/:id` | Full workflow with graph — large payload | +| 6 | `/api/v1/schedules?workflowId=:id` | Workflow-specific schedules — 0 items | + +**Observations:** + +- 4 new API calls on SPA navigation (2 cached from prefetch) +- Cache reuse for `workflows/summary` and `components` is working correctly +- The workflow detail response includes the **full graph JSON** + +### 2.2 TanStack Query Cache (on design tab) — 7 queries + +| Query Key | Status | Stale Time | Is Stale | Data | Issue? 
| +| --------------------------------------------- | ----------- | ---------- | -------- | --------- | --------------- | +| `["components","local-dev"]` | success | 600s | false | object | | +| `["workflowsSummary","local-dev"]` | success | 60s | false | 50 items | | +| `["secrets","local-dev"]` | success | 300s | false | 0 items | | +| `["schedules","local-dev",{}]` | success | 60s | false | 30 items | | +| `["schedules","local-dev",{workflowId}]` | success | 60s | false | 0 items | | +| **`["runs","local-dev","__disabled__"]`** | **PENDING** | 30s | false | undefined | **Ghost query** | +| **`["workflow","local-dev","__disabled__"]`** | **PENDING** | 60s | false | undefined | **Ghost query** | + +### FINDINGS — Ghost Queries: + +1. **`["runs","local-dev","__disabled__"]`** — Query created with `status: pending` + `fetchStatus: idle`. The `__disabled__` sentinel indicates the query is deliberately disabled (via `enabled: false` when no run is selected), but the cache entry persists. + +2. **`["workflow","local-dev","__disabled__"]`** — Same pattern. A disabled sentinel key sitting idle in cache. + +### 2.3 DOM Size: **1,667 elements** + +--- + +## 3. 
Workflow Detail Page — Execute Tab (`/workflows/:id/runs/:runId`) + +### 3.1 API Requests on Execute Tab Switch + +| # | Endpoint | Notes | +| --- | ----------------------------------------------- | -------------------------- | +| 1 | `/api/v1/workflows/runs?workflowId=:id&limit=5` | Run selector (last 5 runs) | +| 2 | `/api/v1/workflows/runs/:runId/events` | Event timeline | +| 3 | `/api/v1/workflows/runs/:runId/dataflows` | Node I/O data | +| 4 | `/api/v1/workflows/runs/:runId/status` | Run status | +| 5 | `/api/v1/workflows/:id/versions/:versionId` | Workflow version | +| 6 | `/api/v1/workflows/runs/:runId/stream` | Live event stream (SSE) | +| 7 | `/api/v1/workflows/runs/:runId/events` | **DUPLICATE** | +| 8 | `/api/v1/workflows/runs/:runId/dataflows` | **DUPLICATE** | +| 9 | `/api/v1/workflows/runs/:runId/status` | **DUPLICATE** | +| 10 | `/api/v1/workflows/runs/:runId/logs?limit=500` | Logs | + +### FINDING: Duplicate API Calls (PERSISTS) + +The Execute tab fires **duplicate requests** for multiple endpoints: + +- **`events`** called **2 times** +- **`dataflows`** called **2 times** +- **`status`** called **2 times** + +This suggests component re-renders triggering refetches or multiple components independently querying the same run data. TanStack Query deduplication should prevent this — investigate if `enabled` flags are toggling or if queryFn differs across instances. 
+ +### 3.2 TanStack Query Cache (on execute tab) — 9 queries + +| Query Key | Status | Stale Time | Is Stale | Notes | +| ----------------------------------------- | ------- | ---------- | -------- | ----------------- | +| `["components","local-dev"]` | success | 600s | false | | +| `["workflowsSummary","local-dev"]` | success | 60s | false | | +| `["secrets","local-dev"]` | success | 300s | false | | +| `["schedules","local-dev",{}]` | success | 60s | false | | +| `["schedules","local-dev",{workflowId}]` | success | 60s | false | | +| `["runs","local-dev","__disabled__"]` | PENDING | 30s | false | Ghost | +| `["workflow","local-dev","__disabled__"]` | PENDING | 60s | **true** | Ghost, stale | +| `["runs","local-dev",":workflowId"]` | success | 30s | **true** | Actively fetching | +| `["runs","local-dev","detail",":runId"]` | success | 30s | false | | + +### 3.3 DOM Size: **2,091 elements** — highest of any page + +### 3.4 Run Selector Pagination + +The run selector fetches with `limit=5`, which is efficient. Good pagination strategy. + +--- + +## 4. Mobile Responsiveness (375px iPhone, 768px Tablet) + +### 4.1 iPhone (375px) + +- Sidebar collapses correctly to a hamburger menu ("Open menu" button) +- Table columns adapted: **"Last Run" and "Last Updated" columns hidden** +- Only shows: Name, Nodes, Status, Actions +- Long workflow names (e.g., "Content Publisher - User Health Inventory...") wrap correctly +- Mobile-specific top bar with "ShipSec Studio" logo and "New" button visible +- **All 50 workflows** rendered — no virtual scrolling on mobile (potential scroll jank) + +### 4.2 Tablet (768px) + +- Sidebar collapsed +- Table shows more columns than mobile but less than desktop +- Layout is functional + +### 4.3 Mobile Performance Concern + +Rendering 50 workflow rows without virtualization is particularly problematic on mobile devices with constrained GPU/CPU. The 1,529 DOM elements could cause scroll jank on older devices. + +--- + +## 5. 
Schedules Page (`/schedules`) + +### 5.1 API Requests + +| # | Endpoint | Status | +| --- | --------------------------- | ------ | +| 1 | `/api/v1/workflows/summary` | 200 OK | +| 2 | `/api/v1/components` | 200 OK | +| 3 | `/api/v1/schedules` | 200 OK | + +- **3 API calls total** — clean, no duplicates +- 30 schedules loaded in a single request + +### 5.2 TanStack Query Cache + +| Query Key | Status | Stale Time | Is Stale | Data | +| --------------------------------------------- | ----------- | ---------- | -------- | --------- | +| `["components","local-dev"]` | success | 600s | false | object | +| `["workflowsSummary","local-dev"]` | success | 60s | false | 50 items | +| `["schedules","local-dev",{}]` | success | 60s | false | 30 items | +| **`["workflow","local-dev","__disabled__"]`** | **PENDING** | 60s | false | undefined | + +- Ghost query `["workflow","__disabled__"]` persists across page navigations — confirmed global issue + +### 5.3 DOM Size: **1,452 elements** + +--- + +## 6. Webhooks Page (`/webhooks`) + +### 6.1 API Requests + +| # | Endpoint | Status | +| --- | --------------------------------- | ------ | +| 1 | `/api/v1/workflows/summary` | 200 OK | +| 2 | `/api/v1/components` | 200 OK | +| 3 | `/api/v1/webhooks/configurations` | 200 OK | + +- **3 API calls total** — all fast +- 25 webhooks loaded — no pagination + +### 6.2 TanStack Query Cache + +| Query Key | Status | Stale Time | Is Stale | Data | +| ---------------------------------- | ------- | ---------- | -------- | -------- | +| `["components","local-dev"]` | success | 600s | false | object | +| `["workflowsSummary","local-dev"]` | success | 60s | false | 50 items | +| `["webhooks","local-dev",null]` | success | 60s | false | 25 items | + +- **Clean cache — no ghost queries on this page** +- Webhooks stale time improved from 15s → 60s since previous audit + +### 6.3 DOM Size: **1,178 elements** + +--- + +## 7. 
Action Center Page (`/action-center`)
+
+### 7.1 API Requests
+
+| #   | Endpoint                              | Status |
+| --- | ------------------------------------- | ------ |
+| 1   | `/api/v1/workflows/summary`           | 200 OK |
+| 2   | `/api/v1/components`                  | 200 OK |
+| 3   | `/api/v1/human-inputs?status=pending` | 200 OK |
+
+- **3 API calls total**
+- 28 pending human input items loaded
+
+### 7.2 TanStack Query Cache
+
+| Query Key                                          | Status  | Stale Time | Is Stale | Data     |
+| -------------------------------------------------- | ------- | ---------- | -------- | -------- |
+| `["components","local-dev"]`                       | success | 600s       | false    | object   |
+| `["workflowsSummary","local-dev"]`                 | success | 60s        | false    | 50 items |
+| `["humanInputs","local-dev",{"status":"pending"}]` | success | 30s        | false    | 28 items |
+
+- Clean cache, no issues
+
+### 7.3 DOM Size: **1,023 elements**
+
+---
+
+## 8. Artifact Library Page (`/artifacts`)
+
+### 8.1 API Requests
+
+| #   | Endpoint                    | Status |
+| --- | --------------------------- | ------ |
+| 1   | `/api/v1/workflows/summary` | 200 OK |
+| 2   | `/api/v1/components`        | 200 OK |
+| 3   | `/api/v1/artifacts`         | 200 OK |
+
+- **3 API calls total**
+- 50 artifacts loaded — no pagination
+
+### 8.2 TanStack Query Cache
+
+| Query Key                              | Status  | Stale Time | Is Stale | Data     |
+| -------------------------------------- | ------- | ---------- | -------- | -------- |
+| `["components","local-dev"]`           | success | 600s       | false    | object   |
+| `["workflowsSummary","local-dev"]`     | success | 60s        | false    | 50 items |
+| `["artifactLibrary","local-dev",null]` | success | 30s        | false    | 50 items |
+
+- Clean cache, no issues
+
+### 8.3 DOM Size: **1,520 elements** (fourth highest overall — behind the Execute tab, Design tab, and Workflow List; see §10.1)
+
+---
+
+## 9. 
Manage Section
+
+### 9.1 Secrets Page (`/secrets`)
+
+| #   | Endpoint                    | Status |
+| --- | --------------------------- | ------ |
+| 1   | `/api/v1/workflows/summary` | 200 OK |
+| 2   | `/api/v1/components`        | 200 OK |
+| 3   | `/api/v1/secrets`           | 200 OK |
+
+- **3 API calls**, 0 secrets returned
+- **Query:** `["secrets","local-dev"]` — success, 300s stale time (appropriate)
+- **DOM Size:** **222 elements** — lightest page
+
+### 9.2 API Keys Page (`/api-keys`)
+
+| #   | Endpoint                    | Status |
+| --- | --------------------------- | ------ |
+| 1   | `/api/v1/workflows/summary` | 200 OK |
+| 2   | `/api/v1/components`        | 200 OK |
+| 3   | `/api/v1/api-keys`          | 200 OK |
+
+- **3 API calls**, 0 API keys returned (seed data creates them but not in this org scope)
+- **Query:** `["apiKeys","local-dev"]` — success, 60s stale time
+- **DOM Size:** **212 elements** — lightest page in the app
+
+### 9.3 MCP Library Page (`/mcp-library`)
+
+| #   | Endpoint                                 | Status |
+| --- | ---------------------------------------- | ------ |
+| 1   | `/api/v1/workflows/summary`              | 200 OK |
+| 2   | `/api/v1/components`                     | 200 OK |
+| 3   | `/api/v1/mcp-groups/templates`           | 200 OK |
+| 4   | `/api/v1/mcp-servers`                    | 200 OK |
+| 5   | `/api/v1/mcp-servers/tools`              | 200 OK |
+| 6   | `/api/v1/mcp-groups?includeServers=true` | 200 OK |
+
+- **6 API calls** — most of any non-detail page (tied with the Workflow Detail Design tab's 6; the Execute tab is higher at 10)
+- `mcp-servers/tools` returns **268 items** — the largest item count from any single endpoint
+
+**TanStack Query Cache:**
+
+| Query Key                                 | Status  | Stale Time | Is Stale | Data      |
+| ----------------------------------------- | ------- | ---------- | -------- | --------- |
+| `["mcpGroupTemplates","local-dev"]`       | success | 300s       | false    | 1 item    |
+| `["mcpServers","local-dev"]`              | success | 120s       | false    | 30 items  |
+| `["mcpServers","local-dev","tools"]`      | success | 120s       | false    | 268 items |
+| `["mcpGroups","local-dev","withServers"]` | success | 30s        | false    | 10 items  |
+
+- **DOM Size:** **521 elements**
+
+### 9.4 Analytics 
Settings Page (`/analytics-settings`) + +| # | Endpoint | Status | +| --- | --------------------------- | ------ | +| 1 | `/api/v1/workflows/summary` | 200 OK | +| 2 | `/api/v1/components` | 200 OK | + +- **2 API calls** — no page-specific API calls +- Only `components` and `workflowsSummary` (both prefetched) +- **No page-specific TanStack queries** +- **DOM Size:** **249 elements** + +--- + +## 10. Cross-Page Analysis + +### 10.1 DOM Element Count Comparison + +| Page | DOM Elements | Rating | +| ------------------------------------------ | ------------ | ------------ | +| **Workflow Execute Tab** | **2,091** | **Heaviest** | +| Workflow Detail Design | 1,667 | Heavy | +| Workflow List (`/`) | 1,529 | Heavy | +| Artifact Library (`/artifacts`) | 1,520 | Heavy | +| Schedules (`/schedules`) | 1,452 | Heavy | +| Webhooks (`/webhooks`) | 1,178 | Moderate | +| Action Center (`/action-center`) | 1,023 | Moderate | +| MCP Library (`/mcp-library`) | 521 | Light | +| Analytics Settings (`/analytics-settings`) | 249 | Minimal | +| Secrets (`/secrets`) | 222 | Minimal | +| API Keys (`/api-keys`) | 212 | Minimal | + +### 10.2 API Call Count per Page + +| Page | API Calls | Notes | +| ----------------------- | --------- | ------------------------------ | +| Workflow Detail Execute | 10 | Includes 3 pairs of duplicates | +| MCP Library | 6 | Most for a non-detail page | +| Workflow Detail Design | 6 | 4 new + 2 cached | +| Schedules | 3 | | +| Webhooks | 3 | | +| Action Center | 3 | | +| Artifact Library | 3 | | +| Secrets | 3 | | +| API Keys | 3 | | +| Workflow List | 2 | | +| Analytics Settings | 2 | No page-specific queries | + +### 10.3 Shared Queries Across All Pages + +Every page loads `["components","local-dev"]` (600s stale) and `["workflowsSummary","local-dev"]` (60s stale) via `usePrefetchOnIdle`. This is consistent and efficient — the prefetch works correctly across all navigation paths. + +### 10.4 Pages Without Pagination + +| Page | Items Loaded | Pagination? 
| +| ---------------- | --------------- | ----------- | +| Workflow List | 50 workflows | **No** | +| Schedules | 30 schedules | **No** | +| Webhooks | 25 webhooks | **No** | +| Action Center | 28 human inputs | **No** | +| Artifact Library | 50 artifacts | **No** | +| MCP Servers | 30 servers | **No** | +| MCP Tools | 268 tools | **No** | + +> None of the list pages implement server-side pagination. Currently manageable at medium data volumes, but will become problematic at scale. + +--- + +## 11. Summary of Findings + +### What's Working Well + +1. **TanStack Query caching** is effective — `components` (10 min stale) and `workflowsSummary` (60s stale) are properly cached and reused across SPA navigations +2. **Prefetch on idle** works — `usePrefetchOnIdle` correctly pre-fetches workflow summary and components on every page +3. **Auth gating** on prefetch appears functional +4. **Run selector pagination** — uses `limit=5`, efficient for workflow detail +5. **Gzip compression** active on all API responses via nginx +6. **CLS = 0.00** — no layout shifts, excellent UX +7. **Mobile responsive layout** — sidebar collapses, table columns adapt appropriately +8. **Stale time tuning** — most queries have appropriate stale times post-migration +9. **No unnecessary refetches** on SPA navigation — cache reuse is working + +### Issues Found + +| # | Severity | Finding | Details | +| --- | ---------- | --------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| 1 | **MEDIUM** | Duplicate API calls on Execute tab | `events`, `dataflows`, `status` each called 2x on tab switch. TanStack deduplication not working for these. Total: 10 calls instead of expected 7. 
| +| 2 | **MEDIUM** | Ghost queries persist across navigations | `["runs","__disabled__"]` and `["workflow","__disabled__"]` with status=pending, fetchStatus=idle. Found on Design tab and Schedules page. These use `__disabled__` sentinel (improved from `__global__`) but still pollute the cache. | +| 3 | **MEDIUM** | No pagination on ANY list page | Workflows (50), schedules (30), webhooks (25), artifacts (50), human inputs (28), MCP servers (30), MCP tools (268) — all loaded without pagination. Won't scale beyond ~200 items. | +| 4 | **LOW** | LCP 4.9s in dev mode | Dominated by Vite module chain (4,577ms critical path). Production build expected to be significantly faster. Not a code issue. | +| 5 | **LOW** | `prefetch-routes.ts` eagerly loads ALL page modules | Adds to initial load time. All page components (Schedules, MCP, Artifacts, Webhooks, etc.) are loaded on first navigation regardless of which page the user visits. | +| 6 | **LOW** | No virtual scrolling for heavy pages | Execute tab has 2,091 DOM elements, Workflow List 1,529, Artifacts 1,520. Could cause scroll performance issues at scale, especially on mobile. | +| 7 | **INFO** | MCP tools payload: 268 items | Largest item count from any endpoint. Well-cached at 120s stale time. Compressed well via gzip. | +| 8 | **INFO** | Analytics Settings has no page-specific queries | Only prefetched queries load — settings appear to be client-side only. 
|
+
+### Stale Time Configuration Summary (Current)
+
+| Query               | Stale Time    | Assessment                                 |
+| ------------------- | ------------- | ------------------------------------------ |
+| `components`        | 600s (10 min) | Appropriate — rarely changes               |
+| `workflowsSummary`  | 60s (1 min)   | Good for list page                         |
+| `workflow` (detail) | 60s           | Good                                       |
+| `secrets`           | 300s (5 min)  | Appropriate                                |
+| `mcpGroupTemplates` | 300s (5 min)  | Appropriate                                |
+| `schedules`         | 60s           | Good                                       |
+| `mcpServers`        | 120s (2 min)  | Good                                       |
+| `mcpServers/tools`  | 120s (2 min)  | Good                                       |
+| `mcpGroups`         | 30s           | Acceptable                                 |
+| `runs`              | 30s           | Good for potentially live data             |
+| `webhooks`          | 60s           | Good (improved from 15s in previous audit) |
+| `artifactLibrary`   | 30s           | Acceptable                                 |
+| `humanInputs`       | 30s           | Good                                       |
+| `apiKeys`           | 60s           | Good                                       |
diff --git a/frontend/docs/load-testing-plan.md b/frontend/docs/load-testing-plan.md
new file mode 100644
index 000000000..78fd52bac
--- /dev/null
+++ b/frontend/docs/load-testing-plan.md
@@ -0,0 +1,127 @@
+# Frontend Load Testing Plan
+
+A reusable guide for auditing frontend performance, TanStack Query behavior, and responsiveness.
+
+## Prerequisites
+
+1. **Seed data:** `bun backend/scripts/seed-stress-test.ts --tier medium`
+2. **Dev server running:** `just dev` (Vite on localhost)
+3. **Browser:** Chrome with DevTools open (Network tab, Performance tab)
+4. **Viewport:** Desktop 1280x720 default, then mobile/tablet for responsive checks
+5. **All testing must be done via `localhost` (nginx reverse proxy), NOT via the direct Vite dev server port.** Nginx provides gzip compression and more closely matches production behavior.
+
+## Step 0: Discover All Routes
+
+Before auditing, discover the current set of routes from the source of truth:
+
+**Route definitions:** `frontend/src/App.tsx` — all `<Route>` entries
+
+1. Read `frontend/src/App.tsx` and extract every `<Route>` entry
+2. Compare against the **Known Pages** table below
+3. 
If there are new routes not in the table, **add them to the audit** as fresh navigation tests +4. If routes have been removed, skip them and note the removal in the report +5. For parameterized routes (e.g., `/workflows/:id`), use a real entity from seeded data + +This ensures the audit always covers the full application, even as new pages are added. + +## Per-Page Audit Sequence + +For each page below, perform these steps in order: + +1. **Navigate** to the page URL (fresh navigation, not SPA transition, for cold-load pages) +2. **Wait** for all data to load (spinners gone, tables populated) +3. **Screenshot** the page +4. **Network tab** — filter fetch/XHR — record all API calls, durations, transfer/decoded sizes +5. **TanStack Query cache** — extract via JS console (see snippet below) — record query keys, status, fetchStatus, staleTime, isStale, data size +6. **DOM element count** — `document.querySelectorAll('*').length` +7. Note any anomalies: ghost queries, duplicates, stale-on-arrival, missing pagination + +## Known Pages (baseline — update as routes change) + +| # | Page | URL | Special Actions | +| --- | ------------------------- | ---------------------------- | ------------------------------------------------------------------------------------------------ | +| 1 | Workflow List | `/` | Run Chrome performance trace (reload + auto-stop). Check LCP, CLS, TTFB. Identify critical path. | +| 2 | Workflow Detail (Design) | `/workflows/:id` | Click a workflow from list page (SPA navigation). Check cache reuse for components/summary. | +| 3 | Workflow Detail (Execute) | `/workflows/:id/runs/:runId` | Switch to Execute tab. Watch for duplicate API calls. Check SSE stream connection. | +| 4 | Mobile (375px) | `/` | Resize viewport to 375x667. Check sidebar collapse, column hiding, text wrapping. | +| 5 | Tablet (768px) | `/` | Resize viewport to 768x1024. Check layout adaptation. | +| 6 | Schedules | `/schedules` | Fresh navigation. 
| +| 7 | Webhooks | `/webhooks` | Fresh navigation. | +| 8 | Action Center | `/action-center` | Fresh navigation. Check pending human input count. | +| 9 | Artifact Library | `/artifacts` | Fresh navigation. | +| 10 | Secrets | `/secrets` | Fresh navigation. | +| 11 | API Keys | `/api-keys` | Fresh navigation. | +| 12 | MCP Library | `/mcp-library` | Fresh navigation. Check tool count and payload size. | +| 13 | Analytics Settings | `/analytics-settings` | Fresh navigation. Verify if any page-specific queries exist. | + +## TanStack Query Cache Extraction Snippet + +Run in Chrome DevTools console on any page: + +```js +(() => { + const root = document.querySelector('#root'); + const fiberKey = Object.keys(root).find((k) => k.startsWith('__reactContainer')); + let fiber = root[fiberKey], + found = null; + const visited = new Set(), + queue = [fiber]; + let i = 0; + while (queue.length && i < 500) { + i++; + const f = queue.shift(); + if (!f || visited.has(f)) continue; + visited.add(f); + let s = f.memoizedState, + si = 0; + while (s && si < 20) { + si++; + if (s.memoizedState?.getQueryCache) { + found = s.memoizedState; + break; + } + s = s.next; + } + if (found) break; + if (f.pendingProps?.client?.getQueryCache) { + found = f.pendingProps.client; + break; + } + if (f.child) queue.push(f.child); + if (f.sibling) queue.push(f.sibling); + } + if (!found) return 'QueryClient not found'; + return found + .getQueryCache() + .getAll() + .map((q) => ({ + key: JSON.stringify(q.queryKey), + status: q.state.status, + fetchStatus: q.state.fetchStatus, + isStale: q.isStale(), + size: Array.isArray(q.state.data) ? 
q.state.data.length + ' items' : typeof q.state.data, + staleTime: q.options?.staleTime, + })); +})(); +``` + +## What to Look For + +- **Ghost/PENDING queries:** status=pending + fetchStatus=idle (created but never fetched) +- **Duplicate API calls:** same endpoint called 2+ times on single navigation +- **Cache misses:** prefetched queries (components, workflowsSummary) not served from cache +- **Aggressive stale times:** queries going stale immediately after fetch +- **Missing pagination:** list endpoints returning all items without limit/offset +- **Large payloads:** decoded size > 50KB for a single endpoint +- **Query key inconsistencies:** missing org-id scoping vs other queries + +## Comparing with Previous Reports + +After completing an audit, save the report to `frontend/docs/audits/` with a datetime stamp in the filename (e.g., `load-audit-2026-02-18T14-30.md`). This allows multiple audits per day (e.g., before and after a fix). Compare key metrics across reports: + +- LCP / CLS / TTFB +- DOM element counts per page +- API call counts and duplicates +- Ghost query count +- Stale time configuration changes +- New pages or removed pages