From 07bd5111e6802f1fa475aa5eec5697f2267756cb Mon Sep 17 00:00:00 2001 From: Ditadi Date: Fri, 23 Jan 2026 13:12:50 +0100 Subject: [PATCH 01/13] chore: taskflow package --- packages/taskflow/package.json | 30 ++++ packages/taskflow/src/index.test.ts | 8 + packages/taskflow/src/index.ts | 3 + packages/taskflow/tsconfig.json | 12 ++ packages/taskflow/tsdown.config.ts | 19 +++ pnpm-lock.yaml | 224 ++++++++++++++++++---------- tsconfig.json | 3 +- vitest.config.ts | 10 +- 8 files changed, 231 insertions(+), 78 deletions(-) create mode 100644 packages/taskflow/package.json create mode 100644 packages/taskflow/src/index.test.ts create mode 100644 packages/taskflow/src/index.ts create mode 100644 packages/taskflow/tsconfig.json create mode 100644 packages/taskflow/tsdown.config.ts diff --git a/packages/taskflow/package.json b/packages/taskflow/package.json new file mode 100644 index 00000000..b71e7f8f --- /dev/null +++ b/packages/taskflow/package.json @@ -0,0 +1,30 @@ +{ + "name": "@databricks/taskflow", + "type": "module", + "version": "0.1.0", + "description": "", + "main": "dist/index.js", + "types": "dist/index.d.ts", + "packageManager": "pnpm@10.21.0", + "keywords": [], + "author": "", + "license": "ISC", + "exports": { + ".": { + "development": "./src/index.ts", + "default": "./dist/index.js" + }, + "./package.json": "./package.json" + }, + "scripts": { + "build:package": "tsdown --config tsdown.config.ts", + "build:watch": "tsdown --config tsdown.config.ts --watch", + "typecheck": "tsc --noEmit", + "clean": "rm -rf dist", + "clean:full": "rm -rf dist node_modules", + "test": "cd ../.. && vitest run --project=taskflow" + }, + "devDependencies": { + "vitest": "^3.2.4" + } +} diff --git a/packages/taskflow/src/index.test.ts b/packages/taskflow/src/index.test.ts new file mode 100644 index 00000000..dd3acf8f --- /dev/null +++ b/packages/taskflow/src/index.test.ts @@ -0,0 +1,8 @@ +import { describe, expect, it } from "vitest"; +import { hello } from "./index"; + +describe("hello", () => { + it("should return a greeting", () => { + expect(hello("World")).toBe("Hello, World!"); + }); +}); diff --git a/packages/taskflow/src/index.ts b/packages/taskflow/src/index.ts new file mode 100644 index 00000000..bbc74285 --- /dev/null +++ b/packages/taskflow/src/index.ts @@ -0,0 +1,3 @@ +export function hello(name: string): string { + return `Hello, ${name}!`; +} diff --git a/packages/taskflow/tsconfig.json b/packages/taskflow/tsconfig.json new file mode 100644 index 00000000..4a6e68b3 --- /dev/null +++ b/packages/taskflow/tsconfig.json @@ -0,0 +1,12 @@ +{ + "extends": "../../tsconfig.json", + "compilerOptions": { + "outDir": "dist", + "baseUrl": ".", + "paths": { + "@/*": ["src/*"] + } + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist"] +} diff --git a/packages/taskflow/tsdown.config.ts b/packages/taskflow/tsdown.config.ts new file mode 100644 index 00000000..78490e93 --- /dev/null +++ b/packages/taskflow/tsdown.config.ts @@ -0,0 +1,19 @@ +import { defineConfig } from "tsdown"; + +export default defineConfig({ + name: "@databricks/taskflow", + entry: "src/index.ts", + outDir: "dist", + format: "esm", + platform: "neutral", + minify: false, + dts: true, + sourcemap: false, + clean: false, + unbundle: true, + skipNodeModulesBundle: true, + tsconfig: "./tsconfig.json", + exports: { + devExports: "development", + }, +}); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index d63a0a85..14988671 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -507,6 +507,12 @@ importers: specifier: ^11.2.0 
version: 11.2.0 + packages/taskflow: + devDependencies: + vitest: + specifier: ^3.2.4 + version: 3.2.4(@types/debug@4.1.12)(@types/node@24.10.1)(jiti@2.6.1)(jsdom@27.0.0(bufferutil@4.0.9)(postcss@8.5.6))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.20.6)(yaml@2.8.1) + packages: '@ai-sdk/gateway@2.0.21': @@ -3000,8 +3006,8 @@ packages: resolution: {integrity: sha512-Z7x2dZOmznihvdvCvLKMl+nswtOSVxS2H2ocar+U9xx6iMfTp0VGIrX6a4xB1v80IwOPC7dT1LXIJrY70Xu3Jw==} engines: {node: ^20.19.0 || >=22.12.0} - '@oxc-project/types@0.108.0': - resolution: {integrity: sha512-7lf13b2IA/kZO6xgnIZA88sq3vwrxWk+2vxf6cc+omwYCRTiA5e63Beqf3fz/v8jEviChWWmFYBwzfSeyrsj7Q==} + '@oxc-project/types@0.110.0': + resolution: {integrity: sha512-6Ct21OIlrEnFEJk5LT4e63pk3btsI6/TusD/GStLi7wYlGJNOl1GI9qvXAnRAxQU9zqA2Oz+UwhfTOU2rPZVow==} '@oxc-project/types@0.93.0': resolution: {integrity: sha512-yNtwmWZIBtJsMr5TEfoZFDxIWV6OdScOpza/f5YxbqUMJk+j6QX3Cf3jgZShGEFYWQJ5j9mJ6jM0tZHu2J9Yrg==} @@ -3721,8 +3727,8 @@ packages: cpu: [arm64] os: [android] - '@rolldown/binding-android-arm64@1.0.0-beta.60': - resolution: {integrity: sha512-hOW6iQXtpG4uCW1zGK56+KhEXGttSkTp2ykncW/nkOIF/jOKTqbM944Q73HVeMXP1mPRvE2cZwNp3xeLIeyIGQ==} + '@rolldown/binding-android-arm64@1.0.0-rc.1': + resolution: {integrity: sha512-He6ZoCfv5D7dlRbrhNBkuMVIHd0GDnjJwbICE1OWpG7G3S2gmJ+eXkcNLJjzjNDpeI2aRy56ou39AJM9AD8YFA==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [arm64] os: [android] @@ -3733,8 +3739,8 @@ packages: cpu: [arm64] os: [darwin] - '@rolldown/binding-darwin-arm64@1.0.0-beta.60': - resolution: {integrity: sha512-vyDA4HXY2mP8PPtl5UE17uGPxUNG4m1wkfa3kAkR8JWrFbarV97UmLq22IWrNhtBPa89xqerzLK8KoVmz5JqCQ==} + '@rolldown/binding-darwin-arm64@1.0.0-rc.1': + resolution: {integrity: sha512-YzJdn08kSOXnj85ghHauH2iHpOJ6eSmstdRTLyaziDcUxe9SyQJgGyx/5jDIhDvtOcNvMm2Ju7m19+S/Rm1jFg==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [arm64] os: [darwin] @@ -3745,8 +3751,8 @@ packages: cpu: [x64] os: [darwin] - '@rolldown/binding-darwin-x64@1.0.0-beta.60': - resolution: {integrity: sha512-WnxyqxAKP2BsxouwGY/RCF5UFw/LA4QOHhJ7VEl+UCelHokiwqNHRbryLAyRy3TE1FZ5eae+vAFcaetAu/kWLw==} + '@rolldown/binding-darwin-x64@1.0.0-rc.1': + resolution: {integrity: sha512-cIvAbqM+ZVV6lBSKSBtlNqH5iCiW933t1q8j0H66B3sjbe8AxIRetVqfGgcHcJtMzBIkIALlL9fcDrElWLJQcQ==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [x64] os: [darwin] @@ -3757,8 +3763,8 @@ packages: cpu: [x64] os: [freebsd] - '@rolldown/binding-freebsd-x64@1.0.0-beta.60': - resolution: {integrity: sha512-JtyWJ+zXOHof5gOUYwdTWI2kL6b8q9eNwqB/oD4mfUFaC/COEB2+47JMhcq78dey9Ahmec3DZKRDZPRh9hNAMQ==} + '@rolldown/binding-freebsd-x64@1.0.0-rc.1': + resolution: {integrity: sha512-rVt+B1B/qmKwCl1XD02wKfgh3vQPXRXdB/TicV2w6g7RVAM1+cZcpigwhLarqiVCxDObFZ7UgXCxPC7tpDoRog==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [x64] os: [freebsd] @@ -3769,8 +3775,8 @@ packages: cpu: [arm] os: [linux] - '@rolldown/binding-linux-arm-gnueabihf@1.0.0-beta.60': - resolution: {integrity: sha512-LrMoKqpHx+kCaNSk84iSBd4yVOymLIbxJQtvFjDN2CjQraownR+IXcwYDblFcj9ivmS54T3vCboXBbm3s1zbPQ==} + '@rolldown/binding-linux-arm-gnueabihf@1.0.0-rc.1': + resolution: {integrity: sha512-69YKwJJBOFprQa1GktPgbuBOfnn+EGxu8sBJ1TjPER+zhSpYeaU4N07uqmyBiksOLGXsMegymuecLobfz03h8Q==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [arm] os: [linux] @@ -3781,8 +3787,8 @@ packages: cpu: [arm64] os: [linux] - '@rolldown/binding-linux-arm64-gnu@1.0.0-beta.60': - resolution: {integrity: sha512-sqI+Vdx1gmXJMsXN3Fsewm3wlt7RHvRs1uysSp//NLsCoh9ZFEUr4ZzGhWKOg6Rvf+njNu/vCsz96x7wssLejQ==} + 
'@rolldown/binding-linux-arm64-gnu@1.0.0-rc.1': + resolution: {integrity: sha512-9JDhHUf3WcLfnViFWm+TyorqUtnSAHaCzlSNmMOq824prVuuzDOK91K0Hl8DUcEb9M5x2O+d2/jmBMsetRIn3g==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [arm64] os: [linux] @@ -3793,8 +3799,8 @@ packages: cpu: [arm64] os: [linux] - '@rolldown/binding-linux-arm64-musl@1.0.0-beta.60': - resolution: {integrity: sha512-8xlqGLDtTP8sBfYwneTDu8+PRm5reNEHAuI/+6WPy9y350ls0KTFd3EJCOWEXWGW0F35ko9Fn9azmurBTjqOrQ==} + '@rolldown/binding-linux-arm64-musl@1.0.0-rc.1': + resolution: {integrity: sha512-UvApLEGholmxw/HIwmUnLq3CwdydbhaHHllvWiCTNbyGom7wTwOtz5OAQbAKZYyiEOeIXZNPkM7nA4Dtng7CLw==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [arm64] os: [linux] @@ -3805,8 +3811,8 @@ packages: cpu: [x64] os: [linux] - '@rolldown/binding-linux-x64-gnu@1.0.0-beta.60': - resolution: {integrity: sha512-iR4nhVouVZK1CiGGGyz+prF5Lw9Lmz30Rl36Hajex+dFVFiegka604zBwzTp5Tl0BZnr50ztnVJ30tGrBhDr8Q==} + '@rolldown/binding-linux-x64-gnu@1.0.0-rc.1': + resolution: {integrity: sha512-uVctNgZHiGnJx5Fij7wHLhgw4uyZBVi6mykeWKOqE7bVy9Hcxn0fM/IuqdMwk6hXlaf9fFShDTFz2+YejP+x0A==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [x64] os: [linux] @@ -3817,8 +3823,8 @@ packages: cpu: [x64] os: [linux] - '@rolldown/binding-linux-x64-musl@1.0.0-beta.60': - resolution: {integrity: sha512-HbfNcqNeqxFjSMf1Kpe8itr2e2lr0Bm6HltD2qXtfU91bSSikVs9EWsa1ThshQ1v2ZvxXckGjlVLtah6IoslPg==} + '@rolldown/binding-linux-x64-musl@1.0.0-rc.1': + resolution: {integrity: sha512-T6Eg0xWwcxd/MzBcuv4Z37YVbUbJxy5cMNnbIt/Yr99wFwli30O4BPlY8hKeGyn6lWNtU0QioBS46lVzDN38bg==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [x64] os: [linux] @@ -3829,8 +3835,8 @@ packages: cpu: [arm64] os: [openharmony] - '@rolldown/binding-openharmony-arm64@1.0.0-beta.60': - resolution: {integrity: sha512-BiiamFcgTJ+ZFOUIMO9AHXUo9WXvHVwGfSrJ+Sv0AsTd2w3VN7dJGiH3WRcxKFetljJHWvGbM4fdpY5lf6RIvw==} + '@rolldown/binding-openharmony-arm64@1.0.0-rc.1': + resolution: {integrity: sha512-PuGZVS2xNJyLADeh2F04b+Cz4NwvpglbtWACgrDOa5YDTEHKwmiTDjoD5eZ9/ptXtcpeFrMqD2H4Zn33KAh1Eg==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [arm64] os: [openharmony] @@ -3840,8 +3846,8 @@ packages: engines: {node: '>=14.0.0'} cpu: [wasm32] - '@rolldown/binding-wasm32-wasi@1.0.0-beta.60': - resolution: {integrity: sha512-6roXGbHMdR2ucnxXuwbmQvk8tuYl3VGu0yv13KxspyKBxxBd4RS6iykzLD6mX2gMUHhfX8SVWz7n/62gfyKHow==} + '@rolldown/binding-wasm32-wasi@1.0.0-rc.1': + resolution: {integrity: sha512-2mOxY562ihHlz9lEXuaGEIDCZ1vI+zyFdtsoa3M62xsEunDXQE+DVPO4S4x5MPK9tKulG/aFcA/IH5eVN257Cw==} engines: {node: '>=14.0.0'} cpu: [wasm32] @@ -3851,8 +3857,8 @@ packages: cpu: [arm64] os: [win32] - '@rolldown/binding-win32-arm64-msvc@1.0.0-beta.60': - resolution: {integrity: sha512-JBOm8/DC/CKnHyMHoJFdvzVHxUixid4dGkiTqGflxOxO43uSJMpl77pSPXvzwZ/VXwqblU2V0/PanyCBcRLowQ==} + '@rolldown/binding-win32-arm64-msvc@1.0.0-rc.1': + resolution: {integrity: sha512-oQVOP5cfAWZwRD0Q3nGn/cA9FW3KhMMuQ0NIndALAe6obqjLhqYVYDiGGRGrxvnjJsVbpLwR14gIUYnpIcHR1g==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [arm64] os: [win32] @@ -3869,8 +3875,8 @@ packages: cpu: [x64] os: [win32] - '@rolldown/binding-win32-x64-msvc@1.0.0-beta.60': - resolution: {integrity: sha512-MKF0B823Efp+Ot8KsbwIuGhKH58pf+2rSM6VcqyNMlNBHheOM0Gf7JmEu+toc1jgN6fqjH7Et+8hAzsLVkIGfA==} + '@rolldown/binding-win32-x64-msvc@1.0.0-rc.1': + resolution: {integrity: sha512-Ydsxxx++FNOuov3wCBPaYjZrEvKOOGq3k+BF4BPridhg2pENfitSRD2TEuQ8i33bp5VptuNdC9IzxRKU031z5A==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [x64] os: [win32] @@ -3884,8 +3890,8 @@ packages: 
'@rolldown/pluginutils@1.0.0-beta.47': resolution: {integrity: sha512-8QagwMH3kNCuzD8EWL8R2YPW5e4OrHNSAHRFDdmFqEwEaD/KcNKjVoumo+gP2vW5eKB2UPbM6vTYiGZX0ixLnw==} - '@rolldown/pluginutils@1.0.0-beta.60': - resolution: {integrity: sha512-Jz4aqXRPVtqkH1E3jRDzLO5cgN5JwW+WG0wXGE4NiJd25nougv/AHzxmKCzmVQUYnxLmTM0M4wrZp+LlC2FKLg==} + '@rolldown/pluginutils@1.0.0-rc.1': + resolution: {integrity: sha512-UTBjtTxVOhodhzFVp/ayITaTETRHPUPYZPXQe0WU0wOgxghMojXxYjOiPOauKIYNWJAWS2fd7gJgGQK8GU8vDA==} '@rollup/rollup-android-arm-eabi@4.52.4': resolution: {integrity: sha512-BTm2qKNnWIQ5auf4deoetINJm2JzvihvGb9R6K/ETwKLql/Bb3Eg2H1FBp1gUb4YGbydMA3jcmQTR73q7J+GAA==} @@ -7868,9 +7874,6 @@ packages: resolution: {integrity: sha512-wpGPwyg/xrSp4H4Db4xYSeAr6+cFQGHfspHzDUdYxswDnUW0L5Ov63UuJiSr8NMSpyaChO4u1n0MXUvVPtrN6A==} engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} - magic-string@0.30.19: - resolution: {integrity: sha512-2N21sPY9Ws53PZvsEpVtNuSW+ScYbQdp4b9qUaL+9QkHUrGFKo56Lg9Emg5s9V/qrtNBmiR01sYhUOwu3H+VOw==} - magic-string@0.30.21: resolution: {integrity: sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==} @@ -9641,8 +9644,8 @@ packages: engines: {node: ^20.19.0 || >=22.12.0} hasBin: true - rolldown@1.0.0-beta.60: - resolution: {integrity: sha512-YYgpv7MiTp9LdLj1fzGzCtij8Yi2OKEc3HQtfbIxW4yuSgpQz9518I69U72T5ErPA/ATOXqlcisiLrWy+5V9YA==} + rolldown@1.0.0-rc.1: + resolution: {integrity: sha512-M3AeZjYE6UclblEf531Hch0WfVC/NOL43Cc+WdF3J50kk5/fvouHhDumSGTh0oRjbZ8C4faaVr5r6Nx1xMqDGg==} engines: {node: ^20.19.0 || >=22.12.0} hasBin: true @@ -10826,6 +10829,7 @@ packages: whatwg-encoding@3.1.1: resolution: {integrity: sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==} engines: {node: '>=18'} + deprecated: Use @exodus/bytes instead for a more spec-conformant and faster implementation whatwg-mimetype@4.0.0: resolution: {integrity: sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==} @@ -14578,7 +14582,7 @@ snapshots: '@oxc-project/runtime@0.92.0': {} - '@oxc-project/types@0.108.0': {} + '@oxc-project/types@0.110.0': {} '@oxc-project/types@0.93.0': {} @@ -15322,61 +15326,61 @@ snapshots: '@rolldown/binding-android-arm64@1.0.0-beta.41': optional: true - '@rolldown/binding-android-arm64@1.0.0-beta.60': + '@rolldown/binding-android-arm64@1.0.0-rc.1': optional: true '@rolldown/binding-darwin-arm64@1.0.0-beta.41': optional: true - '@rolldown/binding-darwin-arm64@1.0.0-beta.60': + '@rolldown/binding-darwin-arm64@1.0.0-rc.1': optional: true '@rolldown/binding-darwin-x64@1.0.0-beta.41': optional: true - '@rolldown/binding-darwin-x64@1.0.0-beta.60': + '@rolldown/binding-darwin-x64@1.0.0-rc.1': optional: true '@rolldown/binding-freebsd-x64@1.0.0-beta.41': optional: true - '@rolldown/binding-freebsd-x64@1.0.0-beta.60': + '@rolldown/binding-freebsd-x64@1.0.0-rc.1': optional: true '@rolldown/binding-linux-arm-gnueabihf@1.0.0-beta.41': optional: true - '@rolldown/binding-linux-arm-gnueabihf@1.0.0-beta.60': + '@rolldown/binding-linux-arm-gnueabihf@1.0.0-rc.1': optional: true '@rolldown/binding-linux-arm64-gnu@1.0.0-beta.41': optional: true - '@rolldown/binding-linux-arm64-gnu@1.0.0-beta.60': + '@rolldown/binding-linux-arm64-gnu@1.0.0-rc.1': optional: true '@rolldown/binding-linux-arm64-musl@1.0.0-beta.41': optional: true - '@rolldown/binding-linux-arm64-musl@1.0.0-beta.60': + '@rolldown/binding-linux-arm64-musl@1.0.0-rc.1': optional: true '@rolldown/binding-linux-x64-gnu@1.0.0-beta.41': optional: true - 
'@rolldown/binding-linux-x64-gnu@1.0.0-beta.60': + '@rolldown/binding-linux-x64-gnu@1.0.0-rc.1': optional: true '@rolldown/binding-linux-x64-musl@1.0.0-beta.41': optional: true - '@rolldown/binding-linux-x64-musl@1.0.0-beta.60': + '@rolldown/binding-linux-x64-musl@1.0.0-rc.1': optional: true '@rolldown/binding-openharmony-arm64@1.0.0-beta.41': optional: true - '@rolldown/binding-openharmony-arm64@1.0.0-beta.60': + '@rolldown/binding-openharmony-arm64@1.0.0-rc.1': optional: true '@rolldown/binding-wasm32-wasi@1.0.0-beta.41': @@ -15384,7 +15388,7 @@ snapshots: '@napi-rs/wasm-runtime': 1.0.7 optional: true - '@rolldown/binding-wasm32-wasi@1.0.0-beta.60': + '@rolldown/binding-wasm32-wasi@1.0.0-rc.1': dependencies: '@napi-rs/wasm-runtime': 1.1.1 optional: true @@ -15392,7 +15396,7 @@ snapshots: '@rolldown/binding-win32-arm64-msvc@1.0.0-beta.41': optional: true - '@rolldown/binding-win32-arm64-msvc@1.0.0-beta.60': + '@rolldown/binding-win32-arm64-msvc@1.0.0-rc.1': optional: true '@rolldown/binding-win32-ia32-msvc@1.0.0-beta.41': @@ -15401,7 +15405,7 @@ snapshots: '@rolldown/binding-win32-x64-msvc@1.0.0-beta.41': optional: true - '@rolldown/binding-win32-x64-msvc@1.0.0-beta.60': + '@rolldown/binding-win32-x64-msvc@1.0.0-rc.1': optional: true '@rolldown/pluginutils@1.0.0-beta.38': {} @@ -15410,7 +15414,7 @@ snapshots: '@rolldown/pluginutils@1.0.0-beta.47': {} - '@rolldown/pluginutils@1.0.0-beta.60': {} + '@rolldown/pluginutils@1.0.0-rc.1': {} '@rollup/rollup-android-arm-eabi@4.52.4': optional: true @@ -16371,6 +16375,14 @@ snapshots: chai: 5.3.3 tinyrainbow: 2.0.0 + '@vitest/mocker@3.2.4(vite@7.2.4(@types/node@24.10.1)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.20.6)(yaml@2.8.1))': + dependencies: + '@vitest/spy': 3.2.4 + estree-walker: 3.0.3 + magic-string: 0.30.21 + optionalDependencies: + vite: 7.2.4(@types/node@24.10.1)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.20.6)(yaml@2.8.1) + '@vitest/mocker@3.2.4(vite@7.2.4(@types/node@24.7.2)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.20.6)(yaml@2.8.1))': dependencies: '@vitest/spy': 3.2.4 @@ -19839,10 +19851,6 @@ snapshots: macos-release@3.4.0: {} - magic-string@0.30.19: - dependencies: - '@jridgewell/sourcemap-codec': 1.5.5 - magic-string@0.30.21: dependencies: '@jridgewell/sourcemap-codec': 1.5.5 @@ -22033,7 +22041,7 @@ snapshots: robust-predicates@3.0.2: {} - rolldown-plugin-dts@0.16.11(rolldown@1.0.0-beta.60)(typescript@5.9.3): + rolldown-plugin-dts@0.16.11(rolldown@1.0.0-rc.1)(typescript@5.9.3): dependencies: '@babel/generator': 7.28.3 '@babel/parser': 7.28.5 @@ -22043,8 +22051,8 @@ snapshots: debug: 4.4.3 dts-resolver: 2.1.2 get-tsconfig: 4.12.0 - magic-string: 0.30.19 - rolldown: 1.0.0-beta.60 + magic-string: 0.30.21 + rolldown: 1.0.0-rc.1 optionalDependencies: typescript: 5.9.3 transitivePeerDependencies: @@ -22108,24 +22116,24 @@ snapshots: '@rolldown/binding-win32-ia32-msvc': 1.0.0-beta.41 '@rolldown/binding-win32-x64-msvc': 1.0.0-beta.41 - rolldown@1.0.0-beta.60: + rolldown@1.0.0-rc.1: dependencies: - '@oxc-project/types': 0.108.0 - '@rolldown/pluginutils': 1.0.0-beta.60 + '@oxc-project/types': 0.110.0 + '@rolldown/pluginutils': 1.0.0-rc.1 optionalDependencies: - '@rolldown/binding-android-arm64': 1.0.0-beta.60 - '@rolldown/binding-darwin-arm64': 1.0.0-beta.60 - '@rolldown/binding-darwin-x64': 1.0.0-beta.60 - '@rolldown/binding-freebsd-x64': 1.0.0-beta.60 - '@rolldown/binding-linux-arm-gnueabihf': 1.0.0-beta.60 - '@rolldown/binding-linux-arm64-gnu': 1.0.0-beta.60 - '@rolldown/binding-linux-arm64-musl': 
1.0.0-beta.60 - '@rolldown/binding-linux-x64-gnu': 1.0.0-beta.60 - '@rolldown/binding-linux-x64-musl': 1.0.0-beta.60 - '@rolldown/binding-openharmony-arm64': 1.0.0-beta.60 - '@rolldown/binding-wasm32-wasi': 1.0.0-beta.60 - '@rolldown/binding-win32-arm64-msvc': 1.0.0-beta.60 - '@rolldown/binding-win32-x64-msvc': 1.0.0-beta.60 + '@rolldown/binding-android-arm64': 1.0.0-rc.1 + '@rolldown/binding-darwin-arm64': 1.0.0-rc.1 + '@rolldown/binding-darwin-x64': 1.0.0-rc.1 + '@rolldown/binding-freebsd-x64': 1.0.0-rc.1 + '@rolldown/binding-linux-arm-gnueabihf': 1.0.0-rc.1 + '@rolldown/binding-linux-arm64-gnu': 1.0.0-rc.1 + '@rolldown/binding-linux-arm64-musl': 1.0.0-rc.1 + '@rolldown/binding-linux-x64-gnu': 1.0.0-rc.1 + '@rolldown/binding-linux-x64-musl': 1.0.0-rc.1 + '@rolldown/binding-openharmony-arm64': 1.0.0-rc.1 + '@rolldown/binding-wasm32-wasi': 1.0.0-rc.1 + '@rolldown/binding-win32-arm64-msvc': 1.0.0-rc.1 + '@rolldown/binding-win32-x64-msvc': 1.0.0-rc.1 rollup@4.52.4: dependencies: @@ -22773,8 +22781,8 @@ snapshots: diff: 8.0.2 empathic: 2.0.0 hookable: 5.5.3 - rolldown: 1.0.0-beta.60 - rolldown-plugin-dts: 0.16.11(rolldown@1.0.0-beta.60)(typescript@5.9.3) + rolldown: 1.0.0-rc.1 + rolldown-plugin-dts: 0.16.11(rolldown@1.0.0-rc.1)(typescript@5.9.3) semver: 7.7.3 tinyexec: 1.0.1 tinyglobby: 0.2.15 @@ -23145,6 +23153,27 @@ snapshots: d3-time: 3.1.0 d3-timer: 3.0.1 + vite-node@3.2.4(@types/node@24.10.1)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.20.6)(yaml@2.8.1): + dependencies: + cac: 6.7.14 + debug: 4.4.3 + es-module-lexer: 1.7.0 + pathe: 2.0.3 + vite: 7.2.4(@types/node@24.10.1)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.20.6)(yaml@2.8.1) + transitivePeerDependencies: + - '@types/node' + - jiti + - less + - lightningcss + - sass + - sass-embedded + - stylus + - sugarss + - supports-color + - terser + - tsx + - yaml + vite-node@3.2.4(@types/node@24.7.2)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.20.6)(yaml@2.8.1): dependencies: cac: 6.7.14 @@ -23211,6 +23240,49 @@ snapshots: tsx: 4.20.6 yaml: 2.8.1 + vitest@3.2.4(@types/debug@4.1.12)(@types/node@24.10.1)(jiti@2.6.1)(jsdom@27.0.0(bufferutil@4.0.9)(postcss@8.5.6))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.20.6)(yaml@2.8.1): + dependencies: + '@types/chai': 5.2.2 + '@vitest/expect': 3.2.4 + '@vitest/mocker': 3.2.4(vite@7.2.4(@types/node@24.10.1)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.20.6)(yaml@2.8.1)) + '@vitest/pretty-format': 3.2.4 + '@vitest/runner': 3.2.4 + '@vitest/snapshot': 3.2.4 + '@vitest/spy': 3.2.4 + '@vitest/utils': 3.2.4 + chai: 5.3.3 + debug: 4.4.3 + expect-type: 1.2.2 + magic-string: 0.30.21 + pathe: 2.0.3 + picomatch: 4.0.3 + std-env: 3.10.0 + tinybench: 2.9.0 + tinyexec: 0.3.2 + tinyglobby: 0.2.15 + tinypool: 1.1.1 + tinyrainbow: 2.0.0 + vite: 7.2.4(@types/node@24.10.1)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.20.6)(yaml@2.8.1) + vite-node: 3.2.4(@types/node@24.10.1)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.20.6)(yaml@2.8.1) + why-is-node-running: 2.3.0 + optionalDependencies: + '@types/debug': 4.1.12 + '@types/node': 24.10.1 + jsdom: 27.0.0(bufferutil@4.0.9)(postcss@8.5.6) + transitivePeerDependencies: + - jiti + - less + - lightningcss + - msw + - sass + - sass-embedded + - stylus + - sugarss + - supports-color + - terser + - tsx + - yaml + vitest@3.2.4(@types/debug@4.1.12)(@types/node@24.7.2)(jiti@2.6.1)(jsdom@27.0.0(bufferutil@4.0.9)(postcss@8.5.6))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.20.6)(yaml@2.8.1): dependencies: '@types/chai': 5.2.2 @@ -23224,7 
+23296,7 @@ snapshots: chai: 5.3.3 debug: 4.4.3 expect-type: 1.2.2 - magic-string: 0.30.19 + magic-string: 0.30.21 pathe: 2.0.3 picomatch: 4.0.3 std-env: 3.10.0 diff --git a/tsconfig.json b/tsconfig.json index 275b2ce2..8a041820 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -16,7 +16,8 @@ "@databricks/appkit": ["packages/appkit/*"], "@databricks/appkit-ui": ["packages/appkit-ui/*"], "shared": ["packages/shared/src"], - "@tools/*": ["tools/*"] + "@tools/*": ["tools/*"], + "taskflow": ["packages/taskflow/src"] } }, "exclude": ["node_modules", "dist"] diff --git a/vitest.config.ts b/vitest.config.ts index 2e612f1b..535671a8 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -1,5 +1,5 @@ -import react from "@vitejs/plugin-react"; import path from "node:path"; +import react from "@vitejs/plugin-react"; import tsconfigPaths from "vite-tsconfig-paths"; import { defineConfig } from "vitest/config"; @@ -49,6 +49,14 @@ export default defineConfig({ environment: "node", }, }, + { + plugins: [tsconfigPaths()], + test: { + name: "taskflow", + root: "./packages/taskflow", + environment: "node", + }, + }, ], }, }); From 8b3850837651b6703ae1b2778155157d3b914efc Mon Sep 17 00:00:00 2001 From: Ditadi Date: Fri, 23 Jan 2026 16:03:54 +0100 Subject: [PATCH 02/13] feat(taskflow): add core types and error hierarchy --- packages/taskflow/src/core/errors.ts | 394 ++++++++++++++++++ packages/taskflow/src/core/types.ts | 55 +++ .../taskflow/src/tests/core/errors.test.ts | 358 ++++++++++++++++ .../taskflow/src/tests/core/types.test.ts | 82 ++++ 4 files changed, 889 insertions(+) create mode 100644 packages/taskflow/src/core/errors.ts create mode 100644 packages/taskflow/src/core/types.ts create mode 100644 packages/taskflow/src/tests/core/errors.test.ts create mode 100644 packages/taskflow/src/tests/core/types.test.ts diff --git a/packages/taskflow/src/core/errors.ts b/packages/taskflow/src/core/errors.ts new file mode 100644 index 00000000..58f5846e --- /dev/null +++ b/packages/taskflow/src/core/errors.ts @@ -0,0 +1,394 @@ +import type { TaskStatus } from "./types"; + +/** + * Error context that can be attached to any TaskSystemError + */ +export interface ErrorContext { + taskId?: string; + userId?: string; + templateName?: string; + idempotencyKey?: string; + [key: string]: unknown; // allow any additional context +} + +/** + * Structured error codes for consistent error handling + */ +export const ErrorCodes = { + // validation errors + VALIDATION_FAILED: "VALIDATION_FAILED", + CONFIG_VALIDATION_FAILED: "CONFIG_VALIDATION_FAILED", + + // resource errors + TASK_NOT_FOUND: "TASK_NOT_FOUND", + TEMPLATE_NOT_FOUND: "TEMPLATE_NOT_FOUND", + HANDLER_NOT_FOUND: "HANDLER_NOT_FOUND", + + // state errors + INVALID_STATE_TRANSITION: "INVALID_STATE_TRANSITION", + CONFLICT: "CONFLICT", + + // rate limiting / capacity errors + RATE_LIMIT_EXCEEDED: "RATE_LIMIT_EXCEEDED", + SLOT_TIMEOUT: "SLOT_TIMEOUT", + BACKPRESSURE: "BACKPRESSURE", + + // execution errors + RETRY_EXHAUSTED: "RETRY_EXHAUSTED", + STREAM_OVERFLOW: "STREAM_OVERFLOW", + + // system errors + INITIALIZATION_FAILED: "INITIALIZATION_FAILED", +} as const; + +export type ErrorCode = (typeof ErrorCodes)[keyof typeof ErrorCodes]; + +/** + * Base error class for all task system errors + * Provides structured error handling with context, timestamps, and cause chains. 
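+ *
+ * @example
+ * // Illustrative only - `originalError` is a hypothetical caught error:
+ * throw new TaskSystemError(
+ *   "Failed to execute task",
+ *   ErrorCodes.VALIDATION_FAILED,
+ *   { taskId: "task-123" },
+ *   originalError,
+ * );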
+ */
+export class TaskSystemError extends Error {
+  readonly code: ErrorCode;
+  readonly context?: ErrorContext;
+  readonly timestamp: number;
+  readonly cause?: Error;
+
+  constructor(
+    message: string,
+    code: ErrorCode = ErrorCodes.VALIDATION_FAILED,
+    context?: ErrorContext,
+    cause?: Error,
+  ) {
+    super(message);
+    this.name = "TaskSystemError";
+    this.code = code;
+    this.context = context;
+    this.timestamp = Date.now();
+    this.cause = cause;
+
+    if (Error.captureStackTrace) {
+      Error.captureStackTrace(this, this.constructor);
+    }
+  }
+
+  /**
+   * Serializes the error to a JSON-compatible object
+   */
+  toJSON(): Record<string, unknown> {
+    return {
+      name: this.name,
+      message: this.message,
+      code: this.code,
+      context: this.context,
+      timestamp: this.timestamp,
+      cause: this.cause
+        ? {
+            name: this.cause.name,
+            message: this.cause.message,
+          }
+        : undefined,
+      stack: this.stack,
+    };
+  }
+}
+
+/**
+ * Validation error for invalid task input or configuration
+ */
+export class ValidationError extends TaskSystemError {
+  readonly field?: string;
+
+  constructor(message: string, field?: string, context?: ErrorContext) {
+    super(message, ErrorCodes.VALIDATION_FAILED, context);
+    this.name = "ValidationError";
+    this.field = field;
+  }
+}
+
+/**
+ * Configuration validation error for invalid system configuration
+ */
+export class ConfigValidationError extends TaskSystemError {
+  readonly configPath?: string;
+
+  constructor(message: string, configPath?: string, context?: ErrorContext) {
+    super(message, ErrorCodes.CONFIG_VALIDATION_FAILED, {
+      ...context,
+      configPath,
+    });
+    this.name = "ConfigValidationError";
+    this.configPath = configPath;
+  }
+}
+
+/**
+ * Resource not found error
+ */
+export class NotFoundError extends TaskSystemError {
+  readonly resourceType: "task" | "template" | "handler";
+
+  constructor(
+    message: string,
+    resourceType: "task" | "template" | "handler",
+    context?: ErrorContext,
+  ) {
+    const code =
+      resourceType === "task"
+        ? ErrorCodes.TASK_NOT_FOUND
+        : resourceType === "template"
+          ? ErrorCodes.TEMPLATE_NOT_FOUND
+          : ErrorCodes.HANDLER_NOT_FOUND;
+    super(message, code, { ...context, resourceType });
+    this.name = "NotFoundError";
+    this.resourceType = resourceType;
+  }
+}
+
+/**
+ * Conflict error for duplicate operations
+ */
+export class ConflictError extends TaskSystemError {
+  constructor(message: string, context?: ErrorContext) {
+    super(message, ErrorCodes.CONFLICT, context);
+    this.name = "ConflictError";
+  }
+}
+
+/**
+ * Invalid state transition error
+ */
+export class TaskStateError extends TaskSystemError {
+  readonly currentState?: TaskStatus;
+  readonly attemptedTransition?: TaskStatus;
+  readonly validTransitions?: readonly TaskStatus[];
+
+  constructor(
+    message: string,
+    currentState?: TaskStatus,
+    attemptedTransition?: TaskStatus,
+    validTransitions?: readonly TaskStatus[],
+  ) {
+    super(message, ErrorCodes.INVALID_STATE_TRANSITION, {
+      currentState,
+      attemptedTransition,
+      validTransitions,
+    });
+
+    this.name = "TaskStateError";
+    this.currentState = currentState;
+    this.attemptedTransition = attemptedTransition;
+    this.validTransitions = validTransitions;
+  }
+}
+
+/**
+ * Slot acquisition timeout error
+ */
+export class SlotTimeoutError extends TaskSystemError {
+  readonly timeoutMs?: number;
+  constructor(message: string, timeoutMs?: number, context?: ErrorContext) {
+    super(message, ErrorCodes.SLOT_TIMEOUT, { ...context, timeoutMs });
+    this.name = "SlotTimeoutError";
+    this.timeoutMs = timeoutMs;
+  }
+}
+
+/**
+ * HTTP 429 response structure for backpressure errors
+ */
+export interface HTTP429Response {
+  status: 429;
+  headers: {
+    "Retry-After"?: string;
+    "X-RateLimit-Limit"?: string;
+    "X-RateLimit-Remaining"?: string;
+  };
+  body: {
+    error: string;
+    message: string;
+    retryAfterMs: number;
+  };
+}
+
+/**
+ * Backpressure error raised when the system rejects new work; maps to HTTP 429
+ */
+export class BackpressureError extends TaskSystemError {
+  readonly limit?: number;
+  readonly remaining?: number;
+  readonly retryAfterMs?: number;
+
+  constructor(
+    message: string,
+    limit?: number,
+    remaining?: number,
+    retryAfterMs?: number,
+    context?: ErrorContext,
+  ) {
+    super(message, ErrorCodes.BACKPRESSURE, {
+      ...context,
+      limit,
+      remaining,
+      retryAfterMs,
+    });
+    this.name = "BackpressureError";
+    this.limit = limit;
+    this.remaining = remaining;
+    this.retryAfterMs = retryAfterMs;
+  }
+
+  toHTTPResponse(): HTTP429Response {
+    return {
+      status: 429,
+      headers: {
+        "Retry-After": String(Math.ceil((this.retryAfterMs ?? 1000) / 1000)),
+        "X-RateLimit-Limit": String(this.limit ?? 0),
+        "X-RateLimit-Remaining": String(this.remaining ?? 0),
+      },
+      body: {
+        error: "TooManyRequests",
+        message: this.message,
+        retryAfterMs: this.retryAfterMs ?? 1000,
+      },
+    };
+  }
+}
+
+/**
+ * Initialization error for component startup failures
+ */
+export class InitializationError extends TaskSystemError {
+  readonly component?: string;
+
+  constructor(message: string, component?: string, context?: ErrorContext) {
+    super(message, ErrorCodes.INITIALIZATION_FAILED, { ...context, component });
+    this.name = "InitializationError";
+    this.component = component;
+  }
+}
+
+/**
+ * Retry exhausted error when all retry attempts have failed
+ */
+export class RetryExhaustedError extends TaskSystemError {
+  readonly attempts?: number;
+  readonly maxAttempts?: number;
+  constructor(
+    message: string,
+    attempts?: number,
+    maxAttempts?: number,
+    context?: ErrorContext,
+    cause?: Error,
+  ) {
+    super(
+      message,
+      ErrorCodes.RETRY_EXHAUSTED,
+      {
+        ...context,
+        attempts,
+        maxAttempts,
+      },
+      cause,
+    );
+    this.name = "RetryExhaustedError";
+    this.attempts = attempts;
+    this.maxAttempts = maxAttempts;
+  }
+}
+
+/**
+ * Stream overflow error when the stream has reached its maximum capacity
+ */
+export class StreamOverflowError extends TaskSystemError {
+  constructor(message: string, context?: ErrorContext) {
+    super(message, ErrorCodes.STREAM_OVERFLOW, context);
+    this.name = "StreamOverflowError";
+  }
+}
+
+// Known retryable error patterns
+const RETRYABLE_ERROR_PATTERNS = [
+  "ECONNRESET",
+  "ECONNREFUSED",
+  "ETIMEDOUT",
+  "ENOTFOUND",
+  "EAI_AGAIN",
+  "EHOSTUNREACH",
+  "ENETUNREACH",
+  "timeout",
+  "socket hang up",
+  "network",
+];
+
+// Known permanent error patterns
+const PERMANENT_ERROR_PATTERNS = [
+  "unauthorized",
+  "forbidden",
+  "invalid",
+  "malformed",
+  "not found",
+  "bad request",
+];
+
+/**
+ * Determines if an error is retryable
+ *
+ * - Returns true for BackpressureError, SlotTimeoutError
+ * - Returns true for network-related errors (ECONNRESET, timeout, etc.)
+ * - Returns false for ValidationError, NotFoundError, TaskStateError
+ * - Returns true for unknown errors (fail-safe by default)
+ */
+export function isRetryableError(error: unknown): boolean {
+  // null/undefined are not retryable
+  if (error === null || error === undefined) return false;
+
+  // check for known task system errors
+  if (isTaskSystemError(error)) {
+    // explicitly retryable
+    if (error instanceof BackpressureError || error instanceof SlotTimeoutError)
+      return true;
+
+    // explicitly not retryable
+    if (
+      error instanceof ValidationError ||
+      error instanceof ConfigValidationError ||
+      error instanceof NotFoundError ||
+      error instanceof TaskStateError ||
+      error instanceof ConflictError
+    )
+      return false;
+  }
+
+  // RetryExhaustedError means we've already tried - don't retry again
+  if (error instanceof RetryExhaustedError) return false;
+
+  // check the error message for patterns
+  const message =
+    error instanceof Error
+      ? error.message.toLowerCase()
+      : String(error).toLowerCase();
+
+  // check for permanent error patterns first
+  if (PERMANENT_ERROR_PATTERNS.some((pattern) => message.includes(pattern)))
+    return false;
+
+  // check for retryable patterns
+  if (RETRYABLE_ERROR_PATTERNS.some((pattern) => message.includes(pattern)))
+    return true;
+
+  // check for an http status code on the error
+  if (error instanceof Error && "status" in error) {
+    const status = (error as Error & { status: number }).status;
+    // 4xx errors (except 429) are not retryable
+    if (status >= 400 && status < 500 && status !== 429) return false;
+    // 5xx errors and 429 are retryable
+    if (status >= 500 || status === 429) return true;
+  }
+
+  // default: unknown error is retryable (fail-safe)
+  return true;
+}
+
+/**
+ * Type guard to check if an error is a TaskSystemError
+ */
+export function isTaskSystemError(error: unknown): error is TaskSystemError {
+  return error instanceof TaskSystemError;
+}
diff --git a/packages/taskflow/src/core/types.ts b/packages/taskflow/src/core/types.ts
new file mode 100644
index 00000000..06569ae2
--- /dev/null
+++ b/packages/taskflow/src/core/types.ts
@@ -0,0 +1,55 @@
+/**
+ * Core types for the task system
+ */
+
+/**
+ * Represents the lifecycle status of a task.
+ *
+ * State Machine:
+ * - created -> running -> completed
+ * - created -> running -> failed
+ * - created -> running -> cancelled
+ * - created -> cancelled
+ * - failed -> created (via resetToPending)
+ */
+export type TaskStatus =
+  | "created"
+  | "running"
+  | "completed"
+  | "failed"
+  | "cancelled";
+
+/**
+ * Represents the type of task execution.
+ * - "user": User-initiated tasks that support real-time streaming
+ * - "background": Background tasks that run without a connected client
+ */
+export type TaskType = "user" | "background";
+
+/**
+ * Valid state transitions for the task state machine.
+ * Used for validation in Task.ts
+ */
+export const VALID_TRANSITIONS: Record<TaskStatus, TaskStatus[]> = {
+  created: ["running", "cancelled"],
+  running: ["completed", "failed", "cancelled"],
+  completed: [],
+  failed: ["created"], // reset via resetToPending
+  cancelled: [],
+};
+
+/**
+ * Checks if a transition from one status to another is valid.
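+ *
+ * @example
+ * isValidTransition("created", "running"); // true
+ * isValidTransition("completed", "running"); // false (terminal state)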
+ */ +export function isValidTransition(from: TaskStatus, to: TaskStatus): boolean { + return VALID_TRANSITIONS[from]?.includes(to); +} + +/** + * Check if a status is terminal (no further transitions possible except reset) + */ +export function isTerminalStatus(status: TaskStatus): boolean { + return ( + status === "completed" || status === "failed" || status === "cancelled" + ); +} diff --git a/packages/taskflow/src/tests/core/errors.test.ts b/packages/taskflow/src/tests/core/errors.test.ts new file mode 100644 index 00000000..d6386d02 --- /dev/null +++ b/packages/taskflow/src/tests/core/errors.test.ts @@ -0,0 +1,358 @@ +import { describe, expect, it } from "vitest"; +import { + BackpressureError, + ConfigValidationError, + ConflictError, + ErrorCodes, + InitializationError, + isRetryableError, + isTaskSystemError, + NotFoundError, + RetryExhaustedError, + SlotTimeoutError, + StreamOverflowError, + TaskStateError, + TaskSystemError, + ValidationError, +} from "@/core/errors"; + +describe("Core Errors", () => { + describe("TaskSystemError", () => { + it("should create with message, context and timestamp", () => { + const error = new TaskSystemError( + "Test error", + ErrorCodes.VALIDATION_FAILED, + { + taskId: "task-123", + }, + ); + expect(error.name).toBe("TaskSystemError"); + expect(error.message).toBe("Test error"); + expect(error.code).toBe(ErrorCodes.VALIDATION_FAILED); + expect(error.context?.taskId).toEqual("task-123"); + expect(error.timestamp).toBeGreaterThan(0); + }); + + it("should include cause in error chain", () => { + const cause = new Error("Original error"); + const error = new TaskSystemError( + "Test error", + ErrorCodes.VALIDATION_FAILED, + undefined, + cause, + ); + + expect(error.cause).toBe(cause); + }); + + it("should serialize to JSON", () => { + const error = new TaskSystemError( + "Test error", + ErrorCodes.VALIDATION_FAILED, + { + taskId: "task-123", + }, + ); + const json = error.toJSON(); + + expect(json.name).toBe("TaskSystemError"); + expect(json.message).toBe("Test error"); + expect(json.code).toBe(ErrorCodes.VALIDATION_FAILED); + expect(json.context).toEqual({ taskId: "task-123" }); + expect(json.timestamp).toBeGreaterThan(0); + }); + + it("should serialize cause to JSON", () => { + const cause = new Error("Original error"); + const error = new TaskSystemError( + "Test error", + ErrorCodes.VALIDATION_FAILED, + undefined, + cause, + ); + const json = error.toJSON(); + expect(json.cause).toEqual({ + name: "Error", + message: "Original error", + }); + }); + }); + describe("ValidationError", () => { + it("should create with field property", () => { + const error = new ValidationError("Invalid name", "name"); + expect(error.name).toBe("ValidationError"); + expect(error.message).toBe("Invalid name"); + expect(error.field).toBe("name"); + expect(error.code).toBe(ErrorCodes.VALIDATION_FAILED); + }); + }); + describe("ConfigValidationError", () => { + it("should create with configPath", () => { + const error = new ConfigValidationError( + "Invalid config", + "executor.retry.maxAttempts", + ); + + expect(error.name).toBe("ConfigValidationError"); + expect(error.message).toBe("Invalid config"); + expect(error.configPath).toBe("executor.retry.maxAttempts"); + expect(error.code).toBe(ErrorCodes.CONFIG_VALIDATION_FAILED); + }); + }); + + describe("NotFoundError", () => { + it("should create with resourceType task", () => { + const error = new NotFoundError("Task not found", "task"); + expect(error.name).toBe("NotFoundError"); + expect(error.resourceType).toBe("task"); 
+ expect(error.code).toBe(ErrorCodes.TASK_NOT_FOUND); + }); + it("should create with resourceType template", () => { + const error = new NotFoundError("Template not found", "template"); + expect(error.name).toBe("NotFoundError"); + expect(error.resourceType).toBe("template"); + expect(error.code).toBe(ErrorCodes.TEMPLATE_NOT_FOUND); + }); + it("should create with resourceType handler", () => { + const error = new NotFoundError("Handler not found", "handler"); + expect(error.name).toBe("NotFoundError"); + expect(error.resourceType).toBe("handler"); + expect(error.code).toBe(ErrorCodes.HANDLER_NOT_FOUND); + }); + }); + + describe("ConflictError", () => { + it("should create conflict error", () => { + const error = new ConflictError("Resource already exists"); + expect(error.name).toBe("ConflictError"); + expect(error.message).toBe("Resource already exists"); + expect(error.code).toBe(ErrorCodes.CONFLICT); + }); + }); + + describe("TaskStateError", () => { + it("should create with currentState and attemptedTransition", () => { + const error = new TaskStateError( + "Invalid transition", + "created", + "completed", + ["running", "cancelled"], + ); + expect(error.name).toBe("TaskStateError"); + expect(error.currentState).toBe("created"); + expect(error.attemptedTransition).toBe("completed"); + expect(error.validTransitions).toEqual(["running", "cancelled"]); + expect(error.code).toBe(ErrorCodes.INVALID_STATE_TRANSITION); + }); + }); + + describe("SlotTimeoutError", () => { + it("should create with timeoutMs property", () => { + const error = new SlotTimeoutError("Slot acquisition timeout", 5000); + + expect(error.name).toBe("SlotTimeoutError"); + expect(error.timeoutMs).toBe(5000); + expect(error.code).toBe(ErrorCodes.SLOT_TIMEOUT); + }); + }); + + describe("BackpressureError", () => { + it("should create with limit, remaining, and retryAfterMs", () => { + const error = new BackpressureError( + "Resource backpressure", + 100, + 50, + 5000, + ); + + expect(error.limit).toBe(100); + expect(error.remaining).toBe(50); + expect(error.retryAfterMs).toBe(5000); + expect(error.code).toBe(ErrorCodes.BACKPRESSURE); + expect(error.name).toBe("BackpressureError"); + }); + + it("should convert to HTTP 429 response", () => { + const error = new BackpressureError( + "Resource backpressure", + 100, + 50, + 5000, + ); + const response = error.toHTTPResponse(); + + expect(response.status).toBe(429); + expect(response.headers["Retry-After"]).toBe("5"); + expect(response.headers["X-RateLimit-Limit"]).toBe("100"); + expect(response.headers["X-RateLimit-Remaining"]).toBe("50"); + expect(response.body.error).toBe("TooManyRequests"); + expect(response.body.retryAfterMs).toBe(5000); + }); + + it("should use defaults in HTTP response when values not provided", () => { + const error = new BackpressureError("Rate limit exceeded"); + const response = error.toHTTPResponse(); + + expect(response.headers["Retry-After"]).toBe("1"); + expect(response.headers["X-RateLimit-Limit"]).toBe("0"); + expect(response.headers["X-RateLimit-Remaining"]).toBe("0"); + expect(response.body.retryAfterMs).toBe(1000); + }); + }); + + describe("InitializationError", () => { + it("should create with component property", () => { + const error = new InitializationError( + "Failed to initialize", + "DatabaseConnector", + ); + + expect(error.name).toBe("InitializationError"); + expect(error.component).toBe("DatabaseConnector"); + expect(error.code).toBe(ErrorCodes.INITIALIZATION_FAILED); + }); + }); + + describe("RetryExhaustedError", () => { + 
it("should create with attempts and maxAttempts", () => { + const cause = new Error("Final failure"); + const error = new RetryExhaustedError( + "All retries exhausted", + 3, + 3, + undefined, + cause, + ); + + expect(error.name).toBe("RetryExhaustedError"); + expect(error.attempts).toBe(3); + expect(error.maxAttempts).toBe(3); + expect(error.cause).toBe(cause); + expect(error.code).toBe(ErrorCodes.RETRY_EXHAUSTED); + }); + }); + + describe("StreamOverflowError", () => { + it("should create stream overflow error", () => { + const error = new StreamOverflowError("Buffer overflow", { + idempotencyKey: "123", + }); + + expect(error.name).toBe("StreamOverflowError"); + expect(error.message).toBe("Buffer overflow"); + expect(error.context?.idempotencyKey).toBe("123"); + expect(error.code).toBe(ErrorCodes.STREAM_OVERFLOW); + }); + }); + + describe("isRetryableError", () => { + it("should return true for BackpressureError", () => { + expect(isRetryableError(new BackpressureError("Rate limited"))).toBe( + true, + ); + }); + it("should return true for SlotTimeoutError", () => { + expect(isRetryableError(new SlotTimeoutError("Timeout"))).toBe(true); + }); + it("should return false for ValidationError", () => { + expect(isRetryableError(new ValidationError("Invalid"))).toBe(false); + }); + it("should return false for NotFoundError", () => { + expect(isRetryableError(new NotFoundError("Not found", "task"))).toBe( + false, + ); + }); + + it("should return false for TaskStateError", () => { + expect(isRetryableError(new TaskStateError("Invalid state"))).toBe(false); + }); + + it("should return false for ConfigValidationError", () => { + expect( + isRetryableError(new ConfigValidationError("Invalid config")), + ).toBe(false); + }); + it("should return false for ConflictError", () => { + expect(isRetryableError(new ConflictError("Conflict"))).toBe(false); + }); + it("should return false for RetryExhaustedError", () => { + expect(isRetryableError(new RetryExhaustedError("Exhausted", 3, 3))).toBe( + false, + ); + }); + + it("should return true for network-related errors", () => { + expect(isRetryableError(new Error("ECONNRESET"))).toBe(true); + expect(isRetryableError(new Error("ECONNREFUSED"))).toBe(true); + expect(isRetryableError(new Error("ETIMEDOUT"))).toBe(true); + expect(isRetryableError(new Error("socket hang up"))).toBe(true); + expect(isRetryableError(new Error("network error"))).toBe(true); + }); + + it("should return false for permanent errors", () => { + expect(isRetryableError(new Error("unauthorized"))).toBe(false); + expect(isRetryableError(new Error("forbidden"))).toBe(false); + expect(isRetryableError(new Error("invalid"))).toBe(false); + expect(isRetryableError(new Error("not found"))).toBe(false); + expect(isRetryableError(new Error("bad request"))).toBe(false); + }); + + it("should return true for unknown error (fail-safe default)", () => { + expect(isRetryableError(new Error("Unknown error"))).toBe(true); + }); + + it("should return false for null/undefined", () => { + expect(isRetryableError(null)).toBe(false); + expect(isRetryableError(undefined)).toBe(false); + }); + + it("should handle errors with HTTP status codes", () => { + const error400 = Object.assign(new Error("Bad Request"), { status: 400 }); + const error404 = Object.assign(new Error("Not Found"), { status: 404 }); + const error429 = Object.assign(new Error("Too Many Requests"), { + status: 429, + }); + const error500 = Object.assign(new Error("Internal Server Error"), { + status: 500, + }); + const error503 = 
Object.assign(new Error("Service Unavailable"), { + status: 503, + }); + + expect(isRetryableError(error400)).toBe(false); + expect(isRetryableError(error404)).toBe(false); + expect(isRetryableError(error429)).toBe(true); + expect(isRetryableError(error500)).toBe(true); + expect(isRetryableError(error503)).toBe(true); + }); + }); + + describe("isTaskSystemError", () => { + it("should return true for TaskSystemError", () => { + expect(isTaskSystemError(new TaskSystemError("Test error"))).toBe(true); + }); + + it("should return true for subclasses of TaskSystemError", () => { + expect(isTaskSystemError(new BackpressureError("Rate limited"))).toBe( + true, + ); + expect(isTaskSystemError(new ValidationError("Invalid"))).toBe(true); + expect(isTaskSystemError(new NotFoundError("Not found", "task"))).toBe( + true, + ); + expect(isTaskSystemError(new TaskStateError("Invalid state"))).toBe(true); + }); + + it("should return false for regular Error", () => { + expect(isTaskSystemError(new Error("Test error"))).toBe(false); + }); + + it("should return false for non-error values", () => { + expect(isTaskSystemError(null)).toBe(false); + expect(isTaskSystemError(undefined)).toBe(false); + expect(isTaskSystemError(123)).toBe(false); + expect(isTaskSystemError("Test")).toBe(false); + expect(isTaskSystemError({ message: "error" })).toBe(false); + }); + }); +}); diff --git a/packages/taskflow/src/tests/core/types.test.ts b/packages/taskflow/src/tests/core/types.test.ts new file mode 100644 index 00000000..d8d63dd3 --- /dev/null +++ b/packages/taskflow/src/tests/core/types.test.ts @@ -0,0 +1,82 @@ +import { describe, expect, it } from "vitest"; +import { + isTerminalStatus, + isValidTransition, + VALID_TRANSITIONS, + type TaskStatus, + type TaskType, +} from "@/core/types"; + +describe("Core Types", () => { + describe("TaskStatus", () => { + it("should have all valid statuses", () => { + const statuses: TaskStatus[] = [ + "created", + "running", + "completed", + "failed", + "cancelled", + ]; + expect(statuses).toHaveLength(5); + }); + }); + + describe("TaskType", () => { + it("should have user and background options", () => { + const types: TaskType[] = ["user", "background"]; + expect(types).toHaveLength(2); + }); + }); + + describe("VALID_TRANSITIONS", () => { + it("should define valid transitions for created", () => { + expect(VALID_TRANSITIONS.created).toEqual(["running", "cancelled"]); + }); + it("should define valid transitions for running", () => { + expect(VALID_TRANSITIONS.running).toEqual([ + "completed", + "failed", + "cancelled", + ]); + }); + it("should define no transitions for completed", () => { + expect(VALID_TRANSITIONS.completed).toEqual([]); + }); + it("should define reset transition for failed", () => { + expect(VALID_TRANSITIONS.failed).toEqual(["created"]); + }); + it("should define no transitions for cancelled", () => { + expect(VALID_TRANSITIONS.cancelled).toEqual([]); + }); + }); + + describe("isValidTransition", () => { + it("should return true for valid transitions", () => { + expect(isValidTransition("created", "running")).toBe(true); + expect(isValidTransition("created", "cancelled")).toBe(true); + expect(isValidTransition("running", "completed")).toBe(true); + expect(isValidTransition("running", "failed")).toBe(true); + expect(isValidTransition("running", "cancelled")).toBe(true); + expect(isValidTransition("failed", "created")).toBe(true); + }); + it("should return false for invalid transitions", () => { + expect(isValidTransition("created", "completed")).toBe(false); + 
expect(isValidTransition("created", "failed")).toBe(false); + expect(isValidTransition("completed", "running")).toBe(false); + expect(isValidTransition("completed", "created")).toBe(false); + expect(isValidTransition("cancelled", "running")).toBe(false); + expect(isValidTransition("running", "created")).toBe(false); + }); + }); + describe("isTerminalStatus", () => { + it("should return true for terminal statuses", () => { + expect(isTerminalStatus("completed")).toBe(true); + expect(isTerminalStatus("failed")).toBe(true); + expect(isTerminalStatus("cancelled")).toBe(true); + }); + it("should return false for non-terminal statuses", () => { + expect(isTerminalStatus("created")).toBe(false); + expect(isTerminalStatus("running")).toBe(false); + }); + }); +}); From 6791649f1797f718c1f357e6a1ae5aed37972521 Mon Sep 17 00:00:00 2001 From: Ditadi Date: Fri, 23 Jan 2026 16:56:45 +0100 Subject: [PATCH 03/13] feat(taskflow): add observability hooks layer --- commitlint.config.js | 1 + packages/taskflow/src/observability/hooks.ts | 142 ++++++++++++++++++ packages/taskflow/src/observability/index.ts | 12 ++ packages/taskflow/src/observability/noop.ts | 77 ++++++++++ packages/taskflow/src/observability/types.ts | 78 ++++++++++ .../src/tests/observability/noop.test.ts | 36 +++++ 6 files changed, 346 insertions(+) create mode 100644 packages/taskflow/src/observability/hooks.ts create mode 100644 packages/taskflow/src/observability/index.ts create mode 100644 packages/taskflow/src/observability/noop.ts create mode 100644 packages/taskflow/src/observability/types.ts create mode 100644 packages/taskflow/src/tests/observability/noop.test.ts diff --git a/commitlint.config.js b/commitlint.config.js index 42d896d5..381edbf8 100644 --- a/commitlint.config.js +++ b/commitlint.config.js @@ -7,6 +7,7 @@ export default { [ "appkit", // @databricks/appkit "appkit-ui", // @databricks/appkit-ui + "taskflow", // @databricks/taskflow "shared", // shared package "playground", // dev-playground app "docs", // documentation diff --git a/packages/taskflow/src/observability/hooks.ts b/packages/taskflow/src/observability/hooks.ts new file mode 100644 index 00000000..3e281df3 --- /dev/null +++ b/packages/taskflow/src/observability/hooks.ts @@ -0,0 +1,142 @@ +import type { Attributes, LogRecord, SpanCallback, SpanContext } from "./types"; + +/** + * Hook interface for task system observability + * + * Consumers implement this interface to integrate with their + * preferred telemetry system + * + * All methods have sensible no-op defaults, so consumers only + * need to implement what they care about. + */ +export interface TaskSystemHooks { + /** + * Start a span and execute a callback within it. + * The span is automatically ended when the callback completes. + * + * @param name - The name of the span (e.g. "task.execute") + * @param attributes - Initial span attributes + * @param fn - Callback to execute within the span + * @returns The callback's return value + */ + withSpan( + name: string, + attributes: Attributes, + fn: SpanCallback, + ): T | Promise; + + /** + * Get the current active span context for propagation. + * Returns undefined if no span is active. + */ + getActiveSpanContext(): SpanContext | undefined; + + /** + * Increment a counter metric. + * @param name - Metric name + * @param value - Amount to increment by + * @param attributes - Metric attributes/labels + */ + incrementCounter(name: string, value?: number, attributes?: Attributes): void; + + /** + * Record a gauge value. 
+   * @param name - Metric name
+   * @param value - Value to record
+   * @param attributes - Metric attributes/labels
+   */
+  recordGauge(name: string, value: number, attributes?: Attributes): void;
+
+  /**
+   * Record a histogram metric.
+   * @param name - Metric name
+   * @param value - Value to record
+   * @param attributes - Metric attributes/labels
+   */
+  recordHistogram(name: string, value: number, attributes?: Attributes): void;
+
+  /**
+   * Emit a structured log record.
+   * @param record - The log record to emit
+   */
+  log(record: LogRecord): void;
+}
+
+/**
+ * Semantic metric names used by the task system
+ * Consumers can use these for dashboards and alerts
+ */
+export const TaskMetrics = {
+  // counters
+  TASKS_CREATED: "taskflow.tasks.created",
+  TASKS_STARTED: "taskflow.tasks.started",
+  TASKS_COMPLETED: "taskflow.tasks.completed",
+  TASKS_FAILED: "taskflow.tasks.failed",
+  TASKS_CANCELLED: "taskflow.tasks.cancelled",
+  TASKS_RETRIED: "taskflow.tasks.retried",
+  TASKS_RECOVERED: "taskflow.tasks.recovered",
+
+  FLUSH_BATCHES: "taskflow.flush.batches",
+  FLUSH_ENTRIES: "taskflow.flush.entries",
+  FLUSH_ERRORS: "taskflow.flush.errors",
+
+  GUARD_REJECTIONS: "taskflow.guard.rejections",
+  DLQ_ADDED: "taskflow.dlq.added",
+  DLQ_RETRIED: "taskflow.dlq.retried",
+
+  // gauges
+  TASKS_RUNNING: "taskflow.tasks.running",
+  TASKS_QUEUED: "taskflow.tasks.queued",
+  SLOTS_AVAILABLE: "taskflow.slots.available",
+  DLQ_SIZE: "taskflow.dlq.size",
+  STREAMS_ACTIVE: "taskflow.streams.active",
+
+  // histograms
+  TASK_DURATION_MS: "taskflow.task.duration_ms",
+  TASK_QUEUE_WAIT_MS: "taskflow.task.queue_wait_ms",
+  FLUSH_DURATION_MS: "taskflow.flush.duration_ms",
+  FLUSH_BATCH_SIZE: "taskflow.flush.batch_size",
+} as const;
+
+/**
+ * Semantic span names used by the task system.
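+ *
+ * @example
+ * // Illustrative only - `hooks`, `taskId`, and `run` are stand-ins for a
+ * // TaskSystemHooks instance and handler code:
+ * hooks.withSpan(
+ *   TaskSpans.TASK_EXECUTE,
+ *   { [TaskAttributes.TASK_ID]: taskId },
+ *   () => run(),
+ * );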
+ */
+export const TaskSpans = {
+  TASK_EXECUTE: "taskflow.task.execute",
+  TASK_HANDLER: "taskflow.task.handler",
+  TASK_RETRY: "taskflow.task.retry",
+  TASK_RECOVER: "taskflow.task.recover",
+
+  FLUSH_BATCH: "taskflow.flush.batch",
+  FLUSH_WRITE: "taskflow.flush.write",
+
+  GUARD_ADMIT: "taskflow.guard.admit",
+  GUARD_ACQUIRE_SLOT: "taskflow.guard.acquire_slot",
+
+  STREAM_PUSH: "taskflow.stream.push",
+  STREAM_GENERATE: "taskflow.stream.generate",
+
+  REPOSITORY_QUERY: "taskflow.repository.query",
+  REPOSITORY_WRITE: "taskflow.repository.write",
+} as const;
+
+/**
+ * Common attribute keys for spans and metrics
+ */
+export const TaskAttributes = {
+  TASK_ID: "taskflow.task.id",
+  TASK_NAME: "taskflow.task.name",
+  TASK_TYPE: "taskflow.task.type",
+  TASK_STATUS: "taskflow.task.status",
+  TASK_ATTEMPT: "taskflow.task.attempt",
+
+  USER_ID: "taskflow.user.id",
+  IDEMPOTENCY_KEY: "taskflow.idempotency.key",
+
+  ERROR_TYPE: "taskflow.error.type",
+  ERROR_MESSAGE: "taskflow.error.message",
+  ERROR_RETRYABLE: "taskflow.error.retryable",
+
+  FLUSH_BATCH_SIZE: "taskflow.flush.batch_size",
+  REPOSITORY_TYPE: "taskflow.repository.type",
+} as const;
diff --git a/packages/taskflow/src/observability/index.ts b/packages/taskflow/src/observability/index.ts
new file mode 100644
index 00000000..e1552716
--- /dev/null
+++ b/packages/taskflow/src/observability/index.ts
@@ -0,0 +1,12 @@
+export type { TaskSystemHooks } from "./hooks";
+export { TaskAttributes, TaskMetrics, TaskSpans } from "./hooks";
+export { createHooks, NOOP_SPAN, NoopSpan, noopHooks } from "./noop";
+export type {
+  Attributes,
+  LogRecord,
+  LogSeverity,
+  Span,
+  SpanCallback,
+  SpanContext,
+  SpanStatus,
+} from "./types";
diff --git a/packages/taskflow/src/observability/noop.ts b/packages/taskflow/src/observability/noop.ts
new file mode 100644
index 00000000..6010708d
--- /dev/null
+++ b/packages/taskflow/src/observability/noop.ts
@@ -0,0 +1,77 @@
+import type { TaskSystemHooks } from "./hooks";
+import type {
+  Attributes,
+  LogRecord,
+  Span,
+  SpanContext,
+  SpanStatus,
+} from "./types";
+
+/**
+ * No-op span implementation.
+ * Used when observability is disabled or not configured.
+ */
+class NoopSpan implements Span {
+  setAttribute(
+    _key: string,
+    _value: string | number | boolean | undefined,
+  ): void {}
+  setAttributes(_attributes: Attributes): void {}
+  addEvent(_name: string, _attributes?: Attributes): void {}
+  setStatus(_status: SpanStatus, _message?: string): void {}
+  recordException(_error: Error): void {}
+  end(): void {}
+  getContext(): SpanContext {
+    return { traceId: "", spanId: "", traceFlags: 0 };
+  }
+}
+
+/**
+ * Singleton no-op span instance for reuse
+ */
+const NOOP_SPAN = new NoopSpan();
+
+/**
+ * No-op hooks implementation.
+ * All methods do nothing - zero overhead when observability is disabled.
+ */
+export const noopHooks: TaskSystemHooks = {
+  withSpan<T>(
+    _name: string,
+    _attributes: Attributes,
+    _fn: (span: Span) => T | Promise<T>,
+  ): T | Promise<T> {
+    return _fn(NOOP_SPAN);
+  },
+  getActiveSpanContext(): SpanContext | undefined {
+    return undefined;
+  },
+  incrementCounter(
+    _name: string,
+    _value?: number,
+    _attributes?: Attributes,
+  ): void {},
+  recordGauge(_name: string, _value: number, _attributes?: Attributes): void {},
+  recordHistogram(
+    _name: string,
+    _value: number,
+    _attributes?: Attributes,
+  ): void {},
+
+  log(_record: LogRecord): void {},
+};
+
+/**
+ * Creates a hooks instance with a partial implementation.
+ * Unimplemented methods fall back to no-op.
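+ *
+ * @example
+ * // Illustrative: wire up structured logging only; everything else stays no-op
+ * const hooks = createHooks({
+ *   log: (record) => console.log(record.severity, record.message),
+ * });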
diff --git a/packages/taskflow/src/observability/index.ts b/packages/taskflow/src/observability/index.ts
new file mode 100644
index 00000000..e1552716
--- /dev/null
+++ b/packages/taskflow/src/observability/index.ts
@@ -0,0 +1,12 @@
+export type { TaskSystemHooks } from "./hooks";
+export { TaskAttributes, TaskMetrics, TaskSpans } from "./hooks";
+export { createHooks, NOOP_SPAN, NoopSpan, noopHooks } from "./noop";
+export type {
+  Attributes,
+  LogRecord,
+  LogSeverity,
+  Span,
+  SpanCallback,
+  SpanContext,
+  SpanStatus,
+} from "./types";
diff --git a/packages/taskflow/src/observability/noop.ts b/packages/taskflow/src/observability/noop.ts
new file mode 100644
index 00000000..6010708d
--- /dev/null
+++ b/packages/taskflow/src/observability/noop.ts
@@ -0,0 +1,77 @@
+import type { TaskSystemHooks } from "./hooks";
+import type {
+  Attributes,
+  LogRecord,
+  Span,
+  SpanContext,
+  SpanStatus,
+} from "./types";
+
+/**
+ * No-op span implementation.
+ * Used when observability is disabled or not configured.
+ */
+class NoopSpan implements Span {
+  setAttribute(
+    _key: string,
+    _value: string | number | boolean | undefined,
+  ): void {}
+  setAttributes(_attributes: Attributes): void {}
+  addEvent(_name: string, _attributes?: Attributes): void {}
+  setStatus(_status: SpanStatus, _message?: string): void {}
+  recordException(_error: Error): void {}
+  end(): void {}
+  getContext(): SpanContext {
+    return { traceId: "", spanId: "", traceFlags: 0 };
+  }
+}
+
+/**
+ * Singleton no-op span instance for reuse
+ */
+const NOOP_SPAN = new NoopSpan();
+
+/**
+ * No-op hooks implementation.
+ * All methods do nothing - zero overhead when observability is disabled.
+ */
+export const noopHooks: TaskSystemHooks = {
+  withSpan<T>(
+    _name: string,
+    _attributes: Attributes,
+    _fn: (span: Span) => T | Promise<T>,
+  ): T | Promise<T> {
+    return _fn(NOOP_SPAN);
+  },
+  getActiveSpanContext(): SpanContext | undefined {
+    return undefined;
+  },
+  incrementCounter(
+    _name: string,
+    _value?: number,
+    _attributes?: Attributes,
+  ): void {},
+  recordGauge(_name: string, _value: number, _attributes?: Attributes): void {},
+  recordHistogram(
+    _name: string,
+    _value: number,
+    _attributes?: Attributes,
+  ): void {},
+
+  log(_record: LogRecord): void {},
+};
+
+/**
+ * Creates a hooks instance with partial implementation.
+ * Unimplemented methods fall back to no-op.
+ */
+export function createHooks(
+  partial: Partial<TaskSystemHooks>,
+): TaskSystemHooks {
+  return { ...noopHooks, ...partial };
+}
+
+/**
+ * Export NoopSpan for testing purposes
+ */
+export { NoopSpan, NOOP_SPAN };
diff --git a/packages/taskflow/src/observability/types.ts b/packages/taskflow/src/observability/types.ts
new file mode 100644
index 00000000..d8bb0111
--- /dev/null
+++ b/packages/taskflow/src/observability/types.ts
@@ -0,0 +1,78 @@
+/**
+ * Observability types for the task system.
+ * These types provide a zero-dependency interface for tracing, metrics, and logging.
+ * Consumers can implement these to integrate with their preferred telemetry system.
+ */
+
+/**
+ * Span context for distributed tracing.
+ * Matches OpenTelemetry span context structure for easy integration.
+ */
+export interface SpanContext {
+  traceId: string;
+  spanId: string;
+  traceFlags?: number;
+}
+
+/**
+ * Span status for indicating success or failure.
+ */
+export type SpanStatus = "ok" | "error" | "unset";
+
+/**
+ * Attributes that can be attached to spans, metrics, and logs.
+ */
+export type Attributes = Record<string, string | number | boolean | undefined>;
+
+/**
+ * A span represents a unit of work with timing information.
+ * Simplified interface that maps to the OpenTelemetry Span interface.
+ */
+export interface Span {
+  /** Set a single attribute */
+  setAttribute(key: string, value: string | number | boolean): void;
+  /** Set multiple attributes */
+  setAttributes(attributes: Attributes): void;
+  /** Record an event within the span */
+  addEvent(name: string, attributes?: Attributes): void;
+  /** Set the span status */
+  setStatus(status: SpanStatus, message?: string): void;
+  /** Record an exception */
+  recordException(error: Error): void;
+  /** End the span (records duration) */
+  end(): void;
+  /** Get the span context */
+  getContext(): SpanContext;
+}
+
+/**
+ * Metric value structure for recording measurements.
+ */
+export interface MetricValue {
+  name: string;
+  value: number;
+  unit?: string;
+  attributes?: Attributes;
+}
+
+/**
+ * Log severity levels.
+ */
+export type LogSeverity = "debug" | "info" | "warn" | "error";
+
+/**
+ * A log record with structured data.
+ */
+export interface LogRecord {
+  severity: LogSeverity;
+  message: string;
+  attributes?: Attributes;
+  error?: Error;
+  spanContext?: SpanContext;
+}
+
+/**
+ * Callback function type for span operations.
+ * Supports both synchronous and asynchronous callbacks.
+ */
+export type SpanCallback<T> = (span: Span) => T | Promise<T>;
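With the no-op layer in place, a consumer wires up observability by overriding only the hooks it cares about. A minimal sketch, assuming just the exports above (the console transport stands in for a real metrics client):

import { createHooks, TaskMetrics } from "./observability";

const hooks = createHooks({
  // Only this hook is overridden; every other method stays a no-op.
  incrementCounter(name, value = 1, attributes) {
    console.log(`counter ${name} += ${value}`, attributes ?? {});
  },
});

hooks.incrementCounter(TaskMetrics.TASKS_COMPLETED); // logged
hooks.recordGauge(TaskMetrics.TASKS_RUNNING, 3); // silently discarded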
diff --git a/packages/taskflow/src/tests/observability/noop.test.ts b/packages/taskflow/src/tests/observability/noop.test.ts
new file mode 100644
index 00000000..1611acf8
--- /dev/null
+++ b/packages/taskflow/src/tests/observability/noop.test.ts
@@ -0,0 +1,36 @@
+import { describe, expect, it, vi } from "vitest";
+import { createHooks, noopHooks } from "@/observability";
+
+describe("noopHooks", () => {
+  describe("withSpan", () => {
+    it("should execute callback and return result", () => {
+      const result = noopHooks.withSpan("test", {}, () => 42);
+      expect(result).toBe(42);
+    });
+
+    it("should handle async callbacks", async () => {
+      const result = await noopHooks.withSpan("test", {}, async () => "async");
+      expect(result).toBe("async");
+    });
+
+    it("should propagate errors", () => {
+      expect(() =>
+        noopHooks.withSpan("test", {}, () => {
+          throw new Error("test");
+        }),
+      ).toThrow("test");
+    });
+  });
+
+  describe("createHooks", () => {
+    it("should override specific hooks while keeping noop defaults", () => {
+      const counter = vi.fn();
+      const hooks = createHooks({
+        incrementCounter: counter,
+      });
+      hooks.incrementCounter("test", 1);
+      expect(counter).toHaveBeenCalledWith("test", 1);
+      expect(hooks.withSpan("test", {}, () => 42)).toBe(42);
+    });
+  });
+});

From ca9f4a31b153197ea8e953fd123939738b544bf0 Mon Sep 17 00:00:00 2001
From: Ditadi
Date: Sat, 24 Jan 2026 01:04:48 +0100
Subject: [PATCH 04/13] feat(taskflow): add domain model layer

---
 packages/taskflow/package.json                 |   3 +
 packages/taskflow/src/domain/events.ts         | 250 +++++++
 packages/taskflow/src/domain/handler.ts        | 129 ++++
 packages/taskflow/src/domain/index.ts          |   4 +
 packages/taskflow/src/domain/task.ts           | 316 +++++++++
 packages/taskflow/src/domain/types.ts          |  99 +++
 .../taskflow/src/tests/domain/events.test.ts   | 266 ++++++++
 .../taskflow/src/tests/domain/handler.test.ts  |  98 +++
 .../taskflow/src/tests/domain/task.test.ts     | 614 ++++++++++++++++++
 pnpm-lock.yaml                                 |   9 +
 10 files changed, 1788 insertions(+)
 create mode 100644 packages/taskflow/src/domain/events.ts
 create mode 100644 packages/taskflow/src/domain/handler.ts
 create mode 100644 packages/taskflow/src/domain/index.ts
 create mode 100644 packages/taskflow/src/domain/task.ts
 create mode 100644 packages/taskflow/src/domain/types.ts
 create mode 100644 packages/taskflow/src/tests/domain/events.test.ts
 create mode 100644 packages/taskflow/src/tests/domain/handler.test.ts
 create mode 100644 packages/taskflow/src/tests/domain/task.test.ts

diff --git a/packages/taskflow/package.json b/packages/taskflow/package.json
index b71e7f8f..35f3a5c0 100644
--- a/packages/taskflow/package.json
+++ b/packages/taskflow/package.json
@@ -26,5 +26,8 @@
   },
   "devDependencies": {
     "vitest": "^3.2.4"
+  },
+  "dependencies": {
+    "json-canonicalize": "^2.0.0"
   }
 }
diff --git a/packages/taskflow/src/domain/events.ts b/packages/taskflow/src/domain/events.ts
new file mode 100644
index 00000000..2a48ea2a
--- /dev/null
+++ b/packages/taskflow/src/domain/events.ts
@@ -0,0 +1,250 @@
+import type { TaskType } from "@/core/types";
+import type { TaskExecutionOptions } from "./types";
+
+/**
+ * Base event types that handlers can yield
+ */
+export type TaskEventType =
+  | "created"
+  | "start"
+  | "progress"
+  | "complete"
+  | "error"
+  | "cancelled"
+  | "heartbeat"
+  | "retry"
+  | "recovered"
+  | "custom";
+
+/**
+ * What handlers yield - minimal input from user code
+ *
+ * @example
+ * yield { type: "progress", message: "Running query", payload: { statementId: "abc123" } }
+ */
+export interface TaskEventInput {
+  /** Optional event ID (auto-generated if not provided) */
+  id?: string;
+  /** Event type */
+  type: TaskEventType;
+  /** Human-readable message for the event */
+  message?: string;
+  /** Result data (for completed events) */
+  result?: unknown;
+  /** Error message (for error events) */
+  error?: string;
+  /** Additional payload data for recovery */
+  payload?: Record<string, unknown>;
+  /** Custom event name (for custom events) */
+  eventName?: string;
+  /** Task input (for created events) */
+  input?: unknown;
+  /** Duration in milliseconds (for complete/error events) */
+  durationMs?: number;
+  /** Event timestamp (auto-set if not provided) */
+  timestamp?: number;
+  /** Current attempt number */
+  attempt?: number;
+  /** Maximum retry attempts */
+  maxAttempts?: number;
+  /** Delay until next retry in milliseconds (for retry events) */
+  nextRetryDelayMs?: number;
+  /** Whether the error is retryable */
+  retryable?: boolean;
+}
+
+/**
+ * Context provided to task handlers during execution
+ */
+export interface TaskEventContext {
+  /** Unique task ID */
+  taskId: string;
+  /** Task name/template */
+  name: string;
+  /** Idempotency key for deduplication */
+  idempotencyKey: string;
+  /** User ID */
+  userId: string;
+  /** Task type */
+  taskType: TaskType;
+  /** Execution options */
+  executionOptions?: TaskExecutionOptions;
+}
+
+/**
+ * Normalized event with task context - used in streams and internal processing
+ */
+export interface TaskEvent extends TaskEventInput {
+  /** Unique event ID */
+  id: string;
+  /** Task ID this event belongs to */
+  taskId: string;
+  /** Task name/template */
+  name: string;
+  /** Idempotency key for the task */
+  idempotencyKey: string;
+  /** User ID */
+  userId: string;
+  /** Task type */
+  taskType: TaskType;
+  /** Task input (included for context) */
+  input?: unknown;
+  /** Execution options (included for context) */
+  executionOptions?: TaskExecutionOptions;
+}
+
+export type EventLogEntryType =
+  | "TASK_CREATED"
+  | "TASK_START"
+  | "TASK_PROGRESS"
+  | "TASK_COMPLETE"
+  | "TASK_ERROR"
+  | "TASK_CANCELLED"
+  | "TASK_HEARTBEAT" // WAL only, not stored in task_events
+  | "TASK_CUSTOM";
+
+/**
+ * Entry written to Write-Ahead Log
+ */
+export interface EventLogEntry {
+  /** Entry type */
+  type: EventLogEntryType;
+  /** Task ID */
+  taskId: string;
+  /** Idempotency key */
+  idempotencyKey: string;
+  /** Task name */
+  name: string;
+  /** User ID */
+  userId: string;
+  /** Task type */
+  taskType: TaskType;
+  /** Event timestamp */
+  timestamp: number;
+  /** Task input (for TASK_CREATED) */
+  input?: unknown;
+  /** Event payload (custom fields from handler) */
+  payload?: Record<string, unknown>;
+  /** Task result (for TASK_COMPLETE) */
+  result?: unknown;
+  /** Error message (for TASK_ERROR) */
+  error?: string;
+  /** Execution options (for TASK_CREATED) */
+  executionOptions?: TaskExecutionOptions;
+}
+
+/**
+ * Maps TaskEventType to EventLogEntryType.
+ *
+ * Returns null for events that should not be persisted to WAL
+ */
+export function toEventLogEntryType(
+  type: TaskEventType,
+): EventLogEntryType | null {
+  const mapping: Record<TaskEventType, EventLogEntryType | null> = {
+    created: "TASK_CREATED",
+    start: "TASK_START",
+    progress: "TASK_PROGRESS",
+    complete: "TASK_COMPLETE",
+    error: "TASK_ERROR",
+    cancelled: "TASK_CANCELLED",
+    heartbeat: "TASK_HEARTBEAT",
+    retry: null, // not persisted to WAL - attempt count is in tasks table
+    recovered: null, // internal event, not persisted
+    custom: "TASK_CUSTOM",
+  };
+  return mapping[type];
+}
+
+/**
+ * Maps EventLogEntryType to TaskEventType.
+ */
+export function toTaskEventType(type: EventLogEntryType): TaskEventType {
+  const mapping: Record<EventLogEntryType, TaskEventType> = {
+    TASK_CREATED: "created",
+    TASK_START: "start",
+    TASK_PROGRESS: "progress",
+    TASK_COMPLETE: "complete",
+    TASK_ERROR: "error",
+    TASK_CANCELLED: "cancelled",
+    TASK_HEARTBEAT: "heartbeat",
+    TASK_CUSTOM: "custom",
+  };
+
+  return mapping[type];
+}
+
+/**
+ * Determines if an event type should be stored in the task_events table
+ *
+ * TASK_HEARTBEAT is WAL-only, not stored in task_events table
+ */
+export function shouldStoreInTaskEvents(type: EventLogEntryType): boolean {
+  return type !== "TASK_HEARTBEAT";
+}
+
+/**
+ * Determines if an event type is relevant for recovery
+ *
+ * These events should be replayed during task recovery
+ */
+export function isRecoveryRelevant(type: EventLogEntryType): boolean {
+  return (
+    type === "TASK_CREATED" ||
+    type === "TASK_PROGRESS" ||
+    type === "TASK_COMPLETE" ||
+    type === "TASK_ERROR" ||
+    type === "TASK_CANCELLED" ||
+    type === "TASK_CUSTOM"
+  );
+}
+
+/**
+ * Creates a TaskEvent from a TaskEventInput with full metadata
+ */
+export function createTaskEvent(
+  input: TaskEventInput,
+  context: TaskEventContext,
+): TaskEvent {
+  return {
+    ...input,
+    id: input.id || generateEventId(),
+    taskId: context.taskId,
+    name: context.name,
+    idempotencyKey: context.idempotencyKey,
+    userId: context.userId,
+    taskType: context.taskType,
+    executionOptions: context.executionOptions,
+    timestamp: input.timestamp || Date.now(),
+  };
+}
+
+/**
+ * Converts a TaskEvent to an EventLogEntry for WAL persistence
+ */
+export function toEventLogEntry(event: TaskEvent): EventLogEntry | null {
+  const entryType = toEventLogEntryType(event.type);
+  if (!entryType) return null;
+
+  return {
+    type: entryType,
+    taskId: event.taskId,
+    idempotencyKey: event.idempotencyKey,
+    name: event.name,
+    userId: event.userId,
+    taskType: event.taskType,
+    timestamp: event.timestamp ?? Date.now(),
+    input: event.input,
+    payload: event.payload,
+    result: event.result,
+    error: event.error,
+    executionOptions: event.executionOptions,
+  };
+}
+
+/**
+ * Generates a unique event ID
+ */
+function generateEventId(): string {
+  return `evt_${Date.now()}_${Math.random().toString(36).substring(2, 9)}`;
+}
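Reading events.ts end to end: a handler yield is normalized with task context, then converted into a WAL entry when it is persistable. A sketch of that flow with invented context values; the import path assumes this package layout:

import {
  createTaskEvent,
  shouldStoreInTaskEvents,
  toEventLogEntry,
} from "./domain/events";

const event = createTaskEvent(
  { type: "progress", message: "Running query", payload: { percent: 50 } },
  {
    taskId: "task_1",
    name: "export-report",
    idempotencyKey: "abc123",
    userId: "user_1",
    taskType: "user",
  },
);

const entry = toEventLogEntry(event); // entry?.type === "TASK_PROGRESS"
// retry/recovered events map to null and never reach the WAL;
// heartbeats reach the WAL but are filtered out of task_events:
if (entry && shouldStoreInTaskEvents(entry.type)) {
  // persist to the task_events table in addition to the WAL
}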
diff --git a/packages/taskflow/src/domain/handler.ts b/packages/taskflow/src/domain/handler.ts
new file mode 100644
index 00000000..374f5d88
--- /dev/null
+++ b/packages/taskflow/src/domain/handler.ts
@@ -0,0 +1,129 @@
+import type { TaskEvent, TaskEventInput } from "./events";
+import type { TaskExecutionOptions } from "./types";
+
+/**
+ * Context provided to task handlers during execution
+ */
+export interface TaskHandlerContext {
+  /** Unique task ID */
+  taskId: string;
+  /** Task name/template */
+  name: string;
+  /** User ID (null for background tasks) */
+  userId: string | null;
+  /** Idempotency key for deduplication */
+  idempotencyKey: string;
+  /** Current attempt number (1-indexed) */
+  attempt: number;
+  /** AbortSignal for cancellation */
+  signal: AbortSignal;
+}
+
+export interface RecoveryContext extends TaskHandlerContext {
+  /** Previous events from the failed execution */
+  previousEvents: TaskEvent[];
+  /** Reason for recovery */
+  recoveryReason: "stale" | "crash" | "timeout";
+  /** Time since last event in milliseconds */
+  timeSinceLastEventMs: number;
+}
+
+/**
+ * Result of a task handler function
+ */
+export type TaskHandlerResult<T> = T | void;
+
+/**
+ * Async generator handler that can yield progress events
+ *
+ * @example
+ * async function* myHandler(input: MyInput, context: TaskHandlerContext) {
+ *   yield { type: "progress", message: "Starting..." };
+ *   const result = await doWork(input);
+ *   yield { type: "progress", message: "Almost done...", payload: { percent: 90 } };
+ *   return result;
+ * }
+ */
+export type GeneratorTaskHandler<TInput, TOutput> = (
+  input: TInput,
+  context: TaskHandlerContext,
+) => AsyncGenerator<TaskEventInput, TOutput>;
+
+/**
+ * Promise-based handler that returns a result directly
+ *
+ * @example
+ * async function myHandler(input: MyInput, context: TaskHandlerContext) {
+ *   const result = await doWork(input);
+ *   return result;
+ * }
+ */
+export type PromiseTaskHandler<TInput, TOutput> = (
+  input: TInput,
+  context: TaskHandlerContext,
+) => Promise<TaskHandlerResult<TOutput>>;
+
+/**
+ * Union type for all supported handler types
+ */
+export type TaskHandler<TInput, TOutput> =
+  | GeneratorTaskHandler<TInput, TOutput>
+  | PromiseTaskHandler<TInput, TOutput>;
+
+/**
+ * Recovery handler for resuming failed/stale tasks
+ *
+ * Receives previous events for smart recovery
+ * @example
+ * async function* recoveryHandler(input: MyInput, ctx: RecoveryContext) {
+ *   // check what was already done
+ *   const statementId = ctx.previousEvents.find(e => e.payload?.statementId)?.payload?.statementId;
+ *   if (statementId) {
+ *     // the statement was already created, so we can resume polling
+ *     const result = await pollForStatementResult(statementId);
+ *     yield { type: "complete", message: "Statement completed", result: result };
+ *   } else {
+ *     // start from scratch
+ *     yield { type: "created", message: "Starting from scratch" };
+ *     const statement = await createStatement(input);
+ *     yield { type: "progress", message: "Statement created", payload: { statementId: statement.statementId } };
+ *     // now we can poll for the result
+ *     const result = await pollForStatementResult(statement.statementId);
+ *     yield { type: "complete", message: "Statement completed", result: result };
+ *   }
+ * }
+ */
+export type RecoveryHandler<TInput, TOutput> = (
+  input: TInput,
+  ctx: RecoveryContext,
+) => AsyncGenerator<TaskEventInput, TOutput>;
+
+/**
+ * Task definition for registration and execution
+ */
+export interface TaskDefinition<TInput = unknown, TOutput = unknown> {
+  /** Unique task name */
+  name: string;
+  /** Main execution handler */
+  handler: TaskHandler<TInput, TOutput>;
+  /** Optional recovery handler for smart recovery */
+  recover?: RecoveryHandler<TInput, TOutput>;
+  /** Task description for documentation */
+  description?: string;
+  /** Default execution options */
+  defaultOptions: TaskExecutionOptions;
+}
+
+/**
+ * Type guard to check if a value is an AsyncGenerator
+ */
+export function isAsyncGenerator(
+  value: unknown,
+): value is AsyncGenerator {
+  return (
+    value !== null &&
+    typeof value === "object" &&
+    Symbol.asyncIterator in value &&
+    typeof (value as AsyncGenerator).next === "function"
+  );
+}
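To make the handler contracts concrete, here is a hypothetical definition exercising the generator form. `fetchReport` and the option values are invented, and the generic parameters are the ones restored above:

import type { GeneratorTaskHandler, TaskDefinition } from "./domain/handler";

declare function fetchReport(id: string, signal: AbortSignal): Promise<string>;

interface ReportInput {
  reportId: string;
}

// The handler yields TaskEventInput values and returns the task result.
const handler: GeneratorTaskHandler<ReportInput, string> = async function* (
  input,
  ctx,
) {
  yield { type: "start", message: `attempt ${ctx.attempt}` };
  // ctx.signal lets the task system abort in-flight work on cancellation.
  const url = await fetchReport(input.reportId, ctx.signal);
  yield { type: "progress", message: "Report rendered", payload: { url } };
  return url;
};

const exportReport: TaskDefinition<ReportInput, string> = {
  name: "export-report",
  description: "Renders a report and returns a download URL",
  defaultOptions: { maxRetries: 3, timeoutMs: 60_000 },
  handler,
};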
diff --git a/packages/taskflow/src/domain/index.ts b/packages/taskflow/src/domain/index.ts
new file mode 100644
index 00000000..91cfdfa5
--- /dev/null
+++ b/packages/taskflow/src/domain/index.ts
@@ -0,0 +1,4 @@
+export * from "./events";
+export * from "./handler";
+export { Task } from "./task";
+export * from "./types";
diff --git a/packages/taskflow/src/domain/task.ts b/packages/taskflow/src/domain/task.ts
new file mode 100644
index 00000000..5b40057d
--- /dev/null
+++ b/packages/taskflow/src/domain/task.ts
@@ -0,0 +1,316 @@
+import { createHash } from "node:crypto";
+import { canonicalize } from "json-canonicalize";
+import { TaskStateError } from "@/core/errors";
+import {
+  isTerminalStatus,
+  type TaskStatus,
+  type TaskType,
+  VALID_TRANSITIONS,
+} from "@/core/types";
+import type {
+  TaskCreationParams,
+  TaskExecutionOptions,
+  TaskJSON,
+  TaskRecord,
+} from "./types";
+
+/**
+ * Task entity representing a unit of work in the task system
+ *
+ * State Machine:
+ * - created -> running -> completed
+ * - created -> running -> failed
+ * - created -> running -> cancelled
+ * - created -> cancelled
+ * - failed -> created (via resetToPending)
+ */
+export class Task {
+  /** Unique task identifier */
+  readonly id: string;
+  /** Task name/template */
+  readonly name: string;
+  /** Input data for the handler */
+  readonly input: unknown;
+  /** User ID (null for background tasks) */
+  readonly userId: string | null;
+  /** Idempotency key for deduplication */
+  readonly idempotencyKey: string;
+  /** Creation timestamp */
+  readonly createdAt: Date;
+  /** Task type: user or background */
+  readonly type: TaskType;
+  /** Execution options */
+  readonly executionOptions?: TaskExecutionOptions;
+
+  /** Mutable internal state */
+  private _status: TaskStatus;
+  private _startedAt?: Date;
+  private _completedAt?: Date;
+  private _lastHeartbeatAt?: Date;
+  private _attempt: number;
+  private _result?: unknown;
+  private _error?: string;
+
+  /** Current task status */
+  get status(): TaskStatus {
+    return this._status;
+  }
+
+  /** When the task started executing */
+  get startedAt(): Date | undefined {
+    return this._startedAt;
+  }
+
+  /** When the task completed (success, failure or cancellation) */
+  get completedAt(): Date | undefined {
+    return this._completedAt;
+  }
+
+  /** Last heartbeat timestamp (for stale detection) */
+  get lastHeartbeatAt(): Date | undefined {
+    return this._lastHeartbeatAt;
+  }
+
+  /** Current attempt number (1-indexed when running) */
+  get attempt(): number {
+    return this._attempt;
+  }
+
+  /** Task result (when completed successfully) */
+  get result(): unknown | undefined {
+    return this._result;
+  }
+
+  /** Error message (when failed) */
+  get error(): string | undefined {
+    return this._error;
+  }
+
+  /** Duration in milliseconds (if started) */
+  get durationMs(): number | undefined {
+    if (!this._startedAt) return undefined;
+    const endTime = this._completedAt ?? new Date();
+    return endTime.getTime() - this._startedAt.getTime();
+  }
+
+  /** Whether the task is in a terminal state */
+  get isTerminal(): boolean {
+    return isTerminalStatus(this._status);
+  }
+
+  /** Whether the task is currently running */
+  get isRunning(): boolean {
+    return this._status === "running";
+  }
+
+  constructor(params: TaskCreationParams) {
+    this.id = crypto.randomUUID();
+    this.name = params.name;
+    this.input = params.input;
+    this.userId = params.userId;
+    this.type = params.type ?? "user";
+    this.executionOptions = params.executionOptions;
+    this.createdAt = new Date();
+    this._status = "created";
+    this._attempt = 0;
+
+    this.idempotencyKey =
+      params.idempotencyKey ?? Task.generateIdempotencyKey(params);
+  }
+
+  /**
+   * Transition to running state
+   * @throws {TaskStateError} if transition is invalid
+   */
+  start(): void {
+    this.assertNotTerminal("start");
+    this.assertStatus(["created"], "start");
+
+    this._status = "running";
+    this._startedAt = new Date();
+    this._lastHeartbeatAt = new Date();
+    this._attempt++;
+  }
+
+  /**
+   * Transition to completed state
+   * @param result Optional result data
+   * @throws {TaskStateError} if transition is invalid
+   */
+  complete(result?: unknown): void {
+    this.assertNotTerminal("complete");
+    this.assertStatus(["running"], "complete");
+
+    this._status = "completed";
+    this._completedAt = new Date();
+    this._result = result;
+  }
+
+  /**
+   * Transition to failed state
+   * @param error Error message or error object
+   * @throws {TaskStateError} if transition is invalid
+   */
+  fail(error: string | Error): void {
+    this.assertNotTerminal("fail");
+    this.assertStatus(["running"], "fail");
+
+    this._status = "failed";
+    this._completedAt = new Date();
+    this._error =
+      error instanceof Error ? error.message : (error ?? "Unknown error");
+  }
+
+  /**
+   * Transition to cancelled state
+   * @param reason Optional cancellation reason
+   * @throws {TaskStateError} if transition is invalid
+   */
+  cancel(reason?: string): void {
+    this.assertNotTerminal("cancel");
+    this.assertStatus(["created", "running"], "cancel");
+
+    this._status = "cancelled";
+    this._completedAt = new Date();
+    this._error = reason ?? "Task cancelled";
"Task cancelled"; + } + + /** + * Record a heartbeat (updates lastHeartbeatAt) + * @throws {TaskStateError} if task is not running + */ + recordHeartbeat(): void { + this.assertStatus(["running"], "recordHeartbeat"); + this._lastHeartbeatAt = new Date(); + } + + /** + * Increment the attempt counter (for retries) + * @throws {TaskStateError} if task is not running + */ + incrementAttempt(): void { + this.assertStatus(["running"], "incrementAttempt"); + this._attempt++; + } + + /** + * Reset a failed task back to created state for retry + * @throws {TaskStateError} if task is not in failed state + */ + resetToPending(): void { + this.assertStatus(["failed"], "resetToPending"); + + this._status = "created"; + this._completedAt = undefined; + this._error = undefined; + } + + /** + * Serialize task to JSON-compatible object + */ + toJSON(): TaskJSON { + const json: TaskJSON = { + id: this.id, + name: this.name, + input: this.input, + userId: this.userId, + idempotencyKey: this.idempotencyKey, + type: this.type, + status: this._status, + attempt: this._attempt, + createdAt: this.createdAt.toISOString(), + }; + + if (this._result) json.result = this._result; + if (this._error) json.error = this._error; + if (this._startedAt) json.startedAt = this._startedAt.toISOString(); + if (this._completedAt) json.completedAt = this._completedAt.toISOString(); + if (this._lastHeartbeatAt) + json.lastHeartbeatAt = this._lastHeartbeatAt.toISOString(); + if (this._startedAt !== undefined) { + const duration = this.durationMs; + if (duration !== undefined) json.durationMs = duration; + } + if (this.executionOptions) json.executionOptions = this.executionOptions; + + return json; + } + + /** + * Reconstruct a Task from a database record + */ + static fromRecord(record: TaskRecord): Task { + const task = new Task({ + name: record.name, + input: JSON.parse(record.input), + userId: record.user_id, + type: record.task_type as TaskType, + idempotencyKey: record.idempotency_key, + executionOptions: record.execution_options + ? JSON.parse(record.execution_options) + : undefined, + }); + + // override readonly properties via Object.defineProperty + Object.defineProperty(task, "id", { value: record.id }); + Object.defineProperty(task, "createdAt", { + value: new Date(record.created_at), + }); + + // restore mutable state + task._status = record.status; + task._attempt = record.attempt; + + if (record.started_at) task._startedAt = new Date(record.started_at); + if (record.completed_at) task._completedAt = new Date(record.completed_at); + if (record.last_heartbeat_at) + task._lastHeartbeatAt = new Date(record.last_heartbeat_at); + if (record.result) task._result = JSON.parse(record.result); + task._error = record.error ?? 
+
+    return task;
+  }
+
+  /**
+   * Generate a deterministic idempotency key from task parameters
+   * Uses json-canonicalize for consistent key ordering
+   */
+  static generateIdempotencyKey(params: TaskCreationParams): string {
+    const payload = {
+      name: params.name,
+      input: params.input,
+      userId: params.userId,
+    };
+    return createHash("sha256").update(canonicalize(payload)).digest("hex");
+  }
+
+  /**
+   * Assert that the task is not in a terminal state
+   * @throws {TaskStateError} if task is terminal
+   */
+  private assertNotTerminal(action: string): void {
+    if (this.isTerminal) {
+      throw new TaskStateError(
+        `Cannot ${action} a terminal task`,
+        this._status,
+        undefined,
+        VALID_TRANSITIONS[this._status],
+      );
+    }
+  }
+
+  /**
+   * Assert that the task is one of the allowed states
+   * @throws {TaskStateError} if task is not in the allowed states
+   */
+  private assertStatus(allowed: TaskStatus[], action: string): void {
+    if (!allowed.includes(this._status)) {
+      throw new TaskStateError(
+        `Cannot ${action} from state ${this._status}, allowed: ${allowed.join(", ")}`,
+        this._status,
+        undefined,
+        allowed,
+      );
+    }
+  }
+}
diff --git a/packages/taskflow/src/domain/types.ts b/packages/taskflow/src/domain/types.ts
new file mode 100644
index 00000000..57732a2e
--- /dev/null
+++ b/packages/taskflow/src/domain/types.ts
@@ -0,0 +1,99 @@
+import type { TaskStatus, TaskType } from "@/core/types";
+
+/**
+ * Options for task execution behavior
+ */
+export interface TaskExecutionOptions {
+  /** override the default retry configuration */
+  maxRetries?: number;
+  /** override the default timeout in milliseconds */
+  timeoutMs?: number;
+}
+
+/**
+ * Parameters for creating a new task
+ */
+export interface TaskCreationParams {
+  /** The registered task name/template */
+  name: string;
+  /** Input data for the task handler */
+  input: unknown;
+  /** User ID for user-initiated tasks, null for background tasks */
+  userId: string | null;
+  /** Task type: user or background */
+  type?: TaskType;
+  /** Execution options for the task */
+  executionOptions?: TaskExecutionOptions;
+  /** Custom idempotency key (auto-generated if not provided) */
+  idempotencyKey?: string;
+}
+
+/**
+ * Row in `tasks` table
+ * Uses snake_case to match SQL database conventions
+ */
+export interface TaskRecord {
+  id: string;
+  name: string;
+  idempotency_key: string;
+  user_id: string | null;
+  task_type: TaskType;
+  status: TaskStatus;
+  input: string;
+  result: string | null;
+  error: string | null;
+  attempt: number;
+  created_at: string; // ISO timestamp
+  started_at: string | null; // ISO timestamp
+  completed_at: string | null; // ISO timestamp
+  last_heartbeat_at: string | null; // ISO timestamp
+  execution_options: string | null; // JSON stringified
+}
+
+/**
+ * Event types stored in task_events table
+ * Subset of EventLogEntryType - only recovery-relevant events
+ */
+export type StoredEventType =
+  | "TASK_CREATED"
+  | "TASK_START"
+  | "TASK_PROGRESS"
+  | "TASK_COMPLETE"
+  | "TASK_ERROR"
+  | "TASK_CANCELLED"
+  | "TASK_CUSTOM";
+
+/**
+ * Row in `task_events` table
+ * Uses snake_case to match SQL database conventions
+ */
+export interface TaskEventRecord {
+  id: string;
+  task_id: string;
+  idempotency_key: string;
+  event_type: StoredEventType;
+  payload: string | null; // JSON stringified
+  created_at: string; // ISO timestamp
+}
+
+/**
+ * Serialized task representation for JSON responses
+ */
+export interface TaskJSON {
+  id: string;
+  name: string;
+  input: unknown;
+  userId: string | null;
+  idempotencyKey: string;
+  type: TaskType;
+  status: TaskStatus;
+  attempt: number;
+  result?: unknown;
+  error?: string;
+  createdAt: string;
+  startedAt?: string;
+  completedAt?: string;
+  lastHeartbeatAt?: string;
+  durationMs?: number;
+  executionOptions?: TaskExecutionOptions;
+}
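The domain layer is easiest to read as the state machine from task.ts exercised end to end. A throwaway sketch of the guarded transitions; all values are invented:

import { Task } from "./domain/task";

const task = new Task({
  name: "export-report",
  input: { reportId: "r1" },
  userId: "user_1",
});

task.start(); // created -> running, attempt becomes 1
task.recordHeartbeat(); // only legal while running
task.fail("upstream timeout"); // running -> failed (terminal)
task.resetToPending(); // failed -> created, eligible for retry
// task.complete(); // would throw TaskStateError: only running tasks complete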
diff --git a/packages/taskflow/src/tests/domain/events.test.ts b/packages/taskflow/src/tests/domain/events.test.ts
new file mode 100644
index 00000000..a1525cb1
--- /dev/null
+++ b/packages/taskflow/src/tests/domain/events.test.ts
@@ -0,0 +1,266 @@
+import { describe, expect, it } from "vitest";
+import {
+  createTaskEvent,
+  isRecoveryRelevant,
+  shouldStoreInTaskEvents,
+  type TaskEvent,
+  type TaskEventContext,
+  type TaskEventInput,
+  toEventLogEntry,
+  toEventLogEntryType,
+  toTaskEventType,
+} from "@/domain/events";
+
+describe("Domain Events", () => {
+  describe("toEventLogEntryType", () => {
+    it("should map created to TASK_CREATED", () => {
+      expect(toEventLogEntryType("created")).toBe("TASK_CREATED");
+    });
+    it("should map start to TASK_START", () => {
+      expect(toEventLogEntryType("start")).toBe("TASK_START");
+    });
+    it("should map progress to TASK_PROGRESS", () => {
+      expect(toEventLogEntryType("progress")).toBe("TASK_PROGRESS");
+    });
+    it("should map complete to TASK_COMPLETE", () => {
+      expect(toEventLogEntryType("complete")).toBe("TASK_COMPLETE");
+    });
+    it("should map error to TASK_ERROR", () => {
+      expect(toEventLogEntryType("error")).toBe("TASK_ERROR");
+    });
+    it("should map cancelled to TASK_CANCELLED", () => {
+      expect(toEventLogEntryType("cancelled")).toBe("TASK_CANCELLED");
+    });
+    it("should map heartbeat to TASK_HEARTBEAT", () => {
+      expect(toEventLogEntryType("heartbeat")).toBe("TASK_HEARTBEAT");
+    });
+    it("should map custom to TASK_CUSTOM", () => {
+      expect(toEventLogEntryType("custom")).toBe("TASK_CUSTOM");
+    });
+    it("should return null for retry (not persisted to WAL)", () => {
+      expect(toEventLogEntryType("retry")).toBeNull();
+    });
+    it("should return null for recovered (internal event)", () => {
+      expect(toEventLogEntryType("recovered")).toBeNull();
+    });
+  });
+
+  describe("toTaskEventType", () => {
+    it("should map TASK_CREATED to created", () => {
+      expect(toTaskEventType("TASK_CREATED")).toBe("created");
+    });
+    it("should map TASK_START to start", () => {
+      expect(toTaskEventType("TASK_START")).toBe("start");
+    });
+    it("should map TASK_PROGRESS to progress", () => {
+      expect(toTaskEventType("TASK_PROGRESS")).toBe("progress");
+    });
+    it("should map TASK_COMPLETE to complete", () => {
+      expect(toTaskEventType("TASK_COMPLETE")).toBe("complete");
+    });
+    it("should map TASK_ERROR to error", () => {
+      expect(toTaskEventType("TASK_ERROR")).toBe("error");
+    });
+    it("should map TASK_CANCELLED to cancelled", () => {
+      expect(toTaskEventType("TASK_CANCELLED")).toBe("cancelled");
+    });
+    it("should map TASK_HEARTBEAT to heartbeat", () => {
+      expect(toTaskEventType("TASK_HEARTBEAT")).toBe("heartbeat");
+    });
+    it("should map TASK_CUSTOM to custom", () => {
+      expect(toTaskEventType("TASK_CUSTOM")).toBe("custom");
+    });
+  });
+
+  describe("shouldStoreInTaskEvents", () => {
+    it("should return true for TASK_CREATED", () => {
+      expect(shouldStoreInTaskEvents("TASK_CREATED")).toBe(true);
+    });
+    it("should return true for TASK_START", () => {
+      expect(shouldStoreInTaskEvents("TASK_START")).toBe(true);
+    });
+    it("should return true for TASK_PROGRESS", () => {
+      expect(shouldStoreInTaskEvents("TASK_PROGRESS")).toBe(true);
+    });
+    it("should return true for TASK_COMPLETE", () => {
+      expect(shouldStoreInTaskEvents("TASK_COMPLETE")).toBe(true);
+    });
+    it("should return true for TASK_ERROR", () => {
+      expect(shouldStoreInTaskEvents("TASK_ERROR")).toBe(true);
+    });
+    it("should return true for TASK_CANCELLED", () => {
+      expect(shouldStoreInTaskEvents("TASK_CANCELLED")).toBe(true);
+    });
+    it("should return true for TASK_CUSTOM", () => {
+      expect(shouldStoreInTaskEvents("TASK_CUSTOM")).toBe(true);
+    });
+    it("should return false for TASK_HEARTBEAT (WAL only)", () => {
+      expect(shouldStoreInTaskEvents("TASK_HEARTBEAT")).toBe(false);
+    });
+  });
+
+  describe("isRecoveryRelevant", () => {
+    it("should return true for TASK_CREATED", () => {
+      expect(isRecoveryRelevant("TASK_CREATED")).toBe(true);
+    });
+    it("should return true for TASK_PROGRESS", () => {
+      expect(isRecoveryRelevant("TASK_PROGRESS")).toBe(true);
+    });
+    it("should return true for TASK_COMPLETE", () => {
+      expect(isRecoveryRelevant("TASK_COMPLETE")).toBe(true);
+    });
+    it("should return true for TASK_ERROR", () => {
+      expect(isRecoveryRelevant("TASK_ERROR")).toBe(true);
+    });
+    it("should return true for TASK_CANCELLED", () => {
+      expect(isRecoveryRelevant("TASK_CANCELLED")).toBe(true);
+    });
+    it("should return true for TASK_CUSTOM", () => {
+      expect(isRecoveryRelevant("TASK_CUSTOM")).toBe(true);
+    });
+    it("should return false for TASK_HEARTBEAT (WAL only)", () => {
+      expect(isRecoveryRelevant("TASK_HEARTBEAT")).toBe(false);
+    });
+  });
+
+  describe("createTaskEvent", () => {
+    it("should create a full TaskEvent from input and context", () => {
+      const input: TaskEventInput = {
+        type: "progress",
+        message: "Running query",
+        payload: { percent: 50 },
+      };
+      const context: TaskEventContext = {
+        taskId: "123",
+        name: "my-task",
+        idempotencyKey: "abc123",
+        userId: "user123",
+        taskType: "user",
+      };
+      const event = createTaskEvent(input, context);
+
+      expect(event.type).toBe("progress");
+      expect(event.message).toBe("Running query");
+      expect(event.payload).toEqual({ percent: 50 });
+      expect(event.taskId).toBe("123");
+      expect(event.name).toBe("my-task");
+      expect(event.idempotencyKey).toBe("abc123");
+      expect(event.userId).toBe("user123");
+      expect(event.taskType).toBe("user");
+      expect(event.id).toMatch(/^evt_/);
+      expect(event.timestamp).toBeGreaterThan(0);
+    });
+
+    it("should use provided event ID if given", () => {
+      const input: TaskEventInput = {
+        id: "my-event-id",
+        type: "complete",
+      };
+      const context: TaskEventContext = {
+        taskId: "123",
+        name: "my-task",
+        idempotencyKey: "abc123",
+        userId: "user123",
+        taskType: "user",
+      };
+      const event = createTaskEvent(input, context);
+      expect(event.id).toBe("my-event-id");
+    });
+
+    it("should include executionOptions from context", () => {
+      const input: TaskEventInput = {
+        type: "progress",
+        message: "Running query",
+        payload: { percent: 50 },
+      };
+      const context: TaskEventContext = {
+        taskId: "123",
+        name: "my-task",
+        idempotencyKey: "abc123",
+        userId: "user123",
+        taskType: "user",
+        executionOptions: {
+          maxRetries: 3,
+          timeoutMs: 10000,
+        },
+      };
+      const event = createTaskEvent(input, context);
+      expect(event.executionOptions).toEqual({
+        maxRetries: 3,
+        timeoutMs: 10000,
+      });
+    });
+  });
+
+  describe("toEventLogEntry", () => {
+    it("should convert TaskEvent to EventLogEntry (for WAL persistence)", () => {
+      const event: TaskEvent = {
+        id: "evt_123",
+        taskId: "123",
+        name: "my-task",
+        type: "complete",
+        idempotencyKey: "abc123",
+        userId: "user123",
+        taskType: "user",
+        timestamp: Date.now(),
+        result: { data: "success" },
+      };
+      const entry = toEventLogEntry(event);
+
+      expect(entry).not.toBeNull();
+      expect(entry?.type).toBe("TASK_COMPLETE");
+      expect(entry?.taskId).toBe("123");
+      expect(entry?.result).toEqual({ data: "success" });
+    });
+
+    it("should return null for retry events (not persisted to WAL)", () => {
+      const event: TaskEvent = {
+        id: "evt_123",
+        taskId: "123",
+        name: "my-task",
+        type: "retry",
+        idempotencyKey: "abc123",
+        userId: "user123",
+        taskType: "user",
+        timestamp: Date.now(),
+        nextRetryDelayMs: 1000,
+      };
+      expect(toEventLogEntry(event)).toBeNull();
+    });
+
+    it("should return null for recovered events (internal event)", () => {
+      const event: TaskEvent = {
+        id: "evt_123",
+        taskId: "123",
+        name: "my-task",
+        type: "recovered",
+        idempotencyKey: "abc123",
+        userId: "user123",
+        taskType: "user",
+      };
+
+      expect(toEventLogEntry(event)).toBeNull();
+    });
+
+    it("should include executionOptions in entry", () => {
+      const event: TaskEvent = {
+        id: "evt_123",
+        taskId: "123",
+        name: "my-task",
+        type: "created",
+        idempotencyKey: "abc123",
+        userId: "user123",
+        taskType: "user",
+        timestamp: Date.now(),
+        executionOptions: {
+          maxRetries: 3,
+          timeoutMs: 10000,
+        },
+      };
+      const entry = toEventLogEntry(event);
+      expect(entry?.executionOptions).toEqual({
+        maxRetries: 3,
+        timeoutMs: 10000,
+      });
+    });
+  });
+});
diff --git a/packages/taskflow/src/tests/domain/handler.test.ts b/packages/taskflow/src/tests/domain/handler.test.ts
new file mode 100644
index 00000000..97e5278a
--- /dev/null
+++ b/packages/taskflow/src/tests/domain/handler.test.ts
@@ -0,0 +1,98 @@
+import { describe, expect, it } from "vitest";
+import {
+  type GeneratorTaskHandler,
+  isAsyncGenerator,
+  type PromiseTaskHandler,
+  type TaskHandlerContext,
+} from "@/domain/handler";
+
+describe("Task Handler", () => {
+  const controller = new AbortController();
+  const mockedContext: TaskHandlerContext = {
+    taskId: "123",
+    name: "my-task",
+    userId: "user123",
+    idempotencyKey: "abc123",
+    attempt: 1,
+    signal: controller.signal,
+  };
+
+  describe("Handler Types", () => {
+    it("should type check GeneratorTaskHandler", async () => {
+      const handler: GeneratorTaskHandler<{ value: number }, string> =
+        async function* (input, _ctx) {
+          yield {
+            type: "progress",
+            message: `Processing: ${input.value}`,
+          };
+          return "done";
+        };
+
+      const gen = handler({ value: 42 }, mockedContext);
+      const first = await gen.next();
+      expect(first.done).toBe(false);
+      expect(first.value).toEqual({
+        type: "progress",
+        message: "Processing: 42",
+      });
+
+      const second = await gen.next();
+      expect(second.done).toBe(true);
+      expect(second.value).toBe("done");
+    });
+
+    it("should type check PromiseTaskHandler", async () => {
+      const handler: PromiseTaskHandler<{ value: number }, string> = async (
+        input,
+        _ctx,
+      ) => {
+        return `Result: ${input.value}`;
+      };
+
+      const result = await handler({ value: 42 }, mockedContext);
+      expect(result).toBe("Result: 42");
+    });
+
+    it("should allow void return in PromiseTaskHandler", async () => {
+      const handler: PromiseTaskHandler<{ value: number }, void> = async (
+        _input,
+        _ctx,
+      ) => {
+        // no return value
+      };
+
+      const result = await handler({ value: 42 }, mockedContext);
+      expect(result).toBeUndefined();
+    });
+  });
+
+  describe("isAsyncGenerator", () => {
+    it("should return true for async generator", async () => {
+      async function* gen() {
+        yield 1;
+      }
+      const g = gen();
+      expect(isAsyncGenerator(g)).toBe(true);
+    });
+
+    it("should return false for regular promise", () => {
+      const p = Promise.resolve(1);
+      expect(isAsyncGenerator(p)).toBe(false);
+    });
+
+    it("should return false for null", () => {
+      expect(isAsyncGenerator(null)).toBe(false);
+    });
+
+    it("should return false for undefined", () => {
+      expect(isAsyncGenerator(undefined)).toBe(false);
+    });
+
+    it("should return false for plain object", () => {
+      expect(isAsyncGenerator({})).toBe(false);
+    });
+
+    it("should return false for object with only Symbol.asyncIterator", () => {
+      const fake = { [Symbol.asyncIterator]: () => {} };
+      expect(isAsyncGenerator(fake)).toBe(false); // missing .next()
+    });
+  });
+});
diff --git a/packages/taskflow/src/tests/domain/task.test.ts b/packages/taskflow/src/tests/domain/task.test.ts
new file mode 100644
index 00000000..79940701
--- /dev/null
+++ b/packages/taskflow/src/tests/domain/task.test.ts
@@ -0,0 +1,614 @@
+import { describe, expect, it } from "vitest";
+import { TaskStateError } from "@/core/errors";
+import type { TaskRecord } from "@/domain";
+import { Task } from "@/domain/task";
+
+describe("Task", () => {
+  describe("Constructor", () => {
+    it("should create a task with default values", () => {
+      const task = new Task({
+        name: "my-task",
+        input: { value: 42 },
+        userId: "user123",
+      });
+
+      expect(task.id).toBeDefined();
+      expect(task.name).toBe("my-task");
+      expect(task.input).toEqual({ value: 42 });
+      expect(task.userId).toBe("user123");
+      expect(task.type).toBe("user");
+      expect(task.status).toBe("created");
+      expect(task.attempt).toBe(0);
+      expect(task.createdAt).toBeDefined();
+      expect(task.idempotencyKey).toBeDefined();
+    });
+
+    it("should create a task with custom type (background)", () => {
+      const task = new Task({
+        name: "my-task",
+        input: { value: 42 },
+        userId: null,
+        type: "background",
+      });
+
+      expect(task.type).toBe("background");
+      expect(task.userId).toBeNull();
+    });
+
+    it("should use provided idempotencyKey", () => {
+      const task = new Task({
+        name: "my-task",
+        input: {},
+        userId: "user123",
+        idempotencyKey: "abc123",
+      });
+      expect(task.idempotencyKey).toBe("abc123");
+    });
+
+    it("should generate deterministic idempotencyKey", () => {
+      const params = {
+        name: "my-task",
+        input: { value: 42 },
+        userId: "user123",
+      };
+      const task1 = new Task(params);
+      const task2 = new Task(params);
+      expect(task1.idempotencyKey).toBe(task2.idempotencyKey);
+    });
+
+    it("should generate different idempotencyKey for different inputs", () => {
+      const task1 = new Task({
+        name: "my-task",
+        input: { value: 42 },
+        userId: "user123",
+      });
+      const task2 = new Task({
+        name: "my-task",
+        input: { value: 43 },
+        userId: "user123",
+      });
+      expect(task1.idempotencyKey).not.toBe(task2.idempotencyKey);
+    });
+
+    it("should generate same idempotencyKey regardless of object key order", () => {
+      const task1 = new Task({
+        name: "my-task",
+        input: { value: 42 },
+        userId: "user123",
+      });
+      const task2 = new Task({
+        name: "my-task",
+        userId: "user123",
+        input: { value: 42 },
+      });
+      expect(task1.idempotencyKey).toBe(task2.idempotencyKey);
+    });
+  });
+
+  describe("start()", () => {
+    it("should transition from created to running", () => {
+      const task = new Task({
+        name: "my-task",
+        input: { value: 42 },
+        userId: "user123",
+      });
+      task.start();
+      expect(task.status).toBe("running");
+      expect(task.attempt).toBe(1);
+      expect(task.startedAt).toBeDefined();
+      expect(task.lastHeartbeatAt).toBeDefined();
+    });
+
+    it("should throw if task is already running", () => {
+      const task = new Task({
+        name: "my-task",
+        input: { value: 42 },
+        userId: "user123",
"user123", + }); + task.start(); + expect(() => task.start()).toThrow(TaskStateError); + expect(() => task.start()).toThrow( + "Cannot start from state running, allowed: created", + ); + }); + + it("should throw if task is in terminal state", () => { + const task = new Task({ + name: "my-task", + input: { value: 42 }, + userId: "user123", + }); + task.start(); + task.complete(); + + expect(() => task.start()).toThrow(TaskStateError); + expect(() => task.start()).toThrow("Cannot start a terminal task"); + }); + }); + describe("complete()", () => { + it("should transition from running to completed", () => { + const task = new Task({ + name: "my-task", + input: { value: 42 }, + userId: "user123", + }); + + task.start(); + task.complete({ data: "result" }); + expect(task.status).toBe("completed"); + expect(task.completedAt).toBeDefined(); + expect(task.result).toEqual({ data: "result" }); + expect(task.isTerminal).toBe(true); + }); + + it("should work without result", () => { + const task = new Task({ + name: "my-task", + input: { value: 42 }, + userId: "user123", + }); + task.start(); + task.complete(); + expect(task.status).toBe("completed"); + expect(task.result).toBeUndefined(); + }); + + it("should throw if task is not running", () => { + const task = new Task({ + name: "my-task", + input: { value: 42 }, + userId: "user123", + }); + task.start(); + task.complete(); + expect(() => task.complete()).toThrow(TaskStateError); + expect(() => task.complete()).toThrow("Cannot complete a terminal task"); + }); + + it("should throw if task is already completed", () => { + const task = new Task({ + name: "my-task", + input: { value: 42 }, + userId: "user123", + }); + task.start(); + task.complete(); + expect(() => task.complete()).toThrow(TaskStateError); + expect(() => task.complete()).toThrow("Cannot complete a terminal task"); + }); + }); + describe("fail()", () => { + it("should transition from running to failed with string error", () => { + const task = new Task({ + name: "my-task", + input: { value: 42 }, + userId: "user123", + }); + task.start(); + task.fail("test error"); + expect(task.status).toBe("failed"); + expect(task.completedAt).toBeDefined(); + expect(task.error).toBe("test error"); + expect(task.isTerminal).toBe(true); + }); + + it("should extract message from Error object", () => { + const task = new Task({ + name: "my-task", + input: { value: 42 }, + userId: "user123", + }); + task.start(); + task.fail(new Error("test error")); + expect(task.status).toBe("failed"); + }); + it("should throw if task is not running", () => { + const task = new Task({ + name: "my-task", + input: { value: 42 }, + userId: "user123", + }); + expect(() => task.fail("error")).toThrow(TaskStateError); + expect(() => task.fail("error")).toThrow( + "Cannot fail from state created, allowed: running", + ); + }); + }); + + describe("cancel()", () => { + it("should transition from created to cancelled", () => { + const task = new Task({ + name: "my-task", + input: { value: 42 }, + userId: "user123", + }); + task.cancel("test reason"); + expect(task.status).toBe("cancelled"); + expect(task.error).toBe("test reason"); + expect(task.isTerminal).toBe(true); + }); + it("should transition from running to cancelled", () => { + const task = new Task({ + name: "my-task", + input: { value: 42 }, + userId: "user123", + }); + task.start(); + task.cancel("test reason"); + expect(task.status).toBe("cancelled"); + expect(task.error).toBe("test reason"); + }); + + it("should throw if task is in terminal state", () => { + const 
+        name: "my-task",
+        input: { value: 42 },
+        userId: "user123",
+      });
+      task.start();
+      task.complete();
+      expect(() => task.cancel("test reason")).toThrow(TaskStateError);
+    });
+  });
+
+  describe("recordHeartbeat()", () => {
+    it("should update lastHeartbeatAt", () => {
+      const task = new Task({
+        name: "my-task",
+        input: { value: 42 },
+        userId: "user123",
+      });
+      task.start();
+      task.recordHeartbeat();
+      expect(task.lastHeartbeatAt).toBeInstanceOf(Date);
+    });
+
+    it("should throw if task is not running", () => {
+      const task = new Task({
+        name: "my-task",
+        input: { value: 42 },
+        userId: "user123",
+      });
+      expect(() => task.recordHeartbeat()).toThrow(
+        "Cannot recordHeartbeat from state created, allowed: running",
+      );
+    });
+  });
+
+  describe("incrementAttempt()", () => {
+    it("should increment attempt counter", () => {
+      const task = new Task({
+        name: "my-task",
+        input: { value: 42 },
+        userId: "user123",
+      });
+      task.start();
+      expect(task.attempt).toBe(1);
+      task.incrementAttempt();
+      expect(task.attempt).toBe(2);
+
+      task.incrementAttempt();
+      expect(task.attempt).toBe(3);
+    });
+
+    it("should throw if task is not running", () => {
+      const task = new Task({
+        name: "my-task",
+        input: { value: 42 },
+        userId: "user123",
+      });
+
+      expect(() => task.incrementAttempt()).toThrow(TaskStateError);
+    });
+  });
+
+  describe("resetToPending()", () => {
+    it("should reset failed task to created", () => {
+      const task = new Task({
+        name: "my-task",
+        input: { value: 42 },
+        userId: "user123",
+      });
+
+      task.start();
+      task.fail("error");
+
+      expect(task.status).toBe("failed");
+      expect(task.error).toBe("error");
+      task.resetToPending();
+      expect(task.status).toBe("created");
+      expect(task.error).toBeUndefined();
+      expect(task.completedAt).toBeUndefined();
+    });
+
+    it("should throw if task is not failed", () => {
+      const task = new Task({
+        name: "my-task",
+        input: { value: 42 },
+        userId: "user123",
+      });
+      expect(() => task.resetToPending()).toThrow(TaskStateError);
+      task.start();
+      expect(() => task.resetToPending()).toThrow(TaskStateError);
+    });
+  });
+
+  describe("computed properties", () => {
+    describe("durationMs", () => {
+      it("should return undefined if task not started", () => {
+        const task = new Task({
+          name: "my-task",
+          input: { value: 42 },
+          userId: "user123",
+        });
+        expect(task.durationMs).toBeUndefined();
+      });
+
+      it("should calculate duration from start to completion", async () => {
+        const task = new Task({
+          name: "my-task",
+          input: { value: 42 },
+          userId: "user123",
+        });
+        task.start();
+        await new Promise((resolve) => setTimeout(resolve, 50));
+        task.complete();
+        expect(task.durationMs).toBeGreaterThanOrEqual(50);
+      });
+
+      it("should calculate running duration for active tasks", async () => {
+        const task = new Task({
+          name: "my-task",
+          input: { value: 42 },
+          userId: "user123",
+        });
+        task.start();
+        await new Promise((resolve) => setTimeout(resolve, 30));
+        expect(task.durationMs).toBeGreaterThanOrEqual(30);
+      });
+    });
+
+    describe("isTerminal", () => {
+      it("should return false for created and running", () => {
+        const task = new Task({
+          name: "my-task",
+          input: { value: 42 },
+          userId: "user123",
+        });
+        expect(task.isTerminal).toBe(false);
+        task.start();
+        expect(task.isTerminal).toBe(false);
+      });
+
+      it("should return true for completed, failed, cancelled", () => {
+        const task1 = new Task({
+          name: "my-task",
+          input: { value: 42 },
+          userId: "user123",
+        });
+
+        task1.start();
+        task1.complete();
+        expect(task1.isTerminal).toBe(true);
+
+        const task2 = new Task({
+          name: "my-task",
+          input: { value: 42 },
+          userId: "user123",
+        });
+
+        task2.start();
+        task2.fail("error");
+        expect(task2.isTerminal).toBe(true);
+
+        const task3 = new Task({
+          name: "my-task",
+          input: { value: 42 },
+          userId: "user123",
+        });
+        task3.cancel();
+        expect(task3.isTerminal).toBe(true);
+      });
+    });
+
+    describe("isRunning", () => {
+      it("should return true only when running", () => {
+        const task = new Task({
+          name: "my-task",
+          input: { value: 42 },
+          userId: "user123",
+        });
+        expect(task.isRunning).toBe(false);
+        task.start();
+        expect(task.isRunning).toBe(true);
+        task.complete();
+        expect(task.isRunning).toBe(false);
+      });
+    });
+  });
+
+  describe("toJSON", () => {
+    it("should serialize task to JSON", () => {
+      const task = new Task({
+        name: "my-task",
+        input: { value: 42 },
+        userId: "user123",
+        type: "background",
+      });
+      task.start();
+      task.complete({ result: "done" });
+      const json = task.toJSON();
+      expect(json.id).toBe(task.id);
+      expect(json.name).toBe("my-task");
+      expect(json.input).toEqual({ value: 42 });
+      expect(json.userId).toBe("user123");
+      expect(json.type).toBe("background");
+      expect(json.status).toBe("completed");
+      expect(json.result).toEqual({ result: "done" });
+      expect(typeof json.createdAt).toBe("string");
+      expect(typeof json.startedAt).toBe("string");
+      expect(typeof json.completedAt).toBe("string");
+      expect(typeof json.durationMs).toBe("number");
+    });
+  });
+
+  describe("generateIdempotencyKey", () => {
+    it("should generate consistent key for same params", () => {
+      const params = {
+        name: "my-task",
+        input: { value: 42 },
+        userId: "user123",
+      };
+      const key1 = Task.generateIdempotencyKey(params);
+      const key2 = Task.generateIdempotencyKey(params);
+      expect(key1).toBe(key2);
+      expect(key1).toHaveLength(64);
+    });
+
+    it("should handle null userId", () => {
+      const key = Task.generateIdempotencyKey({
+        name: "my-task",
+        input: { value: 42 },
+        userId: null,
+      });
+      expect(key).toHaveLength(64);
+    });
+  });
+
+  describe("fromRecord()", () => {
+    it("should reconstruct Task from database record", () => {
+      const record: TaskRecord = {
+        id: "task123",
+        name: "my-task",
+        idempotency_key: "abc123",
+        user_id: "user123",
+        task_type: "user",
+        status: "completed",
+        input: JSON.stringify({ value: 42 }),
+        result: JSON.stringify({ result: "done" }),
+        error: null,
+        attempt: 1,
+        created_at: new Date().toISOString(),
+        started_at: new Date().toISOString(),
+        completed_at: new Date().toISOString(),
+        last_heartbeat_at: new Date().toISOString(),
+        execution_options: JSON.stringify({ maxRetries: 1 }),
+      };
+
+      const task = Task.fromRecord(record);
+      expect(task.id).toBe("task123");
+      expect(task.name).toBe("my-task");
+      expect(task.idempotencyKey).toBe("abc123");
+      expect(task.userId).toBe("user123");
+      expect(task.type).toBe("user");
+      expect(task.status).toBe("completed");
+      expect(task.input).toEqual({ value: 42 });
+      expect(task.result).toEqual({ result: "done" });
+      expect(task.error).toBeUndefined();
+    });
+
+    it("should restore all mutable state", () => {
+      const record: TaskRecord = {
+        id: "task-abc123",
+        name: "test-task",
+        idempotency_key: "idem-key",
+        user_id: "user-123",
+        task_type: "background",
+        status: "failed",
+        input: "{}",
+        result: null,
+        error: "Something went wrong",
+        attempt: 3,
+        created_at: "2024-01-01T00:00:00.000Z",
+        started_at: "2024-01-01T00:00:01.000Z",
+        completed_at: "2024-01-01T00:00:05.000Z",
+        last_heartbeat_at: "2024-01-01T00:00:04.000Z",
+        execution_options: '{"maxRetries":5}',
+      };
+
+      const task = Task.fromRecord(record);
+
+      expect(task.type).toBe("background");
+      expect(task.status).toBe("failed");
+      expect(task.attempt).toBe(3);
+      expect(task.error).toBe("Something went wrong");
+      expect(task.startedAt).toBeInstanceOf(Date);
+      expect(task.completedAt).toBeInstanceOf(Date);
+      expect(task.lastHeartbeatAt).toBeInstanceOf(Date);
+      expect(task.executionOptions).toEqual({ maxRetries: 5 });
+    });
+  });
+
+  describe("state machine transitions", () => {
+    it("should follow valid state transitions", () => {
+      // created -> running -> completed
+      const task1 = new Task({
+        name: "my-task",
+        input: { value: 42 },
+        userId: "user123",
+      });
+      expect(task1.status).toBe("created");
+      task1.start();
+      expect(task1.status).toBe("running");
+      task1.complete();
+      expect(task1.status).toBe("completed");
+
+      // created -> running -> failed
+      const task2 = new Task({
+        name: "my-task",
+        input: { value: 42 },
+        userId: "user123",
+      });
+      expect(task2.status).toBe("created");
+      task2.start();
+      task2.fail("error");
+      expect(task2.status).toBe("failed");
+
+      // created -> running -> cancelled
+      const task3 = new Task({
+        name: "my-task",
+        input: { value: 42 },
+        userId: "user123",
+      });
+      task3.start();
+      task3.cancel("test reason");
+      expect(task3.status).toBe("cancelled");
+
+      // created -> cancelled
+      const task4 = new Task({
+        name: "my-task",
+        input: { value: 42 },
+        userId: "user123",
+      });
+      task4.cancel("test reason");
+      expect(task4.status).toBe("cancelled");
+
+      // failed -> created (via resetToPending)
+      const task5 = new Task({
+        name: "my-task",
+        input: { value: 42 },
+        userId: "user123",
+      });
+      task5.start();
+      task5.fail("error");
+      expect(task5.status).toBe("failed");
+      task5.resetToPending();
+      expect(task5.status).toBe("created");
+    });
+
+    it("should reject invalid state transitions", () => {
+      const task = new Task({
+        name: "my-task",
+        input: { value: 42 },
+        userId: "user123",
+      });
+
+      // created -> complete (invalid, must be running)
+      expect(() => task.complete()).toThrow(TaskStateError);
+
+      // created -> fail (invalid, must be running)
+      expect(() => task.fail("error")).toThrow(TaskStateError);
+
+      task.start();
+      task.complete();
+
+      // completed -> anything (invalid, terminal)
+      expect(() => task.start()).toThrow(TaskStateError);
+      expect(() => task.complete()).toThrow(TaskStateError);
+      expect(() => task.fail("error")).toThrow(TaskStateError);
+      expect(() => task.cancel()).toThrow(TaskStateError);
+    });
+  });
+});
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 14988671..371f9e9d 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -508,6 +508,10 @@ importers:
       version: 11.2.0
 
   packages/taskflow:
+    dependencies:
+      json-canonicalize:
+        specifier: ^2.0.0
+        version: 2.0.0
     devDependencies:
       vitest:
         specifier: ^3.2.4
@@ -7570,6 +7574,9 @@ packages:
   json-buffer@3.0.1:
     resolution: {integrity: sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ==}
 
+  json-canonicalize@2.0.0:
+    resolution: {integrity: sha512-yyrnK/mEm6Na3ChbJUWueXdapueW0p380RUyTW87XGb1ww8l8hU0pRrGC3vSWHe9CxrbPHX2fGUOZpNiHR0IIg==}
+
   json-parse-even-better-errors@2.3.1:
     resolution: {integrity: sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==}
 
@@ -19579,6 +19586,8 @@ snapshots:
   json-buffer@3.0.1: {}
 
+  json-canonicalize@2.0.0: {}
+
   json-parse-even-better-errors@2.3.1: {}
 
   json-schema-traverse@0.4.1: {}

From 001c088f6baf27b4f243c8da482a1b93f053c63a Mon Sep 17 00:00:00 2001
From: Ditadi
Date: Mon, 26 Jan 2026 14:23:56 +0000
Subject: [PATCH 05/13] feat(taskflow): guard layer with validator, guard types and branded IDs

---
 packages/taskflow/src/core/branded.ts          | 108 +++++++
 packages/taskflow/src/core/errors.ts           | 103 ++++++
 packages/taskflow/src/domain/events.ts         |  38 ++-
 packages/taskflow/src/domain/task.ts           |  33 +-
 packages/taskflow/src/domain/types.ts          |   7 +-
 packages/taskflow/src/guard/types.ts           | 256 +++++++++++++++
 packages/taskflow/src/guard/validator.ts       | 196 ++++++++++++
 .../taskflow/src/tests/domain/events.test.ts   |  71 +++--
 .../taskflow/src/tests/domain/task.test.ts     | 175 ++++++-----
 .../src/tests/guard/validator.test.ts          | 296 ++++++++++++++++++
 10 files changed, 1137 insertions(+), 146 deletions(-)
 create mode 100644 packages/taskflow/src/core/branded.ts
 create mode 100644 packages/taskflow/src/guard/types.ts
 create mode 100644 packages/taskflow/src/guard/validator.ts
 create mode 100644 packages/taskflow/src/tests/guard/validator.test.ts

diff --git a/packages/taskflow/src/core/branded.ts b/packages/taskflow/src/core/branded.ts
new file mode 100644
index 00000000..b9297c67
--- /dev/null
+++ b/packages/taskflow/src/core/branded.ts
@@ -0,0 +1,108 @@
+import { z } from "zod";
+
+/**
+ * Branded types for Type-Safe IDs
+ *
+ * Branded types prevent accidentally mixing up different types of IDs
+ * at compile time, while remaining plain strings at runtime.
+ */
+
+/**
+ * Creates a branded type - a string that carries a type-level tag.
+ */
+declare const __brand: unique symbol;
+type Brand<T, B> = T & { readonly [__brand]: B };
+
+/**
+ * Task ID = unique identifier for a task.
+ */
+export type TaskId = Brand<string, "TaskId">;
+
+/**
+ * Task Name = registered task handler name.
+ */
+export type TaskName = Brand<string, "TaskName">;
+
+/**
+ * Idempotency key = used for task deduplication.
+ */
+export type IdempotencyKey = Brand<string, "IdempotencyKey">;
+
+/**
+ * User ID = identifies the user who created the task.
+ */
+export type UserId = Brand<string, "UserId">;
+
+/**
+ * Event ID = unique identifier for a task event.
+ */
+export type EventId = Brand<string, "EventId">;
+
+/**
+ * Creates a TaskName from a string
+ */
+export function taskName(value: string): TaskName {
+  return value as TaskName;
+}
+
+/**
+ * Creates a TaskId from a string
+ */
+export function taskId(value: string): TaskId {
+  return value as TaskId;
+}
+
+/**
+ * Creates an IdempotencyKey from a string
+ */
+export function idempotencyKey(value: string): IdempotencyKey {
+  return value as IdempotencyKey;
+}
+
+/**
+ * Creates a UserId from a string
+ */
+export function userId(value: string | null): UserId | null {
+  return value as UserId | null;
+}
+
+/**
+ * Creates an EventId from a string
+ */
+export function eventId(value: string): EventId {
+  return value as EventId;
+}
+
+/**
+ * Checks if a value is a non-empty string (basic validation)
+ */
+export function isValidId<B extends string>(
+  value: string,
+): value is Brand<string, B> {
+  return typeof value === "string" && value.trim() !== "";
+}
+
+export const TaskNameSchema = z
+  .string()
+  .min(1)
+  .max(256)
+  .transform((value) => value as TaskName);
+
+export const TaskIdSchema = z
+  .string()
+  .min(1)
+  .transform((value) => value as TaskId);
+
+export const IdempotencyKeySchema = z
+  .string()
+  .length(64)
+  .transform((value) => value as IdempotencyKey);
+
+export const UserIdSchema = z
+  .string()
+  .min(1)
+  .max(256)
+  .transform((value) => value as UserId);
+
+export const EventIdSchema = z
+  .string()
+  .min(1)
+  .transform((value) => value as EventId);
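What the branding buys, in one sketch: the factory casts are explicit and grepable, the schemas brand at the validation boundary, and accidental cross-assignment stops compiling. The commented lines show the compile errors the brands exist to cause; the brand tag strings above are reconstructed, since any unique literal per type works:

import { taskId, TaskIdSchema, userId, type TaskId } from "./core/branded";

const tid: TaskId = taskId("task_123"); // explicit, documented cast
const parsed: TaskId = TaskIdSchema.parse("task_456"); // validated, then branded

// const plain: TaskId = "task_123"; // error: a plain string is not a TaskId
// const mixed: TaskId = userId("u_1"); // error: UserId | null is not a TaskId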
+
+/**
+ * Creates a TaskName from a string
+ */
+export function taskName(value: string): TaskName {
+  return value as TaskName;
+}
+
+/**
+ * Creates a TaskId from a string
+ */
+export function taskId(value: string): TaskId {
+  return value as TaskId;
+}
+
+/**
+ * Creates an IdempotencyKey from a string
+ */
+export function idempotencyKey(value: string): IdempotencyKey {
+  return value as IdempotencyKey;
+}
+
+/**
+ * Creates a UserId from a string
+ */
+export function userId(value: string | null): UserId | null {
+  return value as UserId | null;
+}
+
+/**
+ * Creates an EventId from a string
+ */
+export function eventId(value: string): EventId {
+  return value as EventId;
+}
+
+/**
+ * Checks if a value is a non-empty string (basic validation)
+ */
+export function isValidId<B>(value: string): value is Brand<string, B> {
+  return typeof value === "string" && value.trim() !== "";
+}
+
+export const TaskNameSchema = z
+  .string()
+  .min(1)
+  .max(256)
+  .transform((value) => value as TaskName);
+
+export const TaskIdSchema = z
+  .string()
+  .min(1)
+  .transform((value) => value as TaskId);
+
+export const IdempotencyKeySchema = z
+  .string()
+  .length(64)
+  .transform((value) => value as IdempotencyKey);
+
+export const UserIdSchema = z
+  .string()
+  .min(1)
+  .max(256)
+  .transform((value) => value as UserId);
+
+export const EventIdSchema = z
+  .string()
+  .min(1)
+  .transform((value) => value as EventId);

diff --git a/packages/taskflow/src/core/errors.ts b/packages/taskflow/src/core/errors.ts
index 58f5846e..507fc107 100644
--- a/packages/taskflow/src/core/errors.ts
+++ b/packages/taskflow/src/core/errors.ts
@@ -71,6 +71,19 @@ export class TaskSystemError extends Error {
     }
   }
 
+  /**
+   * Type guard to check if an error is a TaskSystemError
+   */
+
+  static is(value: unknown): value is TaskSystemError {
+    return (
+      value !== null &&
+      typeof value === "object" &&
+      "name" in value &&
+      (value as Error).name === "TaskSystemError"
+    );
+  }
+
   /**
    * Serializes the error to a JSON-compatible object
    */
@@ -103,6 +116,15 @@ export class ValidationError extends TaskSystemError {
     this.name = "ValidationError";
     this.field = field;
   }
+
+  static is(value: unknown): value is ValidationError {
+    return (
+      value !== null &&
+      typeof value === "object" &&
+      "name" in value &&
+      (value as Error).name === "ValidationError"
+    );
+  }
 }
 
 /**
@@ -119,6 +141,15 @@ export class ConfigValidationError extends TaskSystemError {
     this.name = "ConfigValidationError";
     this.configPath = configPath;
   }
+
+  static is(value: unknown): value is ConfigValidationError {
+    return (
+      value !== null &&
+      typeof value === "object" &&
+      "name" in value &&
+      (value as Error).name === "ConfigValidationError"
+    );
+  }
 }
 
 /**
@@ -142,6 +173,15 @@ export class NotFoundError extends TaskSystemError {
     this.name = "NotFoundError";
     this.resourceType = resourceType;
   }
+
+  static is(value: unknown): value is NotFoundError {
+    return (
+      value !== null &&
+      typeof value === "object" &&
+      "name" in value &&
+      (value as Error).name === "NotFoundError"
+    );
+  }
 }
 
 /**
@@ -152,6 +192,15 @@ export class ConflictError extends TaskSystemError {
     super(message, ErrorCodes.CONFLICT, context);
     this.name = "ConflictError";
   }
+
+  static is(value: unknown): value is ConflictError {
+    return (
+      value !== null &&
+      typeof value === "object" &&
+      "name" in value &&
+      (value as Error).name === "ConflictError"
+    );
+  }
 }
 
 /**
@@ -179,6 +228,15 @@ export class TaskStateError extends TaskSystemError {
     this.attemptedTransition =
attemptedTransition; this.validTransitions = validTransitions; } + + static is(value: unknown): value is TaskStateError { + return ( + value !== null && + typeof value === "object" && + "name" in value && + (value as Error).name === "TaskStateError" + ); + } } /** @@ -191,6 +249,15 @@ export class SlotTimeoutError extends TaskSystemError { this.name = "SlotTimeoutError"; this.timeoutMs = timeoutMs; } + + static is(value: unknown): value is SlotTimeoutError { + return ( + value !== null && + typeof value === "object" && + "name" in value && + (value as Error).name === "SlotTimeoutError" + ); + } } /** @@ -234,6 +301,15 @@ export class BackpressureError extends TaskSystemError { this.retryAfterMs = retryAfterMs; } + static is(value: unknown): value is BackpressureError { + return ( + value !== null && + typeof value === "object" && + "name" in value && + (value as Error).name === "BackpressureError" + ); + } + toHTTPResponse(): HTTP429Response { return { status: 429, @@ -262,6 +338,15 @@ export class InitializationError extends TaskSystemError { this.name = "InitializationError"; this.component = component; } + + static is(value: unknown): value is InitializationError { + return ( + value !== null && + typeof value === "object" && + "name" in value && + (value as Error).name === "InitializationError" + ); + } } /** @@ -291,6 +376,15 @@ export class RetryExhaustedError extends TaskSystemError { this.attempts = attempts; this.maxAttempts = maxAttempts; } + + static is(value: unknown): value is RetryExhaustedError { + return ( + value !== null && + typeof value === "object" && + "name" in value && + (value as Error).name === "RetryExhaustedError" + ); + } } /** @@ -301,6 +395,15 @@ export class StreamOverflowError extends TaskSystemError { super(message, ErrorCodes.STREAM_OVERFLOW, context); this.name = "StreamOverflowError"; } + + static is(value: unknown): value is StreamOverflowError { + return ( + value !== null && + typeof value === "object" && + "name" in value && + (value as Error).name === "StreamOverflowError" + ); + } } // Known retryable error patterns diff --git a/packages/taskflow/src/domain/events.ts b/packages/taskflow/src/domain/events.ts index 2a48ea2a..f37337bc 100644 --- a/packages/taskflow/src/domain/events.ts +++ b/packages/taskflow/src/domain/events.ts @@ -1,3 +1,11 @@ +import { + type EventId, + eventId, + type IdempotencyKey, + type TaskId, + type TaskName, + type UserId, +} from "@/core/branded"; import type { TaskType } from "@/core/types"; import type { TaskExecutionOptions } from "./types"; @@ -58,13 +66,13 @@ export interface TaskEventInput { */ export interface TaskEventContext { /** Unique task ID */ - taskId: string; + taskId: TaskId; /** Task name/template */ - name: string; + name: TaskName; /** Idempotency key for deduplication */ - idempotencyKey: string; + idempotencyKey: IdempotencyKey; /** User ID */ - userId: string; + userId: UserId | null; /** Task type */ taskType: TaskType; /** Execution options */ @@ -76,15 +84,15 @@ export interface TaskEventContext { */ export interface TaskEvent extends TaskEventInput { /** Unique event ID */ - id: string; + id: EventId; /** Task ID this event belongs to */ - taskId: string; + taskId: TaskId; /** Task name/template */ - name: string; + name: TaskName; /** Idempotency key for the task */ - idempotencyKey: string; + idempotencyKey: IdempotencyKey; /** User ID */ - userId: string; + userId: UserId | null; /** Task type */ taskType: TaskType; /** Task input (included for context) */ @@ -116,7 +124,7 @@ export 
interface EventLogEntry { /** Task name */ name: string; /** User ID */ - userId: string; + userId: string | null; /** Task type */ taskType: TaskType; /** Event timestamp */ @@ -208,7 +216,7 @@ export function createTaskEvent( ): TaskEvent { return { ...input, - id: input.id || generateEventId(), + id: input.id ? eventId(input.id) : generateEventId(), taskId: context.taskId, name: context.name, idempotencyKey: context.idempotencyKey, @@ -231,7 +239,7 @@ export function toEventLogEntry(event: TaskEvent): EventLogEntry | null { taskId: event.taskId, idempotencyKey: event.idempotencyKey, name: event.name, - userId: event.userId, + userId: event.userId ?? null, taskType: event.taskType, timestamp: event.timestamp ?? Date.now(), input: event.input, @@ -245,6 +253,8 @@ export function toEventLogEntry(event: TaskEvent): EventLogEntry | null { /** * Generates a unique event ID */ -function generateEventId(): string { - return `evt_${Date.now()}_${Math.random().toString(36).substring(2, 9)}`; +function generateEventId(): EventId { + return eventId( + `evt_${Date.now()}_${Math.random().toString(36).substring(2, 9)}`, + ); } diff --git a/packages/taskflow/src/domain/task.ts b/packages/taskflow/src/domain/task.ts index 5b40057d..69735d7b 100644 --- a/packages/taskflow/src/domain/task.ts +++ b/packages/taskflow/src/domain/task.ts @@ -1,5 +1,15 @@ import { createHash } from "node:crypto"; import { canonicalize } from "json-canonicalize"; +import { + type IdempotencyKey, + idempotencyKey, + type TaskId, + type TaskName, + taskId, + taskName, + type UserId, + userId, +} from "@/core/branded"; import { TaskStateError } from "@/core/errors"; import { isTerminalStatus, @@ -26,15 +36,15 @@ import type { */ export class Task { /** Unique task identifier */ - readonly id: string; + readonly id: TaskId; /** Task name/template */ - readonly name: string; + readonly name: TaskName; /** Input data for the handler */ readonly input: unknown; /** User ID (null for background tasks) */ - readonly userId: string | null; + readonly userId: UserId | null; /** Idempotency key for deduplication */ - readonly idempotencyKey: string; + readonly idempotencyKey: IdempotencyKey; /** Creation timestamp */ readonly createdAt: Date; /** Task type: user or background */ @@ -104,7 +114,7 @@ export class Task { } constructor(params: TaskCreationParams) { - this.id = crypto.randomUUID(); + this.id = taskId(crypto.randomUUID()); this.name = params.name; this.input = params.input; this.userId = params.userId; @@ -115,7 +125,8 @@ export class Task { this._attempt = 0; this.idempotencyKey = - params.idempotencyKey ?? Task.generateIdempotencyKey(params); + params.idempotencyKey ?? + idempotencyKey(Task.generateIdempotencyKey(params)); } /** @@ -241,11 +252,11 @@ export class Task { */ static fromRecord(record: TaskRecord): Task { const task = new Task({ - name: record.name, + name: taskName(record.name), input: JSON.parse(record.input), - userId: record.user_id, + userId: userId(record.user_id), type: record.task_type as TaskType, - idempotencyKey: record.idempotency_key, + idempotencyKey: idempotencyKey(record.idempotency_key), executionOptions: record.execution_options ? 
JSON.parse(record.execution_options)
         : undefined,
@@ -281,7 +292,9 @@ export class Task {
       input: params.input,
       userId: params.userId,
     };
-    return createHash("sha256").update(canonicalize(payload)).digest("hex");
+    return idempotencyKey(
+      createHash("sha256").update(canonicalize(payload)).digest("hex"),
+    );
   }
 
   /**
diff --git a/packages/taskflow/src/domain/types.ts b/packages/taskflow/src/domain/types.ts
index 57732a2e..dcbf423f 100644
--- a/packages/taskflow/src/domain/types.ts
+++ b/packages/taskflow/src/domain/types.ts
@@ -1,3 +1,4 @@
+import { IdempotencyKey, TaskId, TaskName, UserId } from "@/core/branded";
 import type { TaskStatus, TaskType } from "@/core/types";
 
 /**
@@ -15,17 +16,17 @@ export interface TaskCreationParams {
   /** The registered task name/template */
-  name: string;
+  name: TaskName;
   /** Input data for the task handler */
   input: unknown;
   /** User ID for user-initiated tasks, null for background tasks */
-  userId: string | null;
+  userId: UserId | null;
   /** Task type: user or background */
   type?: TaskType;
   /** Execution options for the task */
   executionOptions?: TaskExecutionOptions;
   /** Custom idempotency key (auto-generated if not provided) */
-  idempotencyKey?: string;
+  idempotencyKey?: IdempotencyKey;
 }
 
 /**
diff --git a/packages/taskflow/src/guard/types.ts b/packages/taskflow/src/guard/types.ts
new file mode 100644
index 00000000..351d6525
--- /dev/null
+++ b/packages/taskflow/src/guard/types.ts
@@ -0,0 +1,256 @@
+import { IdempotencyKey, TaskName, UserId } from "@/core/branded";
+import type { Task } from "@/domain";
+
+/**
+ * Configuration for rate limiting and admission control
+ */
+export interface BackpressureConfig {
+  /** Size of the sliding window in milliseconds */
+  windowSizeMs: number;
+  /** Maximum tasks allowed per window (global) */
+  maxTasksPerWindow: number;
+  /** Maximum tasks allowed per user per window */
+  maxTasksPerUserWindow: number;
+  /** Maximum tasks that can be queued globally */
+  maxQueuedSize: number;
+}
+
+/**
+ * Reason for rejecting a task admission
+ */
+export type RejectionReason =
+  | "global_rate_limit"
+  | "user_rate_limit"
+  | "queue_full"
+  | "in_dlq";
+
+/**
+ * Statistics for a single window
+ */
+export interface WindowStats {
+  /** Tasks accepted in current window */
+  accepted: number;
+  /** Tasks rejected in current window */
+  rejected: number;
+  /** Window start timestamp */
+  startedAt: number;
+}
+
+/**
+ * Statistics for admission control
+ */
+export interface AdmissionStats {
+  /** Current configuration */
+  config: BackpressureConfig;
+  /** Current window statistics */
+  window: WindowStats;
+  /** Rejection breakdown by reason */
+  rejections: {
+    byReason: Record<RejectionReason, number>;
+    lastAt?: number;
+  };
+  /** Lifetime totals */
+  totals: {
+    accepted: number;
+    rejected: number;
+  };
+}
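// A quick worked example (illustrative only) of the sliding-window math these
// stats describe: with windowSizeMs = 60_000 and maxTasksPerUserWindow = 100,
// a user's 101st submission inside one minute is rejected, and the suggested
// retry delay is the time until the oldest timestamp ages out of the window:
//
//   const oldest = Math.min(...userTimestamps);
//   const retryAfterMs = Math.max(0, oldest + 60_000 - Date.now());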
+
+/**
+ * Configuration for execution slot management
+ */
+export interface SlotManagerConfig {
+  /** Maximum concurrent executions globally */
+  maxExecutionGlobal: number;
+  /** Maximum concurrent executions per user */
+  maxExecutionPerUser: number;
+  /** Timeout for acquiring a slot in milliseconds */
+  slotTimeoutMs: number;
+}
+
+/**
+ * Statistics for slot management
+ */
+export interface SlotStats {
+  /** Current slot state */
+  current: {
+    /** Slots currently in use */
+    inUse: number;
+    /** Tasks waiting for a slot */
+    waiting: number;
+    /** Available slots */
+    available: number;
+  };
+  /** Configuration limits */
+  limits: {
+    global: number;
+    perUser: number;
+  };
+  /** Slot events */
+  events: {
+    /** Number of timeout events */
+    timeouts: number;
+    /** Total slots acquired */
+    acquired: number;
+    /** Total slots released */
+    released: number;
+  };
+}
+
+/**
+ * Event types emitted by the DLQ
+ */
+export type DLQEventType =
+  | "dlq:added"
+  | "dlq:removed"
+  | "dlq:retried"
+  | "dlq:expired"
+  | "dlq:evicted"
+  | "dlq:retry_exhausted";
+
+/**
+ * Event emitted when DLQ state changes
+ */
+export interface DLQEvent {
+  type: DLQEventType;
+  idempotencyKey: IdempotencyKey;
+  taskName: TaskName;
+  userId?: UserId;
+  reason?: string;
+  timestamp: number;
+  retryAttempt?: number;
+  error?: string;
+}
+
+/**
+ * An entry in the dead letter queue
+ */
+export interface DLQEntry {
+  /** The failed task */
+  task: Task;
+  /** When the task was added to DLQ */
+  addedAt: number;
+  /** Reason for adding to DLQ */
+  reason?: string;
+  /** Number of retry attempts from DLQ */
+  retryCount: number;
+  /** Last retry attempt timestamp */
+  lastRetryAt?: number;
+  /** Last error message */
+  error?: string;
+}
+
+/**
+ * Configuration for the dead letter queue
+ */
+export interface DLQConfig {
+  /** Maximum number of entries in DLQ */
+  maxSize: number;
+  /** Time-to-live for DLQ entries in milliseconds */
+  ttlMs: number;
+  /** Cleanup interval in milliseconds */
+  cleanupIntervalMs: number;
+  /** Maximum retry attempts for DLQ */
+  maxRetries: number;
+}
+
+/**
+ * Statistics for the dead letter queue
+ */
+export interface DLQStats {
+  /** Current size */
+  size: number;
+  /** Entries grouped by reason */
+  byReason: Record<RejectionReason, number>;
+  /** Total entries ever added */
+  totalAdded: number;
+  /** Total entries removed */
+  totalRemoved: number;
+  /** Total entries expired */
+  totalExpired: number;
+  /** Total entries evicted (due to capacity) */
+  totalEvicted: number;
+  /** Total retry attempts */
+  totalRetries: number;
+  /** Average age of entries in milliseconds */
+  avgAgeMs: number;
+  /** Age of oldest entry in milliseconds */
+  oldestAgeMs: number;
+  /** Last event timestamp */
+  lastEventAt: number;
+}
+
+/**
+ * Callback for DLQ events
+ */
+export type DLQEventListener = (event: DLQEvent) => void;
+
+/**
+ * Configuration for recovery slots (separate pool from execution slots)
+ */
+export interface RecoverySlotConfig {
+  /** Maximum concurrent recovery operations */
+  maxRecoverySlots: number;
+  /** Timeout for acquiring a recovery slot */
+  recoverySlotTimeoutMs: number;
+}
+
+/**
+ * Statistics for recovery slots
+ */
+export interface RecoverySlotStats {
+  /** Slots currently in use */
+  inUse: number;
+  /** Maximum slots available */
+  limit: number;
+  /** Available slots */
+  available: number;
+}
+
+/**
+ * Combined statistics from all guard components
+ */
+export interface GuardStats {
+  admission: AdmissionStats;
+  slots: SlotStats;
+  dlq: DLQStats;
+  recovery: RecoverySlotStats;
+}
+
+/**
+ * Guard configuration combining all sub-configs
+ */
+
+export interface GuardConfig {
+  backpressure: BackpressureConfig;
+  slots: SlotManagerConfig;
+  dlq: DLQConfig;
+  recovery: RecoverySlotConfig;
+}
+
+/**
+ * Default guard configuration values
+ */
+export const DEFAULT_GUARD_CONFIG: GuardConfig = {
+  backpressure: {
+    windowSizeMs: 60_000, // 1 minute
+    maxTasksPerWindow: 1000,
+    maxTasksPerUserWindow: 100,
+    maxQueuedSize: 500,
+  },
+  slots: {
+    maxExecutionGlobal: 50,
+    maxExecutionPerUser: 10,
+    slotTimeoutMs: 30_000, // 30 seconds
+  },
+  dlq: {
+    maxSize: 1000,
+    ttlMs: 24 * 60 * 60 * 1000, // 24 hours
+    cleanupIntervalMs: 60_000, // 1 minute
+    maxRetries: 3,
+  },
+  recovery: {
+    maxRecoverySlots: 10,
+    recoverySlotTimeoutMs: 60_000, // 1 minute
+  },
+};
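// Sketch (illustrative, assuming a plain spread-based merge) of overriding a
// single sub-config while keeping the other defaults:
//
//   const config: GuardConfig = {
//     ...DEFAULT_GUARD_CONFIG,
//     slots: { ...DEFAULT_GUARD_CONFIG.slots, maxExecutionGlobal: 100 },
//   };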
diff --git a/packages/taskflow/src/guard/validator.ts b/packages/taskflow/src/guard/validator.ts
new file mode 100644
index 00000000..1613e7f8
--- /dev/null
+++ b/packages/taskflow/src/guard/validator.ts
@@ -0,0 +1,196 @@
+import type { ZodType } from "zod";
+/**
+ * Task validator
+ * Validates task inputs before admission into the task system
+ */
+
+import { ValidationError } from "@/core/errors";
+
+/**
+ * Configuration for the task validator
+ */
+export interface ValidatorConfig {
+  /** Maximum payload size in bytes */
+  maxPayloadSizeBytes: number;
+  /** Maximum task name length */
+  maxTaskNameLength: number;
+  /** Minimum task name length */
+  minTaskNameLength: number;
+  /** Pattern for valid task names */
+  taskNamePattern: RegExp;
+  /** Maximum user ID length */
+  maxUserIdLength: number;
+  /** Minimum user ID length */
+  minUserIdLength: number;
+}
+
+/**
+ * Default validator configuration
+ */
+const DEFAULT_VALIDATOR_CONFIG: ValidatorConfig = {
+  maxPayloadSizeBytes: 1024 * 1024, // 1MB
+  maxTaskNameLength: 256,
+  minTaskNameLength: 1,
+  taskNamePattern: /^[a-zA-Z0-9_-]+$/,
+  maxUserIdLength: 256,
+  minUserIdLength: 1,
+};
+
+/**
+ * Input structure for task validation
+ */
+export interface TaskInput {
+  /** Task name/template */
+  name: string;
+  /** Task input payload */
+  input?: unknown;
+  /** Optional user ID */
+  userId?: string;
+}
+
+/**
+ * Task validator for validating task inputs before admission
+ * @example
+ * const validator = new TaskValidator();
+ *
+ * // validate a full task input
+ * validator.validate({ name: "my-task", input: { data: 123 }, userId: "user-1" });
+ *
+ * // validate against a Zod schema
+ * const schema = z.object({ data: z.number() });
+ * const parsed = validator.validateInputSchema({ data: 123 }, schema);
+ */
+export class TaskValidator {
+  private readonly config: ValidatorConfig;
+
+  constructor(config: Partial<ValidatorConfig> = {}) {
+    this.config = { ...DEFAULT_VALIDATOR_CONFIG, ...config };
+  }
+
+  /**
+   * Validates a complete task input
+   * @throws {ValidationError} if any validation fails
+   */
+  validate(taskInput: TaskInput): void {
+    this.validateName(taskInput.name);
+    if (taskInput.input !== undefined) this.validatePayload(taskInput.input);
+    if (taskInput.userId !== undefined) this.validateUserId(taskInput.userId);
+  }
+
+  /**
+   * Validates a task name
+   * @throws {ValidationError} if the name is invalid
+   */
+  validateName(name: string): void {
+    if (typeof name !== "string")
+      throw new ValidationError("Task name must be a string", "name");
+
+    if (name.length < this.config.minTaskNameLength)
+      throw new ValidationError(
+        `Task name must be at least ${this.config.minTaskNameLength} character(s)`,
+        "name",
+      );
+
+    if (name.length > this.config.maxTaskNameLength)
+      throw new ValidationError(
+        `Task name must not exceed ${this.config.maxTaskNameLength} characters`,
+        "name",
+      );
+
+    if (!this.config.taskNamePattern.test(name)) {
+      throw new ValidationError(
+        "Task name must contain only alphanumeric characters, underscores, or hyphens",
+        "name",
+      );
+    }
+  }
+
+  /**
+   * Validates a task payload
+   * @throws {ValidationError} if the payload is invalid or too large
+   */
+  validatePayload(input: unknown): void {
+    const size = this.calculatePayloadSize(input);
+    if (size > this.config.maxPayloadSizeBytes) {
+      throw new ValidationError(
+        `Payload size (${size}) exceeds maximum allowed (${this.config.maxPayloadSizeBytes} bytes)`,
+        "input",
+      );
+    }
+  }
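// Illustrative only: one way an API boundary could surface these failures,
// using the ValidationError.is type guard added earlier in this patch (the
// 400 mapping itself is an assumption, not part of the patch):
//
//   try {
//     validator.validate({ name, input, userId });
//   } catch (err) {
//     if (ValidationError.is(err)) {
//       return { status: 400, field: err.field, message: err.message };
//     }
//     throw err; // unexpected errors keep propagating
//   }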
+  /**
+   * Validates a user ID
+   * @throws {ValidationError} if the user ID is invalid
+   */
+  validateUserId(userId: string): void {
+    if (typeof userId !== "string") {
+      throw new ValidationError("User ID must be a string", "userId");
+    }
+
+    if (userId.length < this.config.minUserIdLength) {
+      throw new ValidationError(
+        `User ID must be at least ${this.config.minUserIdLength} character(s)`,
+        "userId",
+      );
+    }
+
+    if (userId.length > this.config.maxUserIdLength) {
+      throw new ValidationError(
+        `User ID must not exceed ${this.config.maxUserIdLength} characters`,
+        "userId",
+      );
+    }
+  }
+
+  /**
+   * Validates input against a Zod schema
+   * @returns The parsed and transformed data
+   * @throws {ValidationError} if validation fails
+   */
+  validateInputSchema<T>(input: unknown, schema: ZodType<T>): T {
+    const result = schema.safeParse(input);
+    if (!result.success) {
+      const zodError = result.error;
+      const firstIssue = zodError.issues[0];
+      const path = firstIssue.path.join(".") || "input";
+      const message = firstIssue.message || "Invalid input";
+      throw new ValidationError(`Input validation failed: ${message}`, path, {
+        zodError: zodError.format(),
+      });
+    }
+    return result.data;
+  }
+
+  /**
+   * Calculate the byte size of a payload
+   * @throws {ValidationError} if the payload is not JSON serializable
+   */
+  private calculatePayloadSize(input: unknown): number {
+    try {
+      const json = JSON.stringify(input);
+      return new TextEncoder().encode(json).length;
+    } catch {
+      throw new ValidationError("Payload must be JSON serializable", "input");
+    }
+  }
+}
+
+/**
+ * Default validator instance with standard configuration
+ */
+export const defaultValidator = new TaskValidator();
+
+/** Convenience function to validate a task input using the default validator */
+export function validateTaskInput(taskInput: TaskInput): void {
+  defaultValidator.validate(taskInput);
+}
+
+/**
+ * Convenience function to validate input against a Zod schema
+ * @param input - The input to validate
+ * @param schema - The Zod schema to validate against
+ */
+export function validateInputSchema<T>(input: unknown, schema: ZodType<T>): T {
+  return defaultValidator.validateInputSchema(input, schema);
+}
diff --git a/packages/taskflow/src/tests/domain/events.test.ts b/packages/taskflow/src/tests/domain/events.test.ts
index a1525cb1..c00db0af 100644
--- a/packages/taskflow/src/tests/domain/events.test.ts
+++ b/packages/taskflow/src/tests/domain/events.test.ts
@@ -1,4 +1,11 @@
 import { describe, expect, it } from "vitest";
+import {
+  eventId,
+  idempotencyKey,
+  taskId,
+  taskName,
+  userId,
+} from "@/core/branded";
 import {
   createTaskEvent,
   isRecoveryRelevant,
@@ -130,10 +137,10 @@
       payload: { percent: 50 },
     };
     const context: TaskEventContext = {
-      taskId: "123",
-      name: "my-task",
-      idempotencyKey: "abc123",
-      userId: "user123",
+      taskId: taskId("123"),
+      name: taskName("my-task"),
+      idempotencyKey: idempotencyKey("abc123"),
+      userId: userId("user123"),
       taskType: "user",
     };
     const event = createTaskEvent(input, context);
@@ -156,10 +163,10 @@
       type: "complete",
     };
     const context: TaskEventContext = {
-      taskId: "123",
-      name: "my-task",
-      idempotencyKey: "abc123",
-      userId: "user123",
+      taskId: taskId("123"),
+      name: taskName("my-task"),
+      idempotencyKey: idempotencyKey("abc123"),
+      userId: userId("user123"),
       taskType: "user",
     };
     const event = createTaskEvent(input, context);
@@ -173,10 +180,10 @@
       payload: { percent: 50 },
     };
     const
context: TaskEventContext = { - taskId: "123", - name: "my-task", - idempotencyKey: "abc123", - userId: "user123", + taskId: taskId("123"), + name: taskName("my-task"), + idempotencyKey: idempotencyKey("abc123"), + userId: userId("user123"), taskType: "user", executionOptions: { maxRetries: 3, @@ -194,12 +201,12 @@ describe("Domain Events", () => { describe("toEventLogEntry", () => { it("should convert TaskEvent to EventLogEntry (for WAL persistence)", () => { const event: TaskEvent = { - id: "evt_123", - taskId: "123", - name: "my-task", + id: eventId("evt_123"), + taskId: taskId("123"), + name: taskName("my-task"), type: "complete", - idempotencyKey: "abc123", - userId: "user123", + idempotencyKey: idempotencyKey("abc123"), + userId: userId("user123"), taskType: "user", timestamp: Date.now(), result: { data: "success" }, @@ -214,12 +221,12 @@ describe("Domain Events", () => { it("should return null for retry events (not persisted to WAL)", () => { const event: TaskEvent = { - id: "evt_123", - taskId: "123", - name: "my-task", + id: eventId("evt_123"), + taskId: taskId("123"), + name: taskName("my-task"), type: "retry", - idempotencyKey: "abc123", - userId: "user123", + idempotencyKey: idempotencyKey("abc123"), + userId: userId("user123"), taskType: "user", timestamp: Date.now(), nextRetryDelayMs: 1000, @@ -229,12 +236,12 @@ describe("Domain Events", () => { it("should return null for recovered events (internal event)", () => { const event: TaskEvent = { - id: "evt_123", - taskId: "123", - name: "my-task", + id: eventId("evt_123"), + taskId: taskId("123"), + name: taskName("my-task"), type: "recovered", - idempotencyKey: "abc123", - userId: "user123", + idempotencyKey: idempotencyKey("abc123"), + userId: userId("user123"), taskType: "user", }; @@ -243,12 +250,12 @@ describe("Domain Events", () => { it("should include executionOptions in entry", () => { const event: TaskEvent = { - id: "evt_123", - taskId: "123", - name: "my-task", + id: eventId("evt_123"), + taskId: taskId("123"), + name: taskName("my-task"), type: "created", - idempotencyKey: "abc123", - userId: "user123", + idempotencyKey: idempotencyKey("abc123"), + userId: userId("user123"), taskType: "user", timestamp: Date.now(), executionOptions: { diff --git a/packages/taskflow/src/tests/domain/task.test.ts b/packages/taskflow/src/tests/domain/task.test.ts index 79940701..b1a9c38b 100644 --- a/packages/taskflow/src/tests/domain/task.test.ts +++ b/packages/taskflow/src/tests/domain/task.test.ts @@ -1,4 +1,5 @@ import { describe, expect, it } from "vitest"; +import { idempotencyKey, taskName, userId } from "@/core/branded"; import { TaskStateError } from "@/core/errors"; import type { TaskRecord } from "@/domain"; import { Task } from "@/domain/task"; @@ -7,9 +8,9 @@ describe("Task", () => { describe("Constructor", () => { it("should create a task with default values", () => { const task = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); expect(task.id).toBeDefined(); @@ -25,9 +26,9 @@ describe("Task", () => { it("should create a task with custom type (background)", () => { const task = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: null, + userId: userId(null), type: "background", }); @@ -37,19 +38,19 @@ describe("Task", () => { it("should use provided idempotencyKey", () => { const task = new Task({ - name: "my-task", + name: taskName("my-task"), input: {}, - userId: "user123", - idempotencyKey: 
"abc123", + userId: userId("user123"), + idempotencyKey: idempotencyKey("abc123"), }); expect(task.idempotencyKey).toBe("abc123"); }); it("should generate deterministic idempotencyKey", () => { const params = { - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }; const task1 = new Task(params); const task2 = new Task(params); @@ -58,27 +59,27 @@ describe("Task", () => { it("should generate different idempotencykey for different inputs", () => { const task1 = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); const task2 = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 43 }, - userId: "user123", + userId: userId("user123"), }); expect(task1.idempotencyKey).not.toBe(task2.idempotencyKey); }); it("should generate same idempotencyKey regardless of object key order", () => { const task1 = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); const task2 = new Task({ - name: "my-task", - userId: "user123", + name: taskName("my-task"), + userId: userId("user123"), input: { value: 42 }, }); expect(task1.idempotencyKey).toBe(task2.idempotencyKey); @@ -88,9 +89,9 @@ describe("Task", () => { describe("start()", () => { it("should transition from created to running", () => { const task = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); task.start(); expect(task.status).toBe("running"); @@ -101,9 +102,9 @@ describe("Task", () => { it("should throw if task is already running", () => { const task = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); task.start(); expect(() => task.start()).toThrow(TaskStateError); @@ -114,9 +115,9 @@ describe("Task", () => { it("should throw if task is in terminal state", () => { const task = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); task.start(); task.complete(); @@ -128,9 +129,9 @@ describe("Task", () => { describe("complete()", () => { it("should transition from running to completed", () => { const task = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); task.start(); @@ -143,9 +144,9 @@ describe("Task", () => { it("should work without result", () => { const task = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); task.start(); task.complete(); @@ -155,9 +156,9 @@ describe("Task", () => { it("should throw if task is not running", () => { const task = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); task.start(); task.complete(); @@ -167,9 +168,9 @@ describe("Task", () => { it("should throw if task is already completed", () => { const task = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); task.start(); task.complete(); @@ -180,9 +181,9 @@ describe("Task", () => { describe("fail()", () => { it("should transition from running to failed with string error", () => { const task = new Task({ - name: "my-task", + name: 
taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); task.start(); task.fail("test error"); @@ -194,9 +195,9 @@ describe("Task", () => { it("should extract message from Error object", () => { const task = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); task.start(); task.fail(new Error("test error")); @@ -204,9 +205,9 @@ describe("Task", () => { }); it("should throw if task is not running", () => { const task = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); expect(() => task.fail("error")).toThrow(TaskStateError); expect(() => task.fail("error")).toThrow( @@ -218,9 +219,9 @@ describe("Task", () => { describe("cancel()", () => { it("should transition from created to cancelled", () => { const task = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); task.cancel("test reason"); expect(task.status).toBe("cancelled"); @@ -229,9 +230,9 @@ describe("Task", () => { }); it("should transition from running to cancelled", () => { const task = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); task.start(); task.cancel("test reason"); @@ -241,9 +242,9 @@ describe("Task", () => { it("should throw if task is in terminal state", () => { const task = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); task.start(); task.complete(); @@ -254,9 +255,9 @@ describe("Task", () => { describe("recordHeartbeat()", () => { it("should update lastHeartbeatAt", () => { const task = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); task.start(); task.recordHeartbeat(); @@ -265,9 +266,9 @@ describe("Task", () => { it("should throw if task is not running", () => { const task = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); expect(() => task.recordHeartbeat()).toThrow( "Cannot recordHeartbeat from state created, allowed: running", @@ -278,9 +279,9 @@ describe("Task", () => { describe("incrementAttempt()", () => { it("should increment attempt counter", () => { const task = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); task.start(); expect(task.attempt).toBe(1); @@ -293,9 +294,9 @@ describe("Task", () => { it("should throw if task is not running", () => { const task = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); expect(() => task.incrementAttempt()).toThrow(TaskStateError); @@ -305,9 +306,9 @@ describe("Task", () => { describe("resetToPending()", () => { it("should reset failed task to created", () => { const task = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); task.start(); @@ -323,9 +324,9 @@ describe("Task", () => { it("should throw if task is not failed", () => { const task = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); expect(() => 
task.resetToPending()).toThrow(TaskStateError); task.start(); @@ -337,9 +338,9 @@ describe("Task", () => { describe("durationMs", () => { it("should return undefined if task not started", async () => { const task = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); task.start(); @@ -351,9 +352,9 @@ describe("Task", () => { it("should calculate running duration for active tasks", async () => { const task = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); task.start(); await new Promise((resolve) => setTimeout(resolve, 30)); @@ -364,9 +365,9 @@ describe("Task", () => { describe("isTerminal", () => { it("should return false for created and running", () => { const task = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); expect(task.isTerminal).toBe(false); task.start(); @@ -375,9 +376,9 @@ describe("Task", () => { it("should return true for completed, failed, cancelled", () => { const task1 = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); task1.start(); @@ -385,9 +386,9 @@ describe("Task", () => { expect(task1.isTerminal).toBe(true); const task2 = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); task2.start(); @@ -395,9 +396,9 @@ describe("Task", () => { expect(task2.isTerminal).toBe(true); const task3 = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); task3.cancel(); expect(task3.isTerminal).toBe(true); @@ -407,9 +408,9 @@ describe("Task", () => { describe("isRunning", () => { it("should return true only when running", () => { const task = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); expect(task.isRunning).toBe(false); task.start(); @@ -423,9 +424,9 @@ describe("Task", () => { describe("toJSON", () => { it("should serialize task to JSON", () => { const task = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), type: "background", }); task.start(); @@ -448,9 +449,9 @@ describe("Task", () => { describe("generateIdempotencyKey", () => { it("should generate consistent key for same params", () => { const params = { - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }; const key1 = Task.generateIdempotencyKey(params); const key2 = Task.generateIdempotencyKey(params); @@ -460,9 +461,9 @@ describe("Task", () => { it("should handle null userId", () => { const key = Task.generateIdempotencyKey({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: null, + userId: userId(null), }); expect(key).toHaveLength(64); }); @@ -535,9 +536,9 @@ describe("Task", () => { it("should follow valid state transitions", () => { // created -> running -> completed const task1 = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); expect(task1.status).toBe("created"); task1.start(); @@ -547,9 +548,9 @@ describe("Task", () => { // created -> running -> failed const task2 = new 
Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); expect(task2.status).toBe("created"); task2.start(); @@ -558,9 +559,9 @@ describe("Task", () => { // created -> running -> cancelled const task3 = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); task3.start(); task3.cancel("test reason"); @@ -568,18 +569,18 @@ describe("Task", () => { // created -> cancelled const task4 = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); task4.cancel("test reason"); expect(task4.status).toBe("cancelled"); // failed -> created (via resetToPending) const task5 = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); task5.start(); task5.fail("error"); @@ -590,9 +591,9 @@ describe("Task", () => { it("should reject invalid state transitions", () => { const task = new Task({ - name: "my-task", + name: taskName("my-task"), input: { value: 42 }, - userId: "user123", + userId: userId("user123"), }); // created -> complete (invalid, must be running) diff --git a/packages/taskflow/src/tests/guard/validator.test.ts b/packages/taskflow/src/tests/guard/validator.test.ts new file mode 100644 index 00000000..bf4b22b8 --- /dev/null +++ b/packages/taskflow/src/tests/guard/validator.test.ts @@ -0,0 +1,296 @@ +import { describe, expect, it } from "vitest"; +import * as z from "zod"; +import { ValidationError } from "@/core/errors"; +import { + defaultValidator, + TaskValidator, + validateInputSchema, + validateTaskInput, +} from "@/guard/validator"; + +describe("TaskValidator", () => { + const validator = new TaskValidator(); + + describe("validateTaskName", () => { + it("should accept valid task names (alphanumeric, underscore, hyphen", () => { + expect(() => validator.validateName("myTask")).not.toThrow(); + expect(() => validator.validateName("my_task")).not.toThrow(); + expect(() => validator.validateName("my-task")).not.toThrow(); + expect(() => validator.validateName("MyTask123")).not.toThrow(); + expect(() => validator.validateName("a")).not.toThrow(); + expect(() => validator.validateName("123Task")).not.toThrow(); + }); + + it("should reject empty task name", () => { + expect(() => validator.validateName("")).toThrow(ValidationError); + try { + validator.validateName(""); + } catch (error) { + expect((error as ValidationError).field).toBe("name"); + } + }); + + it("should reject task name with special character", () => { + expect(() => validator.validateName("my.task")).toThrow(ValidationError); + expect(() => validator.validateName("my task")).toThrow(ValidationError); + expect(() => validator.validateName("my@task")).toThrow(ValidationError); + expect(() => validator.validateName("my/task")).toThrow(ValidationError); + }); + + it("should reject task name exceeding max length", () => { + const longName = "a".repeat(257); + expect(() => validator.validateName(longName)).toThrow(ValidationError); + }); + + it("should accept task name at max length", () => { + const longName = "a".repeat(256); + expect(() => validator.validateName(longName)).not.toThrow(); + }); + }); + + describe("validatePayload", () => { + it("should accept valid JSON payload (object, array, string, number, null, boolean)", () => { + expect(() => validator.validatePayload({ key: "value" })).not.toThrow(); + expect(() => 
validator.validatePayload([1, 2, 3])).not.toThrow();
+      expect(() => validator.validatePayload("string")).not.toThrow();
+      expect(() => validator.validatePayload(123)).not.toThrow();
+      expect(() => validator.validatePayload(null)).not.toThrow();
+      expect(() => validator.validatePayload(true)).not.toThrow();
+    });
+
+    it("should reject non-serializable payload (circular reference)", () => {
+      const circular: Record<string, unknown> = {};
+      circular.self = circular;
+      expect(() => validator.validatePayload(circular)).toThrow(
+        ValidationError,
+      );
+
+      try {
+        validator.validatePayload(circular);
+      } catch (error) {
+        expect((error as ValidationError).field).toBe("input");
+        expect((error as ValidationError).message).toContain(
+          "JSON serializable",
+        );
+      }
+    });
+
+    it("should reject payload exceeding max size", () => {
+      const customValidator = new TaskValidator({ maxPayloadSizeBytes: 10 });
+      const largePayload = { data: "#".repeat(100) };
+      expect(() => customValidator.validatePayload(largePayload)).toThrow(
+        ValidationError,
+      );
+    });
+
+    it("should include size info in error message", () => {
+      const customValidator = new TaskValidator({ maxPayloadSizeBytes: 10 });
+      try {
+        customValidator.validatePayload({ data: "#".repeat(100) });
+      } catch (error) {
+        expect(error).toBeInstanceOf(ValidationError);
+        expect((error as ValidationError).message).toContain("bytes");
+        expect((error as ValidationError).message).toContain("exceeds");
+      }
+    });
+
+    it("should handle UTF-8 characters correctly", () => {
+      const customValidator = new TaskValidator({ maxPayloadSizeBytes: 20 });
+      expect(() => customValidator.validatePayload("你好")).not.toThrow();
+    });
+  });
+
+  describe("validateUserId", () => {
+    it("should accept valid user IDs", () => {
+      expect(() => validator.validateUserId("user-123")).not.toThrow();
+      expect(() => validator.validateUserId("a")).not.toThrow();
+      expect(() => validator.validateUserId("user@example.com")).not.toThrow();
+      expect(() => validator.validateUserId("123-456-789")).not.toThrow();
+    });
+
+    it("should reject empty user ID", () => {
+      expect(() => validator.validateUserId("")).toThrow();
+      try {
+        validator.validateUserId("");
+      } catch (error) {
+        expect((error as ValidationError).field).toBe("userId");
+      }
+    });
+
+    it("should reject userId exceeding max length", () => {
+      const longId = "a".repeat(257);
+      expect(() => validator.validateUserId(longId)).toThrow(ValidationError);
+    });
+
+    it("should accept userId at max length", () => {
+      const longId = "a".repeat(256);
+      expect(() => validator.validateUserId(longId)).not.toThrow();
+    });
+  });
+
+  describe("validate (full)", () => {
+    it("should validate complete task input", () => {
+      expect(() =>
+        validator.validate({
+          name: "myTask",
+          input: { key: "value" },
+          userId: "user-123",
+        }),
+      ).not.toThrow();
+    });
+
+    it("should validate task input without userId (background tasks)", () => {
+      expect(() =>
+        validator.validate({
+          name: "myTask",
+          input: { key: "value" },
+        }),
+      ).not.toThrow();
+    });
+
+    it("should validate task input without input", () => {
+      expect(() =>
+        validator.validate({
+          name: "myTask",
+        }),
+      ).not.toThrow();
+    });
+
+    it("should throw on invalid name", () => {
+      expect(() =>
+        validator.validate({
+          name: "",
+          input: { key: "value" },
+        }),
+      ).toThrow(ValidationError);
+    });
+
+    it("should throw on invalid payload", () => {
+      const circular: Record<string, unknown> = {};
+      circular.self = circular;
+      expect(() =>
+        validator.validate({
+          name: "myTask",
+          input: circular,
+        }),
+      ).toThrow(ValidationError);
+    });
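// For reference (illustrative): JSON.stringify is what makes the circular
// payload above fail, and calculatePayloadSize converts that into a
// ValidationError on the "input" field:
//
//   const circular: Record<string, unknown> = {};
//   circular.self = circular;
//   JSON.stringify(circular); // TypeError: Converting circular structure to JSON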
+
+    it("should throw on invalid userId", () => {
+      expect(() =>
+        validator.validate({
+          name: "myTask",
+          userId: "",
+        }),
+      ).toThrow(ValidationError);
+    });
+  });
+
+  describe("validateInputSchema", () => {
+    it("should validate input against a Zod schema", () => {
+      const schema = z.object({
+        name: z.string(),
+        age: z.number(),
+      });
+
+      const result = validator.validateInputSchema(
+        { name: "John", age: 30 },
+        schema,
+      );
+
+      expect(result).toEqual({ name: "John", age: 30 });
+    });
+
+    it("should throw ValidationError on invalid input", () => {
+      const schema = z.object({
+        name: z.string().min(1, "Name is required"),
+      });
+
+      expect(() => validator.validateInputSchema({ name: "" }, schema)).toThrow(
+        ValidationError,
+      );
+    });
+  });
+
+  describe("customConfiguration", () => {
+    it("should respect custom max payload size", () => {
+      const customValidator = new TaskValidator({ maxPayloadSizeBytes: 50 });
+      expect(() =>
+        customValidator.validatePayload({ small: "data" }),
+      ).not.toThrow();
+    });
+
+    it("should respect custom task name pattern", () => {
+      const customValidator = new TaskValidator({ taskNamePattern: /^task_/ }); // task names must start with "task_"
+      expect(() => customValidator.validateName("task_a")).not.toThrow();
+      expect(() => customValidator.validateName("other")).toThrow(
+        ValidationError,
+      );
+    });
+    it("should respect custom name length limits", () => {
+      const customValidator = new TaskValidator({
+        minTaskNameLength: 3,
+        maxTaskNameLength: 10,
+      });
+      expect(() => customValidator.validateName("ab")).toThrow();
+      expect(() => customValidator.validateName("abc")).not.toThrow();
+      expect(() => customValidator.validateName("a".repeat(11))).toThrow(
+        ValidationError,
+      );
+    });
+
+    it("should respect custom userId length limits", () => {
+      const customValidator = new TaskValidator({
+        minUserIdLength: 5,
+        maxUserIdLength: 20,
+      });
+
+      expect(() => customValidator.validateUserId("abcd")).toThrow(
+        ValidationError,
+      );
+      expect(() => customValidator.validateUserId("abcde")).not.toThrow();
+      expect(() => customValidator.validateUserId("a".repeat(21))).toThrow(
+        ValidationError,
+      );
+    });
+  });
+
+  describe("helper functions", () => {
+    it("validateTaskInput should use default validator", () => {
+      expect(() =>
+        validateTaskInput({ name: "testTask", input: { data: "test" } }),
+      ).not.toThrow();
+    });
+
+    it("validateTaskInput should throw ValidationError if input is invalid", () => {
+      expect(() =>
+        validateTaskInput({ name: "", input: { data: "test" } }),
+      ).toThrow(ValidationError);
+    });
+
+    it("validateInputSchema should use default validator", () => {
+      expect(() =>
+        validateInputSchema({ data: "test" }, z.object({ data: z.string() })),
+      ).not.toThrow();
+    });
+
+    it("validateInputSchema should throw ValidationError if input is invalid", () => {
+      expect(() =>
+        validateInputSchema({ data: "test" }, z.object({ data: z.number() })),
+      ).toThrow(ValidationError);
+    });
+  });
+  describe("defaultValidator", () => {
+    it("should be a TaskValidator instance", () => {
+      expect(defaultValidator).toBeInstanceOf(TaskValidator);
+    });
+
+    it("should validate correctly", () => {
+      expect(() =>
+        defaultValidator.validate({
+          name: "testTask",
+          input: { data: "test" },
+        }),
+      ).not.toThrow();
+    });
+  });
+});
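// Recap sketch (illustrative) of the schema helper exercised above: a handler
// can narrow unknown input to a typed value in one call, with failures
// surfacing as ValidationError rather than a raw ZodError:
//
//   const InputSchema = z.object({ value: z.number() });
//   const parsed = validateInputSchema(rawInput, InputSchema); // { value: number }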
From 770265650d7bec5938eca679e697b65115ec2e93 Mon Sep 17 00:00:00 2001
From: Ditadi
Date: Mon, 26 Jan 2026 19:17:17 +0000
Subject: [PATCH 06/13] feat(taskflow): guard layer with backpressure,
 slot-manager, dlq and orchestrator

---
 packages/taskflow/src/domain/types.ts         |   4 +-
 packages/taskflow/src/guard/backpressure.ts   | 256 ++++++++++
 packages/taskflow/src/guard/dlq.ts            | 404 ++++++++++++++
 packages/taskflow/src/guard/guard.ts          | 277 +++++++++
 packages/taskflow/src/guard/slot-manager.ts   | 282 +++++++++
 .../src/tests/guard/backpressure.test.ts      | 269 +++++++++
 packages/taskflow/src/tests/guard/dlq.test.ts | 493 ++++++++++++
 .../taskflow/src/tests/guard/guard.test.ts    | 537 ++++++++++++++
 .../src/tests/guard/slot-manager.test.ts      | 347 +++++++++++
 9 files changed, 2868 insertions(+), 1 deletion(-)
 create mode 100644 packages/taskflow/src/guard/backpressure.ts
 create mode 100644 packages/taskflow/src/guard/dlq.ts
 create mode 100644 packages/taskflow/src/guard/guard.ts
 create mode 100644 packages/taskflow/src/guard/slot-manager.ts
 create mode 100644 packages/taskflow/src/tests/guard/backpressure.test.ts
 create mode 100644 packages/taskflow/src/tests/guard/dlq.test.ts
 create mode 100644 packages/taskflow/src/tests/guard/guard.test.ts
 create mode 100644 packages/taskflow/src/tests/guard/slot-manager.test.ts

diff --git a/packages/taskflow/src/domain/types.ts b/packages/taskflow/src/domain/types.ts
index dcbf423f..002871e8 100644
--- a/packages/taskflow/src/domain/types.ts
+++ b/packages/taskflow/src/domain/types.ts
@@ -1,4 +1,4 @@
-import { IdempotencyKey, TaskId, TaskName, UserId } from "@/core/branded";
+import type { IdempotencyKey, TaskName, UserId } from "@/core/branded";
 import type { TaskStatus, TaskType } from "@/core/types";
 
 /**
@@ -9,6 +9,8 @@ export interface TaskExecutionOptions {
   maxRetries?: number;
   /** override the default timeout in milliseconds */
   timeoutMs?: number;
+  /** override the default max concurrent executions */
+  maxConcurrentExecutions?: number;
 }
 
 /**
diff --git a/packages/taskflow/src/guard/backpressure.ts b/packages/taskflow/src/guard/backpressure.ts
new file mode 100644
index 00000000..2d973120
--- /dev/null
+++ b/packages/taskflow/src/guard/backpressure.ts
@@ -0,0 +1,256 @@
+import type { UserId } from "@/core/branded";
+import { BackpressureError, ValidationError } from "@/core/errors";
+import type { Task } from "@/domain";
+import {
+  noopHooks,
+  TaskAttributes,
+  TaskMetrics,
+  type TaskSystemHooks,
+} from "@/observability";
+import type {
+  AdmissionStats,
+  BackpressureConfig,
+  RejectionReason,
+  WindowStats,
+} from "./types";
+
+/**
+ * Backpressure controller for rate limiting task admission
+ *
+ * Uses a sliding window algorithm to enforce:
+ * - Global rate limits (tasks per window)
+ * - Per-user rate limits (tasks per user per window)
+ * - Queue capacity limits
+ */
+export class Backpressure {
+  private readonly config: BackpressureConfig;
+  private readonly hooks: TaskSystemHooks;
+
+  // sliding window timestamps
+  private globalTaskTimestamps: number[] = [];
+  private userTaskTimestamps: Map<UserId, number[]> = new Map();
+
+  // queue tracking
+  private queueSize = 0;
+
+  // statistics
+  private windowStartedAt: number = Date.now();
+  private acceptedInWindow = 0;
+  private rejectedInWindow = 0;
+  private rejectionsByReason: Record<RejectionReason, number> = {
+    global_rate_limit: 0,
+    user_rate_limit: 0,
+    queue_full: 0,
+    in_dlq: 0,
+  };
+  private lastRejectionAt?: number;
+  private totalAccepted = 0;
+  private totalRejected = 0;
+
+  constructor(config: BackpressureConfig, hooks: TaskSystemHooks = noopHooks) {
+    this.config = config;
+    this.hooks = hooks;
+  }
+
+  /**
+   * Check if a task can be admitted
+   * @throws {ValidationError} if task is in DLQ
+   * @throws {BackpressureError} if rate limits or queue capacity exceeded
+   */
+  accept(task: Task, isInDLQ: boolean): void {
+    // check dlq first
+    if
(isInDLQ) { + this.trackRejection("in_dlq", task); + throw new ValidationError( + "Task is in DLQ and cannot be resubmitted", + "idempotencyKey", + { + taskId: task.id, + idempotencyKey: task.idempotencyKey, + }, + ); + } + + const now = Date.now(); + const windowStart = now - this.config.windowSizeMs; + + // cleanup expired timestamps + this.globalTaskTimestamps = this.globalTaskTimestamps.filter( + (ts) => ts >= windowStart, + ); + + // check global window limit + if (this.globalTaskTimestamps.length >= this.config.maxTasksPerWindow) { + const retryAfterMs = this.calculateRetryAfterMs( + this.globalTaskTimestamps, + ); + this.trackRejection("global_rate_limit", task); + throw new BackpressureError( + "Global rate limit exceeded", + this.config.maxTasksPerWindow, + this.config.maxTasksPerWindow - this.globalTaskTimestamps.length, + retryAfterMs, + { + taskId: task.id, + taskName: task.name, + }, + ); + } + + // check per-user window limit + if (task.userId) { + const userTimestamps = this.userTaskTimestamps.get(task.userId) ?? []; + const filteredUserTimestamps = userTimestamps.filter( + (ts) => ts >= windowStart, + ); + this.userTaskTimestamps.set(task.userId, filteredUserTimestamps); + + if (filteredUserTimestamps.length >= this.config.maxTasksPerUserWindow) { + const retryAfterMs = this.calculateRetryAfterMs(filteredUserTimestamps); + this.trackRejection("user_rate_limit", task); + throw new BackpressureError( + "User rate limit exceeded", + this.config.maxTasksPerUserWindow, + this.config.maxTasksPerUserWindow - filteredUserTimestamps.length, + retryAfterMs, + { + taskId: task.id, + taskName: task.name, + userId: task.userId, + }, + ); + } + } + + // check queue capacity + if (this.queueSize >= this.config.maxQueuedSize) { + this.trackRejection("queue_full", task); + throw new BackpressureError( + "Queue capacity exceeded", + this.config.maxQueuedSize, + this.config.maxQueuedSize - this.queueSize, + 1000, // 1 second retry after + { + taskId: task.id, + taskName: task.name, + }, + ); + } + + // accept the task + this.globalTaskTimestamps.push(now); + this.queueSize++; + this.acceptedInWindow++; + this.totalAccepted++; + + if (task.userId) { + const userTimestamps = this.userTaskTimestamps.get(task.userId) ?? []; + userTimestamps.push(now); + this.userTaskTimestamps.set(task.userId, userTimestamps); + } + } + + /** + * Decrement queue size when a task acquires an execution slot + */ + decrementQueueSize(): void { + this.queueSize = Math.max(0, this.queueSize - 1); + } + + /** + * Get current queue size + */ + getQueueSize(): number { + return this.queueSize; + } + + /** + * Get global window timestamps + */ + getGlobalWindowSize(): number { + const windowStart = Date.now() - this.config.windowSizeMs; + return this.globalTaskTimestamps.filter((ts) => ts >= windowStart).length; + } + + /** + * Get user window timestamps + */ + getUserWindowSize(userId: UserId): number { + const windowStart = Date.now() - this.config.windowSizeMs; + const timestamps = this.userTaskTimestamps.get(userId) ?? 
[];
+    return timestamps.filter((ts) => ts >= windowStart).length;
+  }
+
+  /**
+   * Get admission stats
+   */
+  getStats(): AdmissionStats {
+    const windowStats: WindowStats = {
+      accepted: this.acceptedInWindow,
+      rejected: this.rejectedInWindow,
+      startedAt: this.windowStartedAt,
+    };
+
+    return {
+      config: this.config,
+      window: windowStats,
+      rejections: {
+        byReason: this.rejectionsByReason,
+        lastAt: this.lastRejectionAt,
+      },
+      totals: {
+        accepted: this.totalAccepted,
+        rejected: this.totalRejected,
+      },
+    };
+  }
+
+  /**
+   * Clear all state
+   */
+  clear(): void {
+    this.globalTaskTimestamps = [];
+    this.userTaskTimestamps.clear();
+    this.queueSize = 0;
+    this.windowStartedAt = Date.now();
+    this.acceptedInWindow = 0;
+    this.rejectedInWindow = 0;
+    this.rejectionsByReason = {
+      global_rate_limit: 0,
+      user_rate_limit: 0,
+      queue_full: 0,
+      in_dlq: 0,
+    };
+    this.lastRejectionAt = undefined;
+    this.totalAccepted = 0;
+    this.totalRejected = 0;
+  }
+
+  /**
+   * Track a rejection and emit metrics
+   */
+  private trackRejection(reason: RejectionReason, task: Task): void {
+    this.rejectedInWindow++;
+    this.totalRejected++;
+    this.rejectionsByReason[reason]++;
+    this.lastRejectionAt = Date.now();
+
+    this.hooks.incrementCounter(TaskMetrics.GUARD_REJECTIONS, 1, {
+      [TaskAttributes.TASK_NAME]: task.name,
+      reason,
+    });
+  }
+
+  /**
+   * Calculate when the client should retry based on oldest timestamp in window
+   */
+  private calculateRetryAfterMs(timestamps: number[]): number {
+    if (timestamps.length === 0) return 0;
+
+    const oldestTimestamp = Math.min(...timestamps);
+    const windowExpiresAt = oldestTimestamp + this.config.windowSizeMs;
+    const retryAfterMs = Math.max(0, windowExpiresAt - Date.now());
+
+    return retryAfterMs;
+  }
+}
diff --git a/packages/taskflow/src/guard/dlq.ts b/packages/taskflow/src/guard/dlq.ts
new file mode 100644
index 00000000..88d71da1
--- /dev/null
+++ b/packages/taskflow/src/guard/dlq.ts
@@ -0,0 +1,404 @@
+import { type IdempotencyKey, idempotencyKey } from "@/core/branded";
+import type { Task } from "@/domain";
+import {
+  noopHooks,
+  TaskAttributes,
+  TaskMetrics,
+  type TaskSystemHooks,
+} from "@/observability";
+import type {
+  DLQConfig,
+  DLQEntry,
+  DLQEvent,
+  DLQEventListener,
+  DLQStats,
+  RejectionReason,
+} from "./types";
+
+/**
+ * Dead Letter Queue for storing failed tasks
+ *
+ * Features:
+ * - Add/remove/retry tasks
+ * - TTL-based expiration with automatic cleanup
+ * - Max size with FIFO eviction
+ * - Event emission for all state changes
+ * - Comprehensive statistics
+ */
+export class DeadLetterQueue {
+  private readonly config: DLQConfig;
+  private readonly hooks: TaskSystemHooks;
+
+  // entry storage (map preserves insertion order for FIFO eviction)
+  private entries: Map<IdempotencyKey, DLQEntry> = new Map();
+
+  // event listeners
+  private eventListeners: Set<DLQEventListener> = new Set();
+
+  // cleanup timer
+  private cleanupTimer: ReturnType<typeof setInterval> | null = null;
+
+  // statistics
+  private totalAdded = 0;
+  private totalRemoved = 0;
+  private totalExpired = 0;
+  private totalEvicted = 0;
+  private totalRetries = 0;
+  private lastEventAt?: number;
+
+  constructor(config: DLQConfig, hooks: TaskSystemHooks = noopHooks) {
+    this.config = config;
+    this.hooks = hooks;
+
+    this.startCleanupTimer();
+  }
+
+  /**
+   * Add a task to the DLQ
+   */
+  add(task: Task, reason?: string, error?: string): void {
+    const now = Date.now();
+
+    // evict oldest if at capacity
+    if (this.entries.size >= this.config.maxSize) {
+      const oldestKey = this.entries.keys().next().value;
+      if (oldestKey) {
+        const
evictedEntry = this.entries.get(oldestKey); + this.entries.delete(oldestKey); + this.totalEvicted++; + + if (evictedEntry) { + this.emitEvent({ + type: "dlq:evicted", + idempotencyKey: idempotencyKey(oldestKey), + taskName: evictedEntry.task.name, + userId: evictedEntry.task.userId ?? undefined, + reason: "capacity_exceeded", + timestamp: now, + }); + } + } + } + + // add the entry + const entry: DLQEntry = { + task, + addedAt: now, + reason, + retryCount: 0, + error, + }; + + this.entries.set(task.idempotencyKey, entry); + this.totalAdded++; + this.lastEventAt = now; + + // emit metrics + this.hooks.incrementCounter(TaskMetrics.DLQ_ADDED, 1, { + [TaskAttributes.TASK_NAME]: task.name, + reason: reason ?? "unknown", + }); + this.emitSizeGauge(); + + // emit event + this.emitEvent({ + type: "dlq:added", + idempotencyKey: task.idempotencyKey, + taskName: task.name, + userId: task.userId ?? undefined, + reason: reason, + timestamp: now, + error, + }); + } + + /** + * Remove a task from the DLQ + */ + remove(idempotencyKey: IdempotencyKey): boolean { + const entry = this.entries.get(idempotencyKey); + if (!entry) return false; + + const now = Date.now(); + this.entries.delete(idempotencyKey); + this.totalRemoved++; + this.lastEventAt = now; + + this.emitSizeGauge(); + + this.emitEvent({ + type: "dlq:removed", + idempotencyKey, + taskName: entry.task.name, + userId: entry.task.userId ?? undefined, + reason: entry.reason ?? "unknown", + timestamp: now, + }); + + return true; + } + + /** + * Check if a task is in the DLQ + */ + has(idempotencyKey: IdempotencyKey): boolean { + return this.entries.has(idempotencyKey); + } + + /** + * Get a DLQ entry by idempotency key + */ + get(idempotencyKey: IdempotencyKey): DLQEntry | undefined { + return this.entries.get(idempotencyKey); + } + + /** + * Get all DLQ entries + */ + getAll(): DLQEntry[] { + return Array.from(this.entries.values()); + } + + /** + * Current DLQ size + */ + get size(): number { + return this.entries.size; + } + + /** + * Retry a task from the DLQ + * Returns the task if successful, null if max retries exceeded or not found + */ + retry(idempotencyKey: IdempotencyKey): Task | null { + const entry = this.entries.get(idempotencyKey); + if (!entry) return null; + + const now = Date.now(); + + // check max retries + if (entry.retryCount >= this.config.maxRetries) { + this.lastEventAt = now; + + this.emitEvent({ + type: "dlq:retry_exhausted", + idempotencyKey, + taskName: entry.task.name, + userId: entry.task.userId ?? undefined, + reason: "max_retries_exceeded", + timestamp: now, + retryAttempt: entry.retryCount, + }); + + return null; + } + + // increment retry count and update last retry timestamp + entry.retryCount++; + entry.lastRetryAt = now; + this.totalRetries++; + this.lastEventAt = now; + + // remove from DLQ + this.entries.delete(idempotencyKey); + + // reset task to pending state + entry.task.resetToPending(); + + // emit metrics + this.hooks.incrementCounter(TaskMetrics.DLQ_RETRIED, 1, { + [TaskAttributes.TASK_NAME]: entry.task.name, + }); + this.emitSizeGauge(); + + // emit event + + this.emitEvent({ + type: "dlq:retried", + idempotencyKey, + taskName: entry.task.name, + userId: entry.task.userId ?? 
undefined,
+      timestamp: now,
+      retryAttempt: entry.retryCount,
+    });
+
+    return entry.task;
+  }
+
+  /**
+   * Retry all tasks in the DLQ
+   */
+  retryAll(): Task[] {
+    const tasks: Task[] = [];
+    const keys = Array.from(this.entries.keys());
+
+    for (const key of keys) {
+      const task = this.retry(idempotencyKey(key));
+      if (task) tasks.push(task);
+    }
+
+    return tasks;
+  }
+
+  /**
+   * Retry tasks matching a filter
+   */
+  retryWithFilter(filter: (entry: DLQEntry) => boolean): Task[] {
+    const tasks: Task[] = [];
+
+    for (const entry of this.entries.values()) {
+      if (filter(entry)) {
+        const task = this.retry(entry.task.idempotencyKey);
+        if (task) tasks.push(task);
+      }
+    }
+
+    return tasks;
+  }
+
+  /**
+   * Subscribe to DLQ events
+   * Returns unsubscribe function
+   */
+  onEvent(listener: DLQEventListener): () => void {
+    this.eventListeners.add(listener);
+    return () => this.eventListeners.delete(listener);
+  }
+
+  /**
+   * Get DLQ stats
+   */
+  getStats(): DLQStats {
+    const now = Date.now();
+    const byReason: Record<RejectionReason, number> = {
+      global_rate_limit: 0,
+      user_rate_limit: 0,
+      queue_full: 0,
+      in_dlq: 0,
+    };
+    let totalAgeMs = 0;
+    let oldestAgeMs = 0;
+
+    for (const entry of this.entries.values()) {
+      byReason[entry.reason as RejectionReason] =
+        (byReason[entry.reason as RejectionReason] ?? 0) + 1;
+
+      const ageMs = now - entry.addedAt;
+      totalAgeMs += ageMs;
+
+      if (ageMs > oldestAgeMs) {
+        oldestAgeMs = ageMs;
+      }
+    }
+
+    return {
+      size: this.entries.size,
+      byReason,
+      totalAdded: this.totalAdded,
+      totalRemoved: this.totalRemoved,
+      totalExpired: this.totalExpired,
+      totalEvicted: this.totalEvicted,
+      totalRetries: this.totalRetries,
+      avgAgeMs: this.entries.size > 0 ? totalAgeMs / this.entries.size : 0,
+      oldestAgeMs,
+      lastEventAt: this.lastEventAt ?? 0,
+    };
+  }
+
+  /**
+   * Clear all entries and reset stats
+   */
+  clear(): void {
+    this.entries.clear();
+    this.totalAdded = 0;
+    this.totalRemoved = 0;
+    this.totalExpired = 0;
+    this.totalEvicted = 0;
+    this.totalRetries = 0;
+    this.lastEventAt = undefined;
+
+    this.emitSizeGauge();
+  }
+
+  /**
+   * Shutdown the DLQ (stop cleanup timer, clear entries)
+   */
+  shutdown(): void {
+    if (this.cleanupTimer) {
+      clearInterval(this.cleanupTimer);
+      this.cleanupTimer = null;
+    }
+
+    this.entries.clear();
+    this.eventListeners.clear();
+  }
+
+  /**
+   * Start the cleanup timer for TTL expiration
+   */
+  private startCleanupTimer(): void {
+    if (this.config.cleanupIntervalMs <= 0) return;
+    this.cleanupTimer = setInterval(() => {
+      this.cleanupExpiredEntries();
+    }, this.config.cleanupIntervalMs);
+
+    // don't keep the process alive just for cleanup
+    this.cleanupTimer.unref();
+  }
+
+  /**
+   * Remove expired entries based on TTL
+   */
+  private cleanupExpiredEntries(): void {
+    const now = Date.now();
+    const expiredKeys: IdempotencyKey[] = [];
+
+    for (const [key, entry] of this.entries.entries()) {
+      if (now - entry.addedAt >= this.config.ttlMs) {
+        expiredKeys.push(idempotencyKey(key));
+      }
+    }
+
+    for (const key of expiredKeys) {
+      const entry = this.entries.get(key);
+      this.entries.delete(key);
+      this.totalExpired++;
+      this.lastEventAt = now;
+
+      if (entry) {
+        this.emitEvent({
+          type: "dlq:expired",
+          idempotencyKey: key,
+          taskName: entry.task.name,
+          userId: entry.task.userId ?? undefined,
+          reason: entry.reason ??
"unknown", + timestamp: now, + }); + } + } + + if (expiredKeys.length > 0) { + this.emitSizeGauge(); + } + } + + /** + * Emit an event to all listeners + */ + private emitEvent(event: DLQEvent): void { + for (const listener of this.eventListeners) { + try { + listener(event); + } catch { + // swallow listener errors + } + } + } + + /** + * Emit DLQ size gauge + */ + private emitSizeGauge(): void { + this.hooks.recordGauge(TaskMetrics.DLQ_SIZE, this.entries.size); + } +} diff --git a/packages/taskflow/src/guard/guard.ts b/packages/taskflow/src/guard/guard.ts new file mode 100644 index 00000000..a5f583bf --- /dev/null +++ b/packages/taskflow/src/guard/guard.ts @@ -0,0 +1,277 @@ +import type { IdempotencyKey, TaskName, UserId } from "@/core/branded"; +import { BackpressureError } from "@/core/errors"; +import type { Task } from "@/domain"; +import { noopHooks, type TaskSystemHooks } from "@/observability"; +import { Backpressure } from "./backpressure"; +import { DeadLetterQueue } from "./dlq"; +import { SlotManager } from "./slot-manager"; +import { + DEFAULT_GUARD_CONFIG, + type DLQEntry, + type DLQEventListener, + type DLQStats, + type GuardConfig, + type GuardStats, + type RecoverySlotStats, +} from "./types"; + +/** + * Merge partial config with defaults + */ +function mergeConfig(partial?: Partial): GuardConfig { + const defaults: GuardConfig = DEFAULT_GUARD_CONFIG; + + if (!partial) return defaults; + + return { + backpressure: { + ...defaults.backpressure, + ...partial.backpressure, + }, + slots: { + ...defaults.slots, + ...partial.slots, + }, + dlq: { + ...defaults.dlq, + ...partial.dlq, + }, + recovery: { + ...defaults.recovery, + ...partial.recovery, + }, + }; +} + +/** + * Guard is the main orchestrator for task admission control + * + * Combines: + * - Backpressure: Rate limiting and queue management + * - SlotManager: Concurrent execution slot management + * - DeadLetterQueue: Failed task storage and retry + * - Recovery slots: Separate pool for task recovery operations + */ +export class Guard { + private readonly config: GuardConfig; + private readonly hooks: TaskSystemHooks; + + // sub-components + private readonly backpressure: Backpressure; + private readonly slotManager: SlotManager; + private readonly dlq: DeadLetterQueue; + + // recovery slot tracking (simple counter, no waiting queue) + private recoverySlotsInUse = 0; + + constructor( + config: Partial, + hooks: TaskSystemHooks = noopHooks, + ) { + this.config = mergeConfig(config); + this.hooks = hooks; + + // initialize sub-components with their respective config and hooks + this.backpressure = new Backpressure(this.config.backpressure, this.hooks); + this.slotManager = new SlotManager(this.config.slots, this.hooks); + this.dlq = new DeadLetterQueue(this.config.dlq, this.hooks); + } + + /** + * Accept a task for processing + * Validates raate limits and queue capacity + * @throws {ValidationError} if task is in DLQ + * @throws {BackpressureError} if limits exceeded + */ + acceptTask(task: Task): void { + this.backpressure.accept(task, this.dlq.has(task.idempotencyKey)); + } + + /** + * Acquire an execution slot for a task + * Decrements queue size on success + * @throws {SlotTimeoutError} if timeout is reached + */ + async acquireExecutionSlot(task: Task): Promise { + await this.slotManager.acquire(task); + this.backpressure.decrementQueueSize(); + } + + /** + * Release an execution slot + */ + releaseExecutionSlot(task: Task): void { + this.slotManager.release(task); + } + + /** + * Acquire a recovery slot + * 
@throws {BackpressureError} if recovery capacity is exhausted + */ + acquireRecoverySlot(): void { + if (this.recoverySlotsInUse >= this.config.recovery.maxRecoverySlots) { + throw new BackpressureError( + "Recovery capacity exhausted", + this.config.recovery.maxRecoverySlots, + ); + } + this.recoverySlotsInUse++; + } + + /** + * Release a recovery slot + */ + releaseRecoverySlot(): void { + this.recoverySlotsInUse = Math.max(0, this.recoverySlotsInUse - 1); + } + + /** + * Add a task to dead letter queue + */ + addToDLQ(task: Task, reason?: string, error?: string): void { + this.dlq.add(task, reason, error); + } + + /** + * Remove a task from dead letter queue + */ + removeFromDLQ(idempotencyKey: IdempotencyKey): boolean { + return this.dlq.remove(idempotencyKey); + } + + /** + * Check if a task is in the DLQ + */ + isTaskInDLQ(idempotencyKey: IdempotencyKey): boolean { + return this.dlq.has(idempotencyKey); + } + + /** + * Get a DLQ entry + */ + getDLQEntry(idempotencyKey: IdempotencyKey): DLQEntry | undefined { + return this.dlq.get(idempotencyKey); + } + + /** + * Get all DLQ entries + */ + getDLQEntries(): DLQEntry[] { + return this.dlq.getAll(); + } + + /** + * Get DLQ size + */ + getDLQSize(): number { + return this.dlq.size; + } + + /** + * Retry a task from the DLQ + */ + retryFromDLQ(idempotencyKey: IdempotencyKey): Task | null { + return this.dlq.retry(idempotencyKey); + } + + /** + * Retry all tasks from the DLQ + */ + retryAllFromDLQ(): Task[] { + return this.dlq.retryAll(); + } + + /** + * Retry DLQ entries matching a filter + */ + retryDLQWithFilter(filter: (entry: DLQEntry) => boolean): Task[] { + return this.dlq.retryWithFilter(filter); + } + + /** + * Subscribe to DLQ events + */ + onDLQEvent(listener: DLQEventListener): () => void { + return this.dlq.onEvent(listener); + } + + /** + * Get DLQ statistics + */ + getDLQStats(): DLQStats { + return this.dlq.getStats(); + } + + /** + * Get comprehensive guard statistics + */ + getStats(): GuardStats { + const recoveryStats: RecoverySlotStats = { + inUse: this.recoverySlotsInUse, + limit: this.config.recovery.maxRecoverySlots, + available: Math.max( + 0, + this.config.recovery.maxRecoverySlots - this.recoverySlotsInUse, + ), + }; + return { + admission: this.backpressure.getStats(), + slots: this.slotManager.getStats(), + dlq: this.dlq.getStats(), + recovery: recoveryStats, + }; + } + + /** + * Get number of tasks waiting for execution slots + */ + getWaitingQueueSize(): number { + return this.slotManager.getWaitingQueueSize(); + } + + /** + * Get global queue size (tasks admitted but not yet executing) + */ + getGlobalQueueSize(): number { + return this.backpressure.getQueueSize(); + } + + /** + * Get global execution size (tasks currently executing) + */ + getGlobalExecutionSize(): number { + return this.slotManager.getGlobalExecutionSize(); + } + + /** + * Get user execution size + */ + getUserExecutionSize(userId: UserId): number { + return this.slotManager.getUserExecutionSize(userId); + } + + /** + * Get template execution size + */ + getTemplateExecutionSize(templateName: TaskName): number { + return this.slotManager.getTemplateExecutionSize(templateName); + } + + /** + * Shutdown the guard (clears all state, stop timers) + */ + shutdown(): void { + this.dlq.shutdown(); + this.clear(); + } + + /** + * Clear all state (keep timers running) + */ + clear(): void { + this.backpressure.clear(); + this.slotManager.clear(); + this.dlq.clear(); + this.recoverySlotsInUse = 0; + } +} diff --git 
a/packages/taskflow/src/guard/slot-manager.ts b/packages/taskflow/src/guard/slot-manager.ts
new file mode 100644
index 00000000..4bb6d016
--- /dev/null
+++ b/packages/taskflow/src/guard/slot-manager.ts
@@ -0,0 +1,282 @@
+import type { TaskName, UserId } from "@/core/branded";
+import { SlotTimeoutError } from "@/core/errors";
+import type { Task } from "@/domain";
+import {
+  noopHooks,
+  TaskAttributes,
+  TaskMetrics,
+  type TaskSystemHooks,
+} from "@/observability";
+import type { SlotManagerConfig, SlotStats } from "./types";
+
+/**
+ * Waiting request in the slot acquisition queue
+ */
+interface WaitingRequest {
+  task: Task;
+  resolve: () => void;
+  reject: (error: Error) => void;
+  timeoutId: ReturnType<typeof setTimeout>;
+}
+
+/**
+ * SlotManager controls concurrent execution slots
+ *
+ * Enforces:
+ * - Global execution limit
+ * - Per-user execution limit
+ * - Per-template execution limit
+ * - Timeout for slot acquisition
+ *
+ */
+export class SlotManager {
+  private readonly config: SlotManagerConfig;
+  private readonly hooks: TaskSystemHooks;
+
+  // execution tracking
+  private globalExecutionCount = 0;
+  private userExecutionCounts: Map<UserId, number> = new Map();
+  private templateExecutionCounts: Map<TaskName, number> = new Map();
+
+  // waiting queue
+  private waitingQueue: WaitingRequest[] = [];
+
+  // statistics
+  private slotTimeouts = 0;
+  private slotsAcquired = 0;
+  private slotsReleased = 0;
+
+  constructor(config: SlotManagerConfig, hooks: TaskSystemHooks = noopHooks) {
+    this.config = config;
+    this.hooks = hooks;
+  }
+
+  /**
+   * Acquire an execution slot for a task
+   * Waits in queue if no slots available, throws on timeout
+   */
+  async acquire(task: Task): Promise<void> {
+    // try immediate acquisition
+    if (this.tryAcquire(task)) {
+      this.emitGauges();
+      return;
+    }
+
+    // wait in queue
+    return new Promise<void>((resolve, reject) => {
+      const timeoutId = setTimeout(() => {
+        // remove from waiting queue
+        const index = this.waitingQueue.findIndex(
+          (req) => req.task.id === task.id,
+        );
+        if (index !== -1) {
+          this.waitingQueue.splice(index, 1);
+        }
+
+        this.slotTimeouts++;
+        this.hooks.incrementCounter(TaskMetrics.GUARD_REJECTIONS, 1, {
+          [TaskAttributes.TASK_ID]: task.id,
+          [TaskAttributes.TASK_NAME]: task.name,
+          reason: "slot_timeout",
+        });
+
+        reject(
+          new SlotTimeoutError(
+            "Slot acquisition timeout",
+            this.config.slotTimeoutMs,
+            {
+              taskId: task.id,
+              taskName: task.name,
+              userId: task.userId ?? undefined,
+            },
+          ),
+        );
+      }, this.config.slotTimeoutMs);
+
+      this.waitingQueue.push({
+        task,
+        resolve: () => {
+          this.emitGauges();
+          resolve();
+        },
+        reject,
+        timeoutId,
+      });
+    });
+  }
+
+  /**
+   * Releases an execution slot
+   */
+  release(task: Task): void {
+    this.globalExecutionCount = Math.max(0, this.globalExecutionCount - 1);
+    this.slotsReleased++;
+
+    // decrement user count
+    if (task.userId) {
+      const userCount = this.userExecutionCounts.get(task.userId) ?? 0;
+      this.userExecutionCounts.set(task.userId, Math.max(0, userCount - 1));
+    }
+
+    // decrement template count
+    const templateCount = this.templateExecutionCounts.get(task.name) ??
0; + this.templateExecutionCounts.set(task.name, Math.max(0, templateCount - 1)); + + this.emitGauges(); + + // process waiting queue + this.processWaitingQueue(); + } + + /** + * Get number of tasks waiting for slots + */ + getWaitingQueueSize(): number { + return this.waitingQueue.length; + } + + /** + * Get global execution size + */ + getGlobalExecutionSize(): number { + return this.globalExecutionCount; + } + + /** + * Get user execution size + */ + getUserExecutionSize(userId: UserId): number { + return this.userExecutionCounts.get(userId) ?? 0; + } + + /** + * Get template execution size + */ + getTemplateExecutionSize(templateName: TaskName): number { + return this.templateExecutionCounts.get(templateName) ?? 0; + } + + /** + * Get slot statistics + */ + getStats(): SlotStats { + return { + current: { + inUse: this.globalExecutionCount, + waiting: this.waitingQueue.length, + available: Math.max( + 0, + this.config.maxExecutionGlobal - this.globalExecutionCount, + ), + }, + limits: { + global: this.config.maxExecutionGlobal, + perUser: this.config.maxExecutionPerUser, + }, + events: { + timeouts: this.slotTimeouts, + acquired: this.slotsAcquired, + released: this.slotsReleased, + }, + }; + } + + /** + * Clear all state + */ + clear(): void { + // clear waiting queue timeouts + for (const request of this.waitingQueue) { + clearTimeout(request.timeoutId); + } + + this.globalExecutionCount = 0; + this.userExecutionCounts.clear(); + this.templateExecutionCounts.clear(); + this.waitingQueue = []; + this.slotTimeouts = 0; + this.slotsAcquired = 0; + this.slotsReleased = 0; + } + + /** + * Try to acquire a slot immediately + * Returns true if successful, false if blocked + */ + private tryAcquire(task: Task): boolean { + // check global limit + if (this.globalExecutionCount >= this.config.maxExecutionGlobal) { + return false; + } + + // check user limit + if (task.userId) { + const userCount = this.userExecutionCounts.get(task.userId) ?? 0; + if (userCount >= this.config.maxExecutionPerUser) { + return false; + } + } + + // check template limit + const templateLimit = task.executionOptions?.maxConcurrentExecutions; + if (templateLimit) { + const templateCount = this.templateExecutionCounts.get(task.name) ?? 0; + if (templateCount >= templateLimit) { + return false; + } + } + + // acquire the slot + this.globalExecutionCount++; + this.slotsAcquired++; + + if (task.userId) { + const userCount = this.userExecutionCounts.get(task.userId) ?? 0; + this.userExecutionCounts.set(task.userId, userCount + 1); + } + + const templateCount = this.templateExecutionCounts.get(task.name) ?? 
0;
+    this.templateExecutionCounts.set(task.name, templateCount + 1);
+
+    return true;
+  }
+
+  /**
+   * Process waiting queue after a slot is released
+   */
+  private processWaitingQueue(): void {
+    for (let i = 0; i < this.waitingQueue.length; i++) {
+      const request = this.waitingQueue[i];
+
+      if (this.tryAcquire(request.task)) {
+        // remove from queue
+        this.waitingQueue.splice(i, 1);
+
+        // clear timeout
+        clearTimeout(request.timeoutId);
+
+        // resolve the promise
+        request.resolve();
+
+        // only process one at a time
+        return;
+      }
+    }
+  }
+
+  /**
+   * Emit gauge metrics for slot usage
+   */
+  private emitGauges(): void {
+    this.hooks.recordGauge(
+      TaskMetrics.SLOTS_AVAILABLE,
+      this.getStats().current.available,
+    );
+    this.hooks.recordGauge(
+      TaskMetrics.TASKS_RUNNING,
+      this.globalExecutionCount,
+    );
+
+    this.hooks.recordGauge(TaskMetrics.TASKS_QUEUED, this.waitingQueue.length);
+  }
+}
diff --git a/packages/taskflow/src/tests/guard/backpressure.test.ts b/packages/taskflow/src/tests/guard/backpressure.test.ts
new file mode 100644
index 00000000..bff0772a
--- /dev/null
+++ b/packages/taskflow/src/tests/guard/backpressure.test.ts
@@ -0,0 +1,269 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import { idempotencyKey, taskName, userId } from "@/core/branded";
+import { BackpressureError, ValidationError } from "@/core/errors";
+import { Task } from "@/domain";
+import { Backpressure } from "@/guard/backpressure";
+import type { BackpressureConfig } from "@/guard/types";
+import { noopHooks, type TaskSystemHooks } from "@/observability";
+
+const createTask = (overrides?: Partial<Task>): Task =>
+  new Task({
+    name: taskName(overrides?.name ?? "test-task"),
+    input: { data: "test" },
+    // pass null through so the background-task test really gets a null userId
+    userId:
+      overrides?.userId === null
+        ? null
+        : userId(overrides?.userId ?? "test-user"),
+    idempotencyKey: idempotencyKey(
+      overrides?.idempotencyKey ??
"test-idempotency-key", + ), + }); + +const defaultConfig: BackpressureConfig = { + windowSizeMs: 60_000, + maxTasksPerWindow: 100, + maxTasksPerUserWindow: 10, + maxQueuedSize: 50, +}; + +describe("Backpressure", () => { + let backpressure: Backpressure; + beforeEach(() => { + backpressure = new Backpressure(defaultConfig); + }); + + afterEach(() => { + backpressure.clear(); + }); + + describe("accept", () => { + it("should accept tasks within global window limit", () => { + const task = createTask({}); + expect(() => backpressure.accept(task, false)).not.toThrow(); + expect(backpressure.getQueueSize()).toBe(1); + }); + + it("should reject tasks when global window limit is exceeded", () => { + const backpressure = new Backpressure({ + ...defaultConfig, + maxTasksPerWindow: 2, + }); + + backpressure.accept( + createTask({ idempotencyKey: idempotencyKey("key-1") }), + false, + ); + backpressure.accept( + createTask({ idempotencyKey: idempotencyKey("key-2") }), + false, + ); + + expect(() => + backpressure.accept( + createTask({ idempotencyKey: idempotencyKey("key-3") }), + false, + ), + ).toThrow(BackpressureError); + }); + + it("should accept tasks within user window limit", () => { + const backpressure = new Backpressure({ + ...defaultConfig, + maxTasksPerUserWindow: 2, + }); + + backpressure.accept(createTask({ userId: userId("user-1") }), false); + backpressure.accept(createTask({ userId: userId("user-2") }), false); + + expect(backpressure.getQueueSize()).toBe(2); + }); + + it("should reject tasks when user window limit is exceeded", () => { + const backpressure = new Backpressure({ + ...defaultConfig, + maxTasksPerUserWindow: 1, + }); + + backpressure.accept(createTask({ userId: userId("user-1") }), false); + expect(() => + backpressure.accept(createTask({ userId: userId("user-1") }), false), + ).toThrow(BackpressureError); + }); + + it("should allow different users within their own limits", () => { + const backpressure = new Backpressure({ + ...defaultConfig, + maxTasksPerUserWindow: 2, + }); + + backpressure.accept(createTask({ userId: userId("user-1") }), false); + backpressure.accept(createTask({ userId: userId("user-2") }), false); + + expect(backpressure.getQueueSize()).toBe(2); + }); + + it("should accept tasks within global queue limit", () => { + const backpressure = new Backpressure({ + ...defaultConfig, + maxQueuedSize: 2, + }); + + backpressure.accept(createTask({}), false); + backpressure.accept(createTask({}), false); + + expect(backpressure.getQueueSize()).toBe(2); + }); + + it("should reject tasks when global queue limit is exceeded", () => { + const backpressure = new Backpressure({ + ...defaultConfig, + maxQueuedSize: 1, + }); + + backpressure.accept(createTask({}), false); + expect(() => backpressure.accept(createTask({}), false)).toThrow( + BackpressureError, + ); + }); + + it("should reject tasks that are in DLQ", () => { + const task = createTask(); + expect(() => backpressure.accept(task, true)).toThrow(ValidationError); + + try { + backpressure.accept(task, true); + } catch (error) { + expect(ValidationError.is(error)).toBe(true); + expect((error as ValidationError).message).toContain("DLQ"); + } + }); + + it("should calculate retryAfterMs based on window reset time", () => { + vi.useFakeTimers(); + const now = Date.now(); + vi.setSystemTime(now); + + const backpressure = new Backpressure({ + ...defaultConfig, + maxTasksPerWindow: 1, + windowSizeMs: 1000, + }); + + backpressure.accept(createTask(), false); + vi.advanceTimersByTime(200); // 200ms later + + try { 
+ backpressure.accept(createTask(), false); + } catch (error) { + expect(BackpressureError.is(error)).toBe(true); + const retryAfter = (error as BackpressureError).retryAfterMs; + expect(retryAfter).toBeGreaterThanOrEqual(700); + expect(retryAfter).toBeLessThanOrEqual(800); + } + + vi.useRealTimers(); + }); + + it("should reset counts after window expires", () => { + vi.useFakeTimers(); + + const backpressure = new Backpressure({ + ...defaultConfig, + maxTasksPerWindow: 1, + windowSizeMs: 1000, + }); + + backpressure.accept(createTask(), false); + + // advance past the window + vi.advanceTimersByTime(1100); + + // should accept now since window reset + expect(() => backpressure.accept(createTask(), false)).not.toThrow(); + + vi.useRealTimers(); + }); + + it("should handle background tasks (null userId)", () => { + const task = createTask({ userId: null }); + + expect(() => backpressure.accept(task, false)).not.toThrow(); + expect(backpressure.getQueueSize()).toBe(1); + }); + }); + + describe("decrementQueueSize", () => { + it("should decrement the queue size", () => { + backpressure.accept(createTask(), false); + backpressure.accept(createTask(), false); + + expect(backpressure.getQueueSize()).toBe(2); + backpressure.decrementQueueSize(); + expect(backpressure.getQueueSize()).toBe(1); + }); + + it("should not go below zero", () => { + backpressure.decrementQueueSize(); + backpressure.decrementQueueSize(); + + expect(backpressure.getQueueSize()).toBe(0); + }); + }); + + describe("getStats", () => { + it("should return admission stats", () => { + backpressure.accept(createTask(), false); + const stats = backpressure.getStats(); + + expect(stats.config).toEqual(defaultConfig); + expect(stats.window.accepted).toBe(1); + expect(stats.totals.accepted).toBe(1); + expect(stats.totals.rejected).toBe(0); + }); + + it("should track rejections by reason", () => { + const backpressure = new Backpressure({ + ...defaultConfig, + maxTasksPerWindow: 1, + }); + + backpressure.accept(createTask(), false); + try { + backpressure.accept(createTask(), false); + } catch {} + + const stats = backpressure.getStats(); + + expect(stats.rejections.byReason.global_rate_limit).toBe(1); + expect(stats.totals.rejected).toBe(1); + }); + }); + + describe("clear", () => { + it("should reset all state", () => { + backpressure.accept(createTask(), false); + + backpressure.clear(); + + expect(backpressure.getQueueSize()).toBe(0); + expect(backpressure.getStats().totals.accepted).toBe(0); + }); + }); + + describe("observability hooks", () => { + it("should call incrementCounter on rejection", () => { + const mockHooks: TaskSystemHooks = { + ...noopHooks, + incrementCounter: vi.fn(), + }; + + const backpressure = new Backpressure( + { ...defaultConfig, maxTasksPerWindow: 1 }, + mockHooks, + ); + backpressure.accept(createTask(), false); + try { + backpressure.accept(createTask(), false); + } catch {} + + expect(mockHooks.incrementCounter).toHaveBeenCalled(); + }); + }); +}); diff --git a/packages/taskflow/src/tests/guard/dlq.test.ts b/packages/taskflow/src/tests/guard/dlq.test.ts new file mode 100644 index 00000000..b67ab454 --- /dev/null +++ b/packages/taskflow/src/tests/guard/dlq.test.ts @@ -0,0 +1,493 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { idempotencyKey, taskName, userId } from "@/core/branded"; +import { Task } from "@/domain"; +import { DeadLetterQueue } from "@/guard/dlq"; +import type { DLQConfig, DLQEvent } from "@/guard/types"; +import { noopHooks, type TaskSystemHooks } 
from "@/observability"; + +const createTask = (overrides?: Partial): Task => + new Task({ + name: taskName(overrides?.name ?? "test-task"), + input: { data: "test" }, + userId: userId(overrides?.userId ?? "test-user"), + idempotencyKey: idempotencyKey( + overrides?.idempotencyKey ?? "test-idempotency-key", + ), + }); + +const createFailedTask = (overrides?: Partial): Task => { + const task = createTask(overrides); + task.start(); + task.fail(new Error("test error")); + + return task; +}; + +const defaultConfig: DLQConfig = { + maxSize: 100, + ttlMs: 24 * 60 * 60 * 1000, + cleanupIntervalMs: 60_000, + maxRetries: 3, +}; + +describe("DeadLetterQueue", () => { + let dlq: DeadLetterQueue; + + beforeEach(() => { + dlq = new DeadLetterQueue(defaultConfig); + }); + + afterEach(() => { + dlq.shutdown(); + }); + + describe("add", () => { + it("should add a task to the DLQ", () => { + const task = createTask(); + + dlq.add(task, "test reason"); + + expect(dlq.has(task.idempotencyKey)).toBe(true); + expect(dlq.size).toBe(1); + }); + + it("should store reason and error", () => { + const task = createTask(); + + dlq.add(task, "test reason", "Error message"); + + const entry = dlq.get(task.idempotencyKey); + expect(entry?.reason).toBe("test reason"); + expect(entry?.error).toBe("Error message"); + }); + + it("should set addedAt timestamp", () => { + const task = createTask(); + const before = Date.now(); + + dlq.add(task); + + const entry = dlq.get(task.idempotencyKey); + expect(entry?.addedAt).toBeGreaterThanOrEqual(before); + expect(entry?.addedAt).toBeLessThanOrEqual(Date.now()); + }); + it("should initialize retryCount to 0", () => { + const task = createTask(); + + dlq.add(task); + + const entry = dlq.get(task.idempotencyKey); + expect(entry?.retryCount).toBe(0); + }); + }); + + describe("get", () => { + it("should get DLQ entry with metadata", () => { + const task = createTask(); + dlq.add(task, "test reason"); + + const entry = dlq.get(task.idempotencyKey); + + expect(entry).toBeDefined(); + expect(entry?.task.id).toBe(task.id); + expect(entry?.reason).toBe("test reason"); + }); + it("should return undefined for non-existent entry", () => { + const entry = dlq.get(idempotencyKey("a".repeat(64))); + + expect(entry).toBeUndefined(); + }); + }); + + describe("getAll", () => { + it("should list all DLQ entries", () => { + dlq.add(createTask({ idempotencyKey: idempotencyKey("a".repeat(64)) })); + dlq.add(createTask({ idempotencyKey: idempotencyKey("b".repeat(64)) })); + + const entries = dlq.getAll(); + + expect(entries).toHaveLength(2); + }); + }); + + describe("remove", () => { + it("should remove task from DLQ", () => { + const task = createTask(); + dlq.add(task); + + const removed = dlq.remove(task.idempotencyKey); + + expect(removed).toBe(true); + expect(dlq.has(task.idempotencyKey)).toBe(false); + }); + + it("should return false when removing non-existent entry", () => { + const removed = dlq.remove(idempotencyKey("a".repeat(64))); + + expect(removed).toBe(false); + }); + }); + + describe("eviction", () => { + it("should evict oldest when DLQ is full", () => { + const smallDlq = new DeadLetterQueue({ ...defaultConfig, maxSize: 2 }); + + smallDlq.add( + createTask({ idempotencyKey: idempotencyKey("a".repeat(64)) }), + ); + smallDlq.add( + createTask({ idempotencyKey: idempotencyKey("b".repeat(64)) }), + ); + smallDlq.add( + createTask({ idempotencyKey: idempotencyKey("c".repeat(64)) }), + ); + + expect(smallDlq.size).toBe(2); + expect(smallDlq.has(idempotencyKey("a".repeat(64)))).toBe(false); // 
evicted + expect(smallDlq.has(idempotencyKey("b".repeat(64)))).toBe(true); + expect(smallDlq.has(idempotencyKey("c".repeat(64)))).toBe(true); + + smallDlq.shutdown(); + }); + }); + + describe("TTL expiration", () => { + it("should cleanup expired entries based on TTL", () => { + vi.useFakeTimers(); + + const shortTtlDlq = new DeadLetterQueue({ + ...defaultConfig, + ttlMs: 100, + cleanupIntervalMs: 50, + }); + + shortTtlDlq.add(createTask()); + expect(shortTtlDlq.size).toBe(1); + + // Advance time past TTL and cleanup interval + vi.advanceTimersByTime(150); + + expect(shortTtlDlq.size).toBe(0); + + shortTtlDlq.shutdown(); + vi.useRealTimers(); + }); + }); + + describe("retry", () => { + it("should retry a task from DLQ", () => { + const task = createFailedTask(); + dlq.add(task, "test failure"); + + const retriedTask = dlq.retry(task.idempotencyKey); + + expect(retriedTask).not.toBeNull(); + expect(retriedTask?.status).toBe("created"); // resetToPending + expect(dlq.has(task.idempotencyKey)).toBe(false); + }); + + it("should return null when retrying non-existent entry", () => { + const result = dlq.retry(idempotencyKey("a".repeat(64))); + + expect(result).toBeNull(); + }); + + it("should track retry count", () => { + const task = createFailedTask(); + dlq.add(task, "test failure"); + + // Entry starts with retryCount 0 + const entryBefore = dlq.get(task.idempotencyKey); + expect(entryBefore?.retryCount).toBe(0); + + // Retry + dlq.retry(task.idempotencyKey); + + // Stats should show 1 retry + expect(dlq.getStats().totalRetries).toBe(1); + }); + + it("should respect max retries limit", () => { + const limitedDlq = new DeadLetterQueue({ + ...defaultConfig, + maxRetries: 0, + }); + + const task = createFailedTask(); + limitedDlq.add(task, "test failure"); + + const result = limitedDlq.retry(task.idempotencyKey); + + expect(result).toBeNull(); + expect(limitedDlq.has(task.idempotencyKey)).toBe(true); // Still in DLQ + + limitedDlq.shutdown(); + }); + + it("should retry all tasks from DLQ", () => { + const task1 = createFailedTask({ + idempotencyKey: idempotencyKey("a".repeat(64)), + }); + const task2 = createFailedTask({ + idempotencyKey: idempotencyKey("b".repeat(64)), + }); + + dlq.add(task1); + dlq.add(task2); + + const retriedTasks = dlq.retryAll(); + + expect(retriedTasks).toHaveLength(2); + expect(dlq.size).toBe(0); + }); + + it("should retry DLQ entries with filter", () => { + const task1 = createFailedTask({ + idempotencyKey: idempotencyKey("a".repeat(64)), + name: taskName("task-a"), + }); + const task2 = createFailedTask({ + idempotencyKey: idempotencyKey("b".repeat(64)), + name: taskName("task-b"), + }); + + dlq.add(task1); + dlq.add(task2); + + const retriedTasks = dlq.retryWithFilter( + (entry) => entry.task.name === taskName("task-a"), + ); + + expect(retriedTasks).toHaveLength(1); + expect(retriedTasks[0].name).toBe(taskName("task-a")); + expect(dlq.size).toBe(1); + expect(dlq.has(idempotencyKey("b".repeat(64)))).toBe(true); + }); + }); + + describe("events", () => { + it("should emit dlq:added event", () => { + const events: DLQEvent[] = []; + dlq.onEvent((event) => events.push(event)); + + dlq.add(createTask(), "test reason"); + + expect(events).toHaveLength(1); + expect(events[0].type).toBe("dlq:added"); + expect(events[0].reason).toBe("test reason"); + }); + + it("should emit dlq:removed event", () => { + const task = createTask(); + dlq.add(task); + + const events: DLQEvent[] = []; + dlq.onEvent((event) => events.push(event)); + + dlq.remove(task.idempotencyKey); + + 
expect(events).toHaveLength(1); + expect(events[0].type).toBe("dlq:removed"); + }); + + it("should emit dlq:retried event", () => { + const task = createFailedTask(); + dlq.add(task); + + const events: DLQEvent[] = []; + dlq.onEvent((event) => events.push(event)); + + dlq.retry(task.idempotencyKey); + + expect(events).toHaveLength(1); + expect(events[0].type).toBe("dlq:retried"); + }); + + it("should emit dlq:retry_exhausted event when max retries reached", () => { + const limitedDlq = new DeadLetterQueue({ + ...defaultConfig, + maxRetries: 0, + }); + + const task = createFailedTask(); + limitedDlq.add(task); + + const events: DLQEvent[] = []; + limitedDlq.onEvent((event) => events.push(event)); + + limitedDlq.retry(task.idempotencyKey); + + expect(events).toHaveLength(1); + expect(events[0].type).toBe("dlq:retry_exhausted"); + + limitedDlq.shutdown(); + }); + + it("should emit dlq:expired event", () => { + vi.useFakeTimers(); + + const shortTtlDlq = new DeadLetterQueue({ + ...defaultConfig, + ttlMs: 100, + cleanupIntervalMs: 50, + }); + + const events: DLQEvent[] = []; + shortTtlDlq.onEvent((event) => events.push(event)); + + shortTtlDlq.add(createTask()); + + expect(events).toHaveLength(1); // dlq:added + + vi.advanceTimersByTime(150); + + expect(events).toHaveLength(2); + expect(events[1].type).toBe("dlq:expired"); + + shortTtlDlq.shutdown(); + vi.useRealTimers(); + }); + + it("should emit dlq:evicted event when entry is evicted for capacity", () => { + const smallDlq = new DeadLetterQueue({ ...defaultConfig, maxSize: 1 }); + + const events: DLQEvent[] = []; + smallDlq.onEvent((event) => events.push(event)); + + smallDlq.add( + createTask({ idempotencyKey: idempotencyKey("a".repeat(64)) }), + ); + smallDlq.add( + createTask({ idempotencyKey: idempotencyKey("b".repeat(64)) }), + ); + + // dlq:added, dlq:evicted, dlq:added + expect(events).toHaveLength(3); + expect(events[1].type).toBe("dlq:evicted"); + + smallDlq.shutdown(); + }); + it("should allow unsubscribing from events", () => { + const events: DLQEvent[] = []; + const unsubscribe = dlq.onEvent((event) => events.push(event)); + + dlq.add(createTask({ idempotencyKey: idempotencyKey("a".repeat(64)) })); + expect(events).toHaveLength(1); + + unsubscribe(); + + dlq.add(createTask({ idempotencyKey: idempotencyKey("b".repeat(64)) })); + expect(events).toHaveLength(1); // Still 1, no new events + }); + }); + + describe("getStats", () => { + it("should return DLQ statistics", () => { + dlq.add( + createTask({ + idempotencyKey: idempotencyKey("a".repeat(64)), + name: taskName("task-a"), + }), + "timeout", + ); + dlq.add( + createTask({ + idempotencyKey: idempotencyKey("b".repeat(64)), + name: taskName("task-a"), + }), + "timeout", + ); + dlq.add( + createTask({ + idempotencyKey: idempotencyKey("c".repeat(64)), + name: taskName("task-b"), + }), + "error", + ); + + const stats = dlq.getStats(); + + expect(stats.size).toBe(3); + expect(stats.totalAdded).toBe(3); + }); + it("should calculate average time in DLQ", () => { + vi.useFakeTimers(); + + dlq.add(createTask()); + + vi.advanceTimersByTime(1000); + + const stats = dlq.getStats(); + expect(stats.avgAgeMs).toBeGreaterThanOrEqual(1000); + + vi.useRealTimers(); + }); + it("should track oldest entry age", () => { + vi.useFakeTimers(); + + dlq.add(createTask({ idempotencyKey: idempotencyKey("a".repeat(64)) })); + vi.advanceTimersByTime(500); + dlq.add(createTask({ idempotencyKey: idempotencyKey("b".repeat(64)) })); + vi.advanceTimersByTime(500); + + const stats = dlq.getStats(); + 
expect(stats.oldestAgeMs).toBeGreaterThanOrEqual(1000);
+      vi.useRealTimers();
+    });
+  });
+
+  describe("clear", () => {
+    it("should clear all entries and reset stats", () => {
+      const task = createFailedTask();
+      dlq.add(task);
+      dlq.retry(task.idempotencyKey);
+
+      // re-add for stats
+      const task2 = createTask();
+      dlq.add(task2);
+
+      dlq.clear();
+
+      expect(dlq.size).toBe(0);
+      expect(dlq.getStats().totalRetries).toBe(0);
+      expect(dlq.getStats().totalAdded).toBe(0);
+    });
+  });
+
+  describe("shutdown", () => {
+    it("should clear entries and stop cleanup timer", () => {
+      dlq.add(createTask());
+      dlq.shutdown();
+
+      expect(dlq.size).toBe(0);
+    });
+  });
+
+  describe("observability hooks", () => {
+    it("should record gauge for DLQ size", () => {
+      const mockHooks: TaskSystemHooks = {
+        ...noopHooks,
+        recordGauge: vi.fn(),
+      };
+      const observedDLQ = new DeadLetterQueue(defaultConfig, mockHooks);
+      observedDLQ.add(createTask());
+
+      expect(mockHooks.recordGauge).toHaveBeenCalled();
+
+      observedDLQ.shutdown();
+    });
+
+    it("should increment counter on add", () => {
+      const mockHooks: TaskSystemHooks = {
+        ...noopHooks,
+        incrementCounter: vi.fn(),
+      };
+      const observedDLQ = new DeadLetterQueue(defaultConfig, mockHooks);
+      observedDLQ.add(createTask());
+
+      expect(mockHooks.incrementCounter).toHaveBeenCalled();
+
+      observedDLQ.shutdown();
+    });
+  });
+});
diff --git a/packages/taskflow/src/tests/guard/guard.test.ts b/packages/taskflow/src/tests/guard/guard.test.ts
new file mode 100644
index 00000000..2815850c
--- /dev/null
+++ b/packages/taskflow/src/tests/guard/guard.test.ts
@@ -0,0 +1,537 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import { idempotencyKey, taskName, userId } from "@/core/branded";
+import {
+  BackpressureError,
+  SlotTimeoutError,
+  ValidationError,
+} from "@/core/errors";
+import { Task } from "@/domain/task";
+import { Guard } from "@/guard/guard";
+import type { GuardConfig } from "@/guard/types";
+
+const createTask = (overrides: Partial<Task> = {}): Task =>
+  new Task({
+    name: taskName(overrides.name ?? "test-task"),
+    input: { data: "test" },
+    userId: userId(overrides.userId ?? "user-1"),
+    idempotencyKey: idempotencyKey(
+      overrides.idempotencyKey ?? "test-idempotency-key",
+    ),
+    executionOptions: overrides.executionOptions ??
undefined,
+  });
+
+const createFailedTask = (overrides: Partial<Task> = {}): Task => {
+  const task = createTask(overrides);
+  task.start();
+  task.fail(new Error("Test error"));
+  return task;
+};
+
+const defaultConfig: Partial<GuardConfig> = {
+  backpressure: {
+    windowSizeMs: 60_000,
+    maxTasksPerWindow: 100,
+    maxTasksPerUserWindow: 10,
+    maxQueuedSize: 50,
+  },
+  slots: {
+    maxExecutionGlobal: 10,
+    maxExecutionPerUser: 5,
+    slotTimeoutMs: 5000,
+  },
+  dlq: {
+    maxSize: 100,
+    ttlMs: 24 * 60 * 60 * 1000,
+    cleanupIntervalMs: 60_000,
+    maxRetries: 3,
+  },
+  recovery: {
+    maxRecoverySlots: 5,
+    recoverySlotTimeoutMs: 30_000,
+  },
+};
+
+describe("Guard", () => {
+  let guard: Guard;
+
+  beforeEach(() => {
+    guard = new Guard(defaultConfig as unknown as GuardConfig);
+  });
+
+  afterEach(() => {
+    guard.shutdown();
+  });
+
+  describe("acceptTask", () => {
+    it("should accept a task", () => {
+      const task = createTask();
+
+      expect(() => guard.acceptTask(task)).not.toThrow();
+      expect(guard.getGlobalQueueSize()).toBe(1);
+    });
+
+    it("should throw BackpressureError when global queue is full", () => {
+      const smallGuard = new Guard({
+        ...defaultConfig,
+        backpressure: { ...defaultConfig.backpressure!, maxQueuedSize: 1 },
+        slots: defaultConfig.slots!,
+        dlq: defaultConfig.dlq!,
+        recovery: defaultConfig.recovery!,
+      });
+
+      smallGuard.acceptTask(
+        createTask({ idempotencyKey: idempotencyKey("a".repeat(64)) }),
+      );
+
+      expect(() =>
+        smallGuard.acceptTask(
+          createTask({ idempotencyKey: idempotencyKey("b".repeat(64)) }),
+        ),
+      ).toThrow(BackpressureError);
+
+      smallGuard.shutdown();
+    });
+
+    it("should throw BackpressureError when window limit reached", () => {
+      const smallGuard = new Guard({
+        ...defaultConfig,
+        backpressure: { ...defaultConfig.backpressure!, maxTasksPerWindow: 1 },
+        slots: defaultConfig.slots!,
+        dlq: defaultConfig.dlq!,
+        recovery: defaultConfig.recovery!,
+      });
+
+      smallGuard.acceptTask(
+        createTask({ idempotencyKey: idempotencyKey("a".repeat(64)) }),
+      );
+
+      expect(() =>
+        smallGuard.acceptTask(
+          createTask({ idempotencyKey: idempotencyKey("b".repeat(64)) }),
+        ),
+      ).toThrow(BackpressureError);
+
+      smallGuard.shutdown();
+    });
+
+    it("should throw BackpressureError when user limit reached", () => {
+      const smallGuard = new Guard({
+        ...defaultConfig,
+        backpressure: {
+          ...defaultConfig.backpressure!,
+          maxTasksPerUserWindow: 1,
+        },
+        slots: defaultConfig.slots!,
+        dlq: defaultConfig.dlq!,
+        recovery: defaultConfig.recovery!,
+      });
+
+      smallGuard.acceptTask(
+        createTask({
+          idempotencyKey: idempotencyKey("a".repeat(64)),
+          userId: userId("user-1"),
+        }),
+      );
+
+      expect(() =>
+        smallGuard.acceptTask(
+          createTask({
+            idempotencyKey: idempotencyKey("b".repeat(64)),
+            userId: userId("user-1"),
+          }),
+        ),
+      ).toThrow(BackpressureError);
+
+      smallGuard.shutdown();
+    });
+
+    it("should throw ValidationError when task is in DLQ", () => {
+      const task = createTask();
+      guard.addToDLQ(task);
+
+      expect(() => guard.acceptTask(task)).toThrow(ValidationError);
+    });
+  });
+
+  describe("acquireExecutionSlot", () => {
+    it("should acquire an execution slot", async () => {
+      const task = createTask();
+      guard.acceptTask(task);
+
+      await guard.acquireExecutionSlot(task);
+
+      expect(guard.getGlobalExecutionSize()).toBe(1);
+      expect(guard.getUserExecutionSize(task.userId!)).toBe(1);
+      expect(guard.getTemplateExecutionSize(task.name)).toBe(1);
+    });
+
+    it("should decrement queue size when acquiring slot", async () => {
+      const task = createTask();
+      guard.acceptTask(task);
+
+
expect(guard.getGlobalQueueSize()).toBe(1); + + await guard.acquireExecutionSlot(task); + + expect(guard.getGlobalQueueSize()).toBe(0); + }); + + it("should throw SlotTimeoutError when timeout reached", async () => { + vi.useFakeTimers(); + + const smallGuard = new Guard({ + ...defaultConfig, + slots: { + ...defaultConfig.slots!, + maxExecutionGlobal: 1, + slotTimeoutMs: 100, + }, + backpressure: defaultConfig.backpressure!, + dlq: defaultConfig.dlq!, + recovery: defaultConfig.recovery!, + }); + + const task1 = createTask({ + idempotencyKey: idempotencyKey("a".repeat(64)), + }); + const task2 = createTask({ + idempotencyKey: idempotencyKey("b".repeat(64)), + }); + + smallGuard.acceptTask(task1); + smallGuard.acceptTask(task2); + + await smallGuard.acquireExecutionSlot(task1); + + const acquirePromise = smallGuard.acquireExecutionSlot(task2); + vi.advanceTimersByTime(150); + + await expect(acquirePromise).rejects.toThrow(SlotTimeoutError); + + smallGuard.shutdown(); + vi.useRealTimers(); + }); + + it("should wait in queue and acquire slot when released", async () => { + const smallGuard = new Guard({ + ...defaultConfig, + slots: { + ...defaultConfig.slots!, + maxExecutionGlobal: 1, + slotTimeoutMs: 5000, + }, + backpressure: defaultConfig.backpressure!, + dlq: defaultConfig.dlq!, + recovery: defaultConfig.recovery!, + }); + + const task1 = createTask({ + idempotencyKey: idempotencyKey("a".repeat(64)), + }); + const task2 = createTask({ + idempotencyKey: idempotencyKey("b".repeat(64)), + }); + + smallGuard.acceptTask(task1); + smallGuard.acceptTask(task2); + + await smallGuard.acquireExecutionSlot(task1); + expect(smallGuard.getGlobalExecutionSize()).toBe(1); + + const slot2Promise = smallGuard.acquireExecutionSlot(task2); + expect(smallGuard.getWaitingQueueSize()).toBe(1); + + smallGuard.releaseExecutionSlot(task1); + + await slot2Promise; + expect(smallGuard.getGlobalExecutionSize()).toBe(1); + expect(smallGuard.getWaitingQueueSize()).toBe(0); + + smallGuard.shutdown(); + }); + + it("should respect template execution limit", async () => { + vi.useFakeTimers(); + + const smallGuard = new Guard({ + ...defaultConfig, + slots: { ...defaultConfig.slots!, slotTimeoutMs: 100 }, + backpressure: defaultConfig.backpressure!, + dlq: defaultConfig.dlq!, + recovery: defaultConfig.recovery!, + }); + + const task1 = createTask({ + idempotencyKey: idempotencyKey("a".repeat(64)), + name: taskName("limited-task"), + executionOptions: { maxConcurrentExecutions: 1 }, + }); + const task2 = createTask({ + idempotencyKey: idempotencyKey("b".repeat(64)), + name: taskName("limited-task"), + executionOptions: { maxConcurrentExecutions: 1 }, + }); + + smallGuard.acceptTask(task1); + smallGuard.acceptTask(task2); + + await smallGuard.acquireExecutionSlot(task1); + + const acquirePromise = smallGuard.acquireExecutionSlot(task2); + vi.advanceTimersByTime(150); + + await expect(acquirePromise).rejects.toThrow(SlotTimeoutError); + + smallGuard.shutdown(); + vi.useRealTimers(); + }); + }); + + describe("releaseExecutionSlot", () => { + it("should release an execution slot", async () => { + const task = createTask(); + guard.acceptTask(task); + await guard.acquireExecutionSlot(task); + + expect(guard.getGlobalExecutionSize()).toBe(1); + + guard.releaseExecutionSlot(task); + + expect(guard.getGlobalExecutionSize()).toBe(0); + }); + + it("should not go below zero", () => { + const task = createTask(); + + guard.releaseExecutionSlot(task); + guard.releaseExecutionSlot(task); + + expect(guard.getGlobalExecutionSize()).toBe(0); 
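+      // counts are clamped with Math.max(0, ...) inside the slot manager,
+      // so releasing more often than acquiring is harmless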
+ }); + }); + + describe("recovery slots", () => { + it("should acquire a recovery slot", () => { + expect(() => guard.acquireRecoverySlot()).not.toThrow(); + }); + + it("should release a recovery slot", () => { + guard.acquireRecoverySlot(); + expect(() => guard.releaseRecoverySlot()).not.toThrow(); + }); + + it("should throw BackpressureError when recovery capacity exhausted", () => { + const smallGuard = new Guard({ + ...defaultConfig, + recovery: { ...defaultConfig.recovery!, maxRecoverySlots: 1 }, + backpressure: defaultConfig.backpressure!, + dlq: defaultConfig.dlq!, + slots: defaultConfig.slots!, + }); + + smallGuard.acquireRecoverySlot(); + + expect(() => smallGuard.acquireRecoverySlot()).toThrow(BackpressureError); + + smallGuard.shutdown(); + }); + + it("should not go below zero on release", () => { + guard.releaseRecoverySlot(); + guard.releaseRecoverySlot(); + + // Should not throw, just stay at 0 + const stats = guard.getStats(); + expect(stats.recovery.inUse).toBe(0); + }); + }); + + describe("DLQ operations", () => { + it("should add a task to the DLQ", () => { + const task = createTask(); + guard.addToDLQ(task, "test reason"); + + expect(guard.isTaskInDLQ(task.idempotencyKey)).toBe(true); + expect(guard.getDLQSize()).toBe(1); + }); + + it("should get DLQ entry with metadata", () => { + const task = createTask(); + guard.addToDLQ(task, "test reason"); + + const entry = guard.getDLQEntry(task.idempotencyKey); + + expect(entry).toBeDefined(); + expect(entry?.task.id).toBe(task.id); + expect(entry?.reason).toBe("test reason"); + }); + + it("should list all DLQ entries", () => { + guard.addToDLQ( + createTask({ idempotencyKey: idempotencyKey("a".repeat(64)) }), + ); + guard.addToDLQ( + createTask({ idempotencyKey: idempotencyKey("b".repeat(64)) }), + ); + + const entries = guard.getDLQEntries(); + + expect(entries).toHaveLength(2); + }); + + it("should remove task from DLQ", () => { + const task = createTask(); + guard.addToDLQ(task); + + const removed = guard.removeFromDLQ(task.idempotencyKey); + + expect(removed).toBe(true); + expect(guard.isTaskInDLQ(task.idempotencyKey)).toBe(false); + }); + + it("should retry a task from DLQ", () => { + const task = createFailedTask(); + guard.addToDLQ(task, "test failure"); + + const retriedTask = guard.retryFromDLQ(task.idempotencyKey); + + expect(retriedTask).not.toBeNull(); + expect(retriedTask?.status).toBe("created"); + expect(guard.isTaskInDLQ(task.idempotencyKey)).toBe(false); + }); + + it("should retry all tasks from DLQ", () => { + const task1 = createFailedTask({ + idempotencyKey: idempotencyKey("a".repeat(64)), + }); + const task2 = createFailedTask({ + idempotencyKey: idempotencyKey("b".repeat(64)), + }); + + guard.addToDLQ(task1); + guard.addToDLQ(task2); + + const retriedTasks = guard.retryAllFromDLQ(); + + expect(retriedTasks).toHaveLength(2); + expect(guard.getDLQSize()).toBe(0); + }); + + it("should retry DLQ entries with filter", () => { + const task1 = createFailedTask({ + idempotencyKey: idempotencyKey("a".repeat(64)), + name: taskName("task-a"), + }); + const task2 = createFailedTask({ + idempotencyKey: idempotencyKey("b".repeat(64)), + name: taskName("task-b"), + }); + + guard.addToDLQ(task1); + guard.addToDLQ(task2); + + const retriedTasks = guard.retryDLQWithFilter( + (entry) => entry.task.name === taskName("task-a"), + ); + + expect(retriedTasks).toHaveLength(1); + expect(guard.getDLQSize()).toBe(1); + }); + + it("should subscribe to DLQ events", () => { + const events: Array<{ type: string }> = []; + 
guard.onDLQEvent((event) => events.push(event));
+
+      guard.addToDLQ(createTask());
+
+      expect(events).toHaveLength(1);
+      expect(events[0].type).toBe("dlq:added");
+    });
+
+    it("should get DLQ stats", () => {
+      guard.addToDLQ(
+        createTask({ idempotencyKey: idempotencyKey("a".repeat(64)) }),
+        "timeout",
+      );
+      guard.addToDLQ(
+        createTask({ idempotencyKey: idempotencyKey("b".repeat(64)) }),
+        "timeout",
+      );
+
+      const stats = guard.getDLQStats();
+
+      expect(stats.size).toBe(2);
+    });
+  });
+
+  describe("getStats", () => {
+    it("should return comprehensive stats", async () => {
+      const task1 = createTask({
+        idempotencyKey: idempotencyKey("a".repeat(64)),
+      });
+      const task2 = createTask({
+        idempotencyKey: idempotencyKey("b".repeat(64)),
+      });
+
+      guard.acceptTask(task1);
+      guard.acceptTask(task2);
+      await guard.acquireExecutionSlot(task1);
+
+      guard.addToDLQ(
+        createTask({ idempotencyKey: idempotencyKey("c".repeat(64)) }),
+      );
+      guard.acquireRecoverySlot();
+
+      const stats = guard.getStats();
+
+      expect(stats.admission.window.accepted).toBe(2);
+      expect(stats.slots.current.inUse).toBe(1);
+      expect(stats.dlq.size).toBe(1);
+      expect(stats.recovery.inUse).toBe(1);
+    });
+  });
+
+  describe("shutdown", () => {
+    it("should clear all state on shutdown", async () => {
+      const task = createTask();
+      guard.acceptTask(task);
+      await guard.acquireExecutionSlot(task);
+      guard.addToDLQ(
+        createTask({ idempotencyKey: idempotencyKey("b".repeat(64)) }),
+      );
+      guard.acquireRecoverySlot();
+
+      guard.shutdown();
+
+      expect(guard.getGlobalQueueSize()).toBe(0);
+      expect(guard.getGlobalExecutionSize()).toBe(0);
+      expect(guard.getDLQSize()).toBe(0);
+      expect(guard.getStats().recovery.inUse).toBe(0);
+    });
+  });
+
+  describe("clear", () => {
+    it("should clear all state", async () => {
+      const task = createTask();
+      guard.acceptTask(task);
+      await guard.acquireExecutionSlot(task);
+
+      guard.clear();
+
+      expect(guard.getGlobalQueueSize()).toBe(0);
+      expect(guard.getGlobalExecutionSize()).toBe(0);
+    });
+  });
+
+  describe("default configuration", () => {
+    it("should use default config when none provided", () => {
+      const defaultGuard = new Guard(defaultConfig as GuardConfig);
+
+      // Should not throw
+      const task = createTask();
+      expect(() => defaultGuard.acceptTask(task)).not.toThrow();
+
+      defaultGuard.shutdown();
+    });
+  });
+});
diff --git a/packages/taskflow/src/tests/guard/slot-manager.test.ts b/packages/taskflow/src/tests/guard/slot-manager.test.ts
new file mode 100644
index 00000000..d3796fa7
--- /dev/null
+++ b/packages/taskflow/src/tests/guard/slot-manager.test.ts
@@ -0,0 +1,347 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import { idempotencyKey, taskName, userId } from "@/core/branded";
+import { SlotTimeoutError } from "@/core/errors";
+import { Task } from "@/domain";
+import { SlotManager } from "@/guard/slot-manager";
+import type { SlotManagerConfig } from "@/guard/types";
+import type { TaskSystemHooks } from "@/observability/hooks";
+import { noopHooks } from "@/observability/noop";
+
+const createTask = (overrides?: Partial<Task>): Task =>
+  new Task({
+    name: taskName(overrides?.name ?? "test-task"),
+    input: { data: "test" },
+    // pass null through so the background-task test really gets a null userId
+    userId:
+      overrides?.userId === null
+        ? null
+        : userId(overrides?.userId ?? "test-user"),
+    idempotencyKey: idempotencyKey(
+      overrides?.idempotencyKey ?? "test-idempotency-key",
+    ),
+    executionOptions: overrides?.executionOptions ??
undefined, + }); + +const defaultConfig: SlotManagerConfig = { + maxExecutionGlobal: 10, + maxExecutionPerUser: 5, + slotTimeoutMs: 5000, +}; + +describe("SlotManager", () => { + let slotManager: SlotManager; + beforeEach(() => { + slotManager = new SlotManager(defaultConfig); + }); + + afterEach(() => { + slotManager.clear(); + }); + + describe("acquire", () => { + it("should acquire an execution slot", async () => { + const task = createTask(); + await slotManager.acquire(task); + + expect(slotManager.getGlobalExecutionSize()).toBe(1); + expect(slotManager.getUserExecutionSize(task.userId!)).toBe(1); + expect(slotManager.getTemplateExecutionSize(task.name)).toBe(1); + }); + + it("should acquire multiple slots up to global limit", async () => { + const sm = new SlotManager({ ...defaultConfig, maxExecutionGlobal: 3 }); + await sm.acquire(createTask({ idempotencyKey: idempotencyKey("a") })); + await sm.acquire(createTask({ idempotencyKey: idempotencyKey("b") })); + await sm.acquire(createTask({ idempotencyKey: idempotencyKey("c") })); + + expect(sm.getGlobalExecutionSize()).toBe(3); + }); + + it("should wait in queue when global slots are full", async () => { + const sm = new SlotManager({ + ...defaultConfig, + maxExecutionGlobal: 1, + slotTimeoutMs: 1000, + }); + + const task1 = createTask(); + const task2 = createTask(); + + await sm.acquire(task1); + expect(sm.getGlobalExecutionSize()).toBe(1); + + // start waiting for slot + const slot2Promise = sm.acquire(task2); + expect(sm.getWaitingQueueSize()).toBe(1); + + // release slot 1 + sm.release(task1); + + // slot2 should be acquired + await slot2Promise; + expect(sm.getGlobalExecutionSize()).toBe(1); + expect(sm.getWaitingQueueSize()).toBe(0); + }); + + it("should throw SlotTimeoutError when timeout is reached", async () => { + vi.useFakeTimers(); + + const sm = new SlotManager({ + ...defaultConfig, + maxExecutionGlobal: 1, + slotTimeoutMs: 100, + }); + + const task1 = createTask(); + const task2 = createTask(); + + await sm.acquire(task1); + + const acquirePromise = sm.acquire(task2); + + // advance time past timeout + vi.advanceTimersByTime(150); + + await expect(acquirePromise).rejects.toThrow(SlotTimeoutError); + + try { + await acquirePromise; + } catch (error) { + expect(SlotTimeoutError.is(error)).toBe(true); + expect((error as SlotTimeoutError).timeoutMs).toBe(100); + } + + vi.useRealTimers(); + }); + + it("should respect per-user limits", async () => { + vi.useFakeTimers(); + + const sm = new SlotManager({ + ...defaultConfig, + maxExecutionPerUser: 1, + slotTimeoutMs: 100, + }); + + const task1 = createTask({ userId: userId("user-1") }); + const task2 = createTask({ userId: userId("user-1") }); + + await sm.acquire(task1); + + const acquirePromise = sm.acquire(task2); + vi.advanceTimersByTime(150); + + await expect(acquirePromise).rejects.toThrow(SlotTimeoutError); + vi.useRealTimers(); + }); + + it("should allow different users to acquire slots independently", async () => { + const sm = new SlotManager({ + ...defaultConfig, + maxExecutionPerUser: 1, + maxExecutionGlobal: 10, + }); + + const task1 = createTask({ userId: userId("user-1") }); + const task2 = createTask({ userId: userId("user-2") }); + + await sm.acquire(task1); + await sm.acquire(task2); + + expect(sm.getGlobalExecutionSize()).toBe(2); + expect(sm.getUserExecutionSize(userId("user-1")!)).toBe(1); + expect(sm.getUserExecutionSize(userId("user-2")!)).toBe(1); + }); + + it("should respect per-template limits", async () => { + vi.useFakeTimers(); + const sm = new 
SlotManager({ + ...defaultConfig, + slotTimeoutMs: 100, + }); + + const task1 = createTask({ + name: taskName("task-1"), + executionOptions: { maxConcurrentExecutions: 1 }, + }); + const task2 = createTask({ + name: taskName("task-1"), + executionOptions: { maxConcurrentExecutions: 1 }, + }); + + await sm.acquire(task1); + + const acquirePromise = sm.acquire(task2); + vi.advanceTimersByTime(150); + + await expect(acquirePromise).rejects.toThrow(SlotTimeoutError); + vi.useRealTimers(); + }); + + it("should rollback user count when template limit fails", async () => { + vi.useFakeTimers(); + const sm = new SlotManager({ + ...defaultConfig, + slotTimeoutMs: 100, + }); + + const task1 = createTask({ + name: taskName("task-1"), + executionOptions: { maxConcurrentExecutions: 1 }, + }); + + await sm.acquire(task1); + expect(sm.getTemplateExecutionSize(task1.name)).toBe(1); + + const task2 = createTask({ + name: taskName("task-1"), + executionOptions: { maxConcurrentExecutions: 1 }, + }); + + const acquirePromise = sm.acquire(task2); + vi.advanceTimersByTime(150); + + await expect(acquirePromise).rejects.toThrow(SlotTimeoutError); + + // user count should still be 1, not 2 + expect(sm.getUserExecutionSize(task1.userId!)).toBe(1); + vi.useRealTimers(); + }); + + it("should handle background tasks (null userId)", async () => { + const task = createTask({ userId: null }); + + await slotManager.acquire(task); + expect(slotManager.getGlobalExecutionSize()).toBe(1); + }); + }); + + describe("release", () => { + it("should release an execution slot", async () => { + const task = createTask(); + + await slotManager.acquire(task); + expect(slotManager.getGlobalExecutionSize()).toBe(1); + + slotManager.release(task); + + expect(slotManager.getGlobalExecutionSize()).toBe(0); + expect(slotManager.getUserExecutionSize(task.userId!)).toBe(0); + expect(slotManager.getTemplateExecutionSize(task.name)).toBe(0); + }); + + it("should not go below zero on release", async () => { + const task = createTask(); + + slotManager.release(task); + slotManager.release(task); + + expect(slotManager.getGlobalExecutionSize()).toBe(0); + expect(slotManager.getUserExecutionSize(task.userId!)).toBe(0); + }); + + it("should process waiting queue when slot is released", async () => { + const sm = new SlotManager({ + ...defaultConfig, + maxExecutionGlobal: 1, + slotTimeoutMs: 5000, + }); + + const task1 = createTask(); + const task2 = createTask(); + + await sm.acquire(task1); + const slot2Promise = sm.acquire(task2); + expect(sm.getWaitingQueueSize()).toBe(1); + + sm.release(task1); + + await slot2Promise; + expect(sm.getGlobalExecutionSize()).toBe(1); + expect(sm.getWaitingQueueSize()).toBe(0); + }); + }); + + describe("getStats", () => { + it("should return slot statistics", async () => { + await slotManager.acquire(createTask()); + + const stats = slotManager.getStats(); + + expect(stats.current.inUse).toBe(1); + expect(stats.current.waiting).toBe(0); + expect(stats.current.available).toBe(9); + expect(stats.limits.global).toBe(10); + expect(stats.limits.perUser).toBe(5); + expect(stats.events.acquired).toBe(1); + }); + it("should track timeout events", async () => { + vi.useFakeTimers(); + + const sm = new SlotManager({ + ...defaultConfig, + maxExecutionGlobal: 1, + slotTimeoutMs: 100, + }); + + await sm.acquire(createTask()); + const acquirePromise = sm.acquire(createTask()); + vi.advanceTimersByTime(150); + + try { + await acquirePromise; + } catch {} + + expect(sm.getStats().events.timeouts).toBe(1); + vi.useRealTimers(); + 
});
+  });
+
+  describe("clear", () => {
+    it("should clear all state", async () => {
+      await slotManager.acquire(createTask());
+
+      slotManager.clear();
+
+      expect(slotManager.getGlobalExecutionSize()).toBe(0);
+      expect(slotManager.getWaitingQueueSize()).toBe(0);
+      expect(slotManager.getStats().events.acquired).toBe(0);
+    });
+  });
+
+  describe("observability hooks", () => {
+    it("should record gauge on slot acquisition", async () => {
+      const mockHooks: TaskSystemHooks = {
+        ...noopHooks,
+        recordGauge: vi.fn(),
+      };
+
+      const sm = new SlotManager(defaultConfig, mockHooks);
+      await sm.acquire(createTask());
+
+      expect(mockHooks.recordGauge).toHaveBeenCalled();
+    });
+
+    it("should increment counter on timeout", async () => {
+      vi.useFakeTimers();
+
+      const mockHooks: TaskSystemHooks = {
+        ...noopHooks,
+        incrementCounter: vi.fn(),
+      };
+
+      const sm = new SlotManager(
+        { ...defaultConfig, maxExecutionGlobal: 1 },
+        mockHooks,
+      );
+      await sm.acquire(createTask());
+
+      const acquirePromise = sm.acquire(
+        createTask({ idempotencyKey: idempotencyKey("second-task") }),
+      );
+      vi.advanceTimersByTime(defaultConfig.slotTimeoutMs);
+      try {
+        await acquirePromise;
+      } catch {}
+
+      expect(mockHooks.incrementCounter).toHaveBeenCalled();
+      vi.useRealTimers();
+    });
+  });
+});
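For reviewers, a minimal sketch (not part of the patch) of how the SlotManager API exercised above might sit in front of an executor. The `runWithSlot` helper and its `run` callback are hypothetical; the config values mirror the test defaults. Releasing in `finally` is what wakes waiting acquirers even when the task throws.

import { SlotTimeoutError } from "@/core/errors";
import type { Task } from "@/domain";
import { SlotManager } from "@/guard/slot-manager";

const slots = new SlotManager({
  maxExecutionGlobal: 10,
  maxExecutionPerUser: 5,
  slotTimeoutMs: 5000,
});

async function runWithSlot(task: Task, run: (t: Task) => Promise<void>) {
  try {
    await slots.acquire(task); // may queue, then throw SlotTimeoutError
  } catch (error) {
    if (SlotTimeoutError.is(error)) {
      // back-pressure: report "busy" instead of crashing
      return;
    }
    throw error;
  }
  try {
    await run(task);
  } finally {
    slots.release(task); // always release so waiters are processed
  }
}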
From f37f2a23f3f5c7a3d35aa95bc0d59d7a4db78962 Mon Sep 17 00:00:00 2001
From: Ditadi
Date: Mon, 26 Jan 2026 20:45:09 +0000
Subject: [PATCH 07/13] feat(taskflow): delivery layer with stream manager and
 ring-buffer

---
 packages/taskflow/src/delivery/ring-buffer.ts | 161 +++
 packages/taskflow/src/delivery/stream.ts      | 351 +++++++++
 packages/taskflow/src/delivery/types.ts       | 102 ++++
 .../src/tests/delivery/ring-buffer.test.ts    | 235 ++++++++
 .../src/tests/delivery/stream.test.ts         | 549 ++++++++++++++++++
 5 files changed, 1398 insertions(+)
 create mode 100644 packages/taskflow/src/delivery/ring-buffer.ts
 create mode 100644 packages/taskflow/src/delivery/stream.ts
 create mode 100644 packages/taskflow/src/delivery/types.ts
 create mode 100644 packages/taskflow/src/tests/delivery/ring-buffer.test.ts
 create mode 100644 packages/taskflow/src/tests/delivery/stream.test.ts

diff --git a/packages/taskflow/src/delivery/ring-buffer.ts b/packages/taskflow/src/delivery/ring-buffer.ts
new file mode 100644
index 00000000..d1a5be40
--- /dev/null
+++ b/packages/taskflow/src/delivery/ring-buffer.ts
@@ -0,0 +1,161 @@
+/**
+ * Ring buffer - Fixed size circular buffer with FIFO eviction
+ *
+ * Used for buffering stream events with automatic eviction of oldest
+ * items when capacity is reached. Supports key-based deduplication
+ * and O(1) lookup.
+ */
+
+import { ValidationError } from "@/core/errors";
+
+/**
+ * Generic ring buffer with key-based lookup and FIFO eviction
+ */
+export class RingBuffer<T> {
+  /** Internal buffer array */
+  private buffer: (T | null)[];
+  /** Maximum capacity */
+  private readonly capacity: number;
+  /** Current write position */
+  private writeIndex: number;
+  /** Number of items in buffer */
+  private size: number;
+  /** Function to extract key from item */
+  private readonly keyExtractor: (item: T) => string;
+  /** Map from key to buffer index for O(1) lookup */
+  private keyIndex: Map<string, number>;
+  /** Count of evicted items (overflow) */
+  private overflowCount: number;
+
+  constructor(capacity: number, keyExtractor: (item: T) => string) {
+    if (capacity <= 0) {
+      throw new ValidationError(
+        `Ring buffer capacity must be greater than 0, got ${capacity}`,
+        "capacity",
+      );
+    }
+
+    this.capacity = capacity;
+    this.buffer = new Array(capacity).fill(null);
+    this.writeIndex = 0;
+    this.size = 0;
+    this.keyExtractor = keyExtractor;
+    this.keyIndex = new Map();
+    this.overflowCount = 0;
+  }
+
+  /**
+   * Add an item to the buffer
+   * If an item with the same key exists, it will be updated
+   * If at capacity, the oldest item will be evicted
+   */
+  add(item: T): void {
+    const key = this.keyExtractor(item);
+
+    // check if item already exists, update in place
+    const existingIndex = this.keyIndex.get(key);
+    if (existingIndex !== undefined) {
+      this.buffer[existingIndex] = item;
+      return;
+    }
+
+    // evict oldest item if at capacity
+    const evicted = this.buffer[this.writeIndex];
+    if (evicted) {
+      const evictedKey = this.keyExtractor(evicted);
+      this.keyIndex.delete(evictedKey);
+      this.overflowCount++;
+    }
+
+    // add new item
+    this.buffer[this.writeIndex] = item;
+    this.keyIndex.set(key, this.writeIndex);
+
+    // update write index and size
+    this.writeIndex = (this.writeIndex + 1) % this.capacity;
+    this.size = Math.min(this.size + 1, this.capacity);
+  }
+
+  /**
+   * Get an item by its key
+   */
+  get(key: string): T | null {
+    const index = this.keyIndex.get(key);
+    if (index === undefined) return null;
+
+    return this.buffer[index];
+  }
+
+  /**
+   * Check if an item exists in the buffer
+   */
+  has(key: string): boolean {
+    return this.keyIndex.has(key);
+  }
+
+  /**
+   * Remove an item from the buffer by key
+   */
+  remove(key: string): void {
+    const index = this.keyIndex.get(key);
+    if (index === undefined) return;
+
+    // remove item from buffer
+    this.buffer[index] = null;
+    this.keyIndex.delete(key);
+
+    // update size
+    this.size = Math.max(this.size - 1, 0);
+  }
+
+  /**
+   * Get all items in insertion order (oldest first)
+   */
+  getAll(): T[] {
+    if (this.keyIndex.size === 0) return [];
+
+    const result: T[] = [];
+
+    // iterate over buffer in order of insertion
+    for (let i = 0; i < this.capacity; i++) {
+      // calculate index of item in buffer
+      const index =
+        (this.writeIndex - this.capacity + i + this.capacity) % this.capacity;
+      const item = this.buffer[index];
+      if (item !== null) result.push(item);
+    }
+    return result;
+  }
+
+  /**
+   * Get the current number of items in the buffer
+   */
+  getSize(): number {
+    return this.size;
+  }
+
+  /**
+   * Get the maximum capacity
+   */
+  getCapacity(): number {
+    return this.capacity;
+  }
+
+  /**
+   * Get the number of items that have been evicted (overflow)
+   */
+  getOverflowCount(): number {
+    return this.overflowCount;
+  }
+
+  /**
+   * Clear all items from the buffer
+   */
+  clear(): void {
+    this.buffer = new Array(this.capacity).fill(null);
+    this.keyIndex.clear();
+    this.writeIndex = 0;
+    this.size = 0;
+    this.overflowCount = 0;
+  }
+}
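A quick illustration (not part of the patch) of the eviction and dedup semantics the tests later in this patch rely on; `SeqEvent` is a made-up item type.

import { RingBuffer } from "@/delivery/ring-buffer";

type SeqEvent = { seq: number; message: string };

// Capacity 3, keyed by sequence number.
const buf = new RingBuffer<SeqEvent>(3, (e) => String(e.seq));

for (let seq = 1; seq <= 5; seq++) {
  buf.add({ seq, message: `event ${seq}` });
}

const order = buf.getAll().map((e) => e.seq); // [3, 4, 5] — 1 and 2 evicted
const evictions = buf.getOverflowCount(); // 2
buf.add({ seq: 4, message: "updated" }); // same key: updated in place, no eviction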
diff --git a/packages/taskflow/src/delivery/stream.ts b/packages/taskflow/src/delivery/stream.ts
new file mode 100644
index 00000000..09213890
--- /dev/null
+++ b/packages/taskflow/src/delivery/stream.ts
@@ -0,0 +1,351 @@
+/**
+ * Stream Manager - Manages event streams for SSE delivery
+ *
+ * Provides event streaming with:
+ * - Ring buffer for event replay on reconnection
+ * - Sequence numbers for ordering
+ * - AbortSignal support for cancellation
+ * - Automatic cleanup of closed streams
+ */
+
+import type { IdempotencyKey } from "@/core/branded";
+import { StreamOverflowError, ValidationError } from "@/core/errors";
+import type { TaskEvent } from "@/domain";
+import { noopHooks, TaskMetrics, type TaskSystemHooks } from "@/observability";
+import { RingBuffer } from "./ring-buffer";
+import {
+  DEFAULT_STREAM_CONFIG,
+  type StreamConfig,
+  type StreamStats,
+  type StreamTaskEvent,
+  type TaskStream,
+  type TaskStreamOptions,
+} from "./types";
+
+/**
+ * Manages event streams for SSE delivery with reconnection support
+ */
+export class StreamManager {
+  private readonly config: StreamConfig;
+  private readonly hooks: TaskSystemHooks;
+
+  private readonly streams: Map<string, TaskStream>;
+  private overflowCount: number;
+  private eventsPushed: number;
+  private eventsConsumed: number;
+
+  constructor(
+    config?: Partial<StreamConfig>,
+    hooks: TaskSystemHooks = noopHooks,
+  ) {
+    this.config = {
+      ...DEFAULT_STREAM_CONFIG,
+      ...config,
+    };
+    this.hooks = hooks;
+
+    this.streams = new Map();
+    this.overflowCount = 0;
+    this.eventsPushed = 0;
+    this.eventsConsumed = 0;
+  }
+
+  /**
+   * Get or create a stream for the given idempotency key
+   */
+  getOrCreate(idempotencyKey: string): TaskStream {
+    this.validateIdempotencyKey(idempotencyKey);
+
+    const existing = this.streams.get(idempotencyKey);
+    if (existing) return existing;
+
+    const stream: TaskStream = {
+      buffer: new RingBuffer<StreamTaskEvent>(
+        this.config.streamBufferSize,
+        (event) => String(event.seq),
+      ),
+      listeners: new Set(),
+      closed: false,
+      cleanupTimer: null,
+      nextSeq: 1,
+    };
+
+    // add stream to map
+    this.streams.set(idempotencyKey, stream);
+
+    // record gauge for active streams
+    this.hooks.recordGauge(TaskMetrics.STREAMS_ACTIVE, this.streams.size);
+
+    return stream;
+  }
+
+  /**
+   * Get an existing stream (does not create)
+   */
+  get(idempotencyKey: string): TaskStream | undefined {
+    this.validateIdempotencyKey(idempotencyKey);
+    return this.streams.get(idempotencyKey);
+  }
+
+  /**
+   * Push an event to a stream
+   */
+  push(idempotencyKey: string, event: TaskEvent): void {
+    this.validateIdempotencyKey(idempotencyKey);
+    const stream = this.streams.get(idempotencyKey);
+    if (!stream) return;
+
+    const sequencedEvent: StreamTaskEvent = {
+      ...event,
+      seq: stream.nextSeq++,
+    };
+
+    stream.buffer.add(sequencedEvent);
+    this.eventsPushed++;
+
+    // notify all listeners
+    for (const listener of stream.listeners) {
+      listener();
+    }
+
+    // record metrics
+    this.hooks.incrementCounter(TaskMetrics.FLUSH_ENTRIES, 1, {
+      stream: idempotencyKey,
+    });
+  }
+
+  /**
+   * Close a stream
+   */
+  close(idempotencyKey: string): void {
+    this.validateIdempotencyKey(idempotencyKey);
+
+    const stream = this.streams.get(idempotencyKey);
+    if (!stream || stream.closed) return;
+    stream.closed = true;
+
+    // notify all listeners stream is closing
+    for (const listener of stream.listeners) {
+      listener();
+    }
+    stream.listeners.clear();
+
+    // clear any existing cleanup timer
+    if (stream.cleanupTimer) {
+      clearTimeout(stream.cleanupTimer);
+    }
+
+    // schedule cleanup after retention period
+    stream.cleanupTimer = setTimeout(() => {
+      const current = this.streams.get(idempotencyKey);
+      if (current === stream) {
+        this.streams.delete(idempotencyKey);
+        this.hooks.recordGauge(
+          TaskMetrics.STREAMS_ACTIVE,
+          this.getActiveCount(),
+        );
+      }
+    }, this.config.streamRetentionMs);
+
+    // use unref() to not block process exit
+    if (stream.cleanupTimer.unref) {
+      stream.cleanupTimer.unref();
+    }
+  }
+
+  /**
+   * Create an async generator for streaming events
+   */
+  async *createGenerator(
+    idempotencyKey: IdempotencyKey,
+    options?: TaskStreamOptions,
+  ): AsyncGenerator<StreamTaskEvent> {
+    this.validateIdempotencyKey(idempotencyKey);
+
+    const stream = this.streams.get(idempotencyKey);
+    if (!stream) return;
+
+    let lastSeq: number = options?.lastSeq ?? 0;
+    const signal = options?.signal;
+
+    while (true) {
+      // check for abort
+      if (signal?.aborted) return;
+
+      const allEvents = stream.buffer.getAll();
+
+      // check for overflow (reconnection with evicted events)
+      if (allEvents.length > 0) {
+        const minSeq = allEvents[0].seq;
+
+        if (lastSeq !== 0 && lastSeq < minSeq) {
+          this.overflowCount++;
+          throw new StreamOverflowError(
+            `Stream overflow: requested seq ${lastSeq} has been evicted (min: ${minSeq})`,
+            {
+              idempotencyKey,
+              lastSeq: String(lastSeq),
+              minSeq: String(minSeq),
+            },
+          );
+        }
+      }
+
+      // yield events newer than lastSeq
+      for (const event of allEvents) {
+        if (event.seq > lastSeq) {
+          yield event;
+          lastSeq = event.seq;
+          this.eventsConsumed++;
+        }
+      }
+
+      // if stream is closed, finish
+      if (stream.closed) return;
+
+      // wait for new events or close
+      await new Promise<void>((resolve, reject) => {
+        const listener = () => {
+          stream.listeners.delete(listener);
+          signal?.removeEventListener("abort", onAbort);
+          resolve();
+        };
+
+        const onAbort = () => {
+          stream.listeners.delete(listener);
+          signal?.removeEventListener("abort", onAbort);
+          reject(signal?.reason ?? new Error("Stream aborted"));
+        };
+
+        stream.listeners.add(listener);
+
+        if (signal) {
+          if (signal.aborted) {
+            onAbort();
+            return;
+          }
+
+          signal.addEventListener("abort", onAbort, { once: true });
+        }
+      });
+    }
+  }
+
+  /**
+   * Clear all streams and cancel cleanup timers
+   */
+  clearAll(): void {
+    for (const stream of this.streams.values()) {
+      if (stream.cleanupTimer) {
+        clearTimeout(stream.cleanupTimer);
+      }
+    }
+
+    this.streams.clear();
+    this.overflowCount = 0;
+    this.eventsPushed = 0;
+    this.eventsConsumed = 0;
+    this.hooks.recordGauge(TaskMetrics.STREAMS_ACTIVE, 0);
+  }
+
+  /**
+   * Get the listener count for a stream
+   */
+  getListenerCount(idempotencyKey: IdempotencyKey): number {
+    this.validateIdempotencyKey(idempotencyKey);
+    return this.streams.get(idempotencyKey)?.listeners.size ?? 0;
+  }
+
+  /**
+   * Get comprehensive statistics
+   */
+  getStats(): StreamStats {
+    let activeStream = 0;
+    let closedStream = 0;
+    let totalListeners = 0;
+    let totalBufferedEvents = 0;
+    let streamsWithListeners = 0;
+    let maxListenersOnSingleStream = 0;
+
+    const streamData: Array<{
+      idempotencyKey: string;
+      bufferedEvents: number;
+      listeners: number;
+      closed: boolean;
+    }> = [];
+
+    for (const [key, stream] of this.streams) {
+      if (stream.closed) closedStream++;
+      else activeStream++;
+
+      const listenerCount = stream.listeners.size;
+      totalListeners += listenerCount;
+
+      if (listenerCount > 0) streamsWithListeners++;
+
+      if (listenerCount > maxListenersOnSingleStream)
+        maxListenersOnSingleStream = listenerCount;
+
+      const bufferedEvents = stream.buffer.getAll().length;
+      totalBufferedEvents += bufferedEvents;
+
+      streamData.push({
+        idempotencyKey: key,
+        bufferedEvents,
+        listeners: listenerCount,
+        closed: stream.closed,
+      });
+    }
+
+    // sort by buffered events descending
+    streamData.sort((a, b) => b.bufferedEvents - a.bufferedEvents);
+
+    return {
+      streams: {
+        active: activeStream,
+        closed: closedStream,
+        total: this.streams.size,
+      },
+      config: {
+        retentionMs: this.config.streamRetentionMs,
+        bufferSize: this.config.streamBufferSize,
+      },
+      listeners: {
+        total: totalListeners,
+        streamsWithListeners,
+        maxOnSingleStream: maxListenersOnSingleStream,
+      },
+      buffer: {
+        totalEvents: totalBufferedEvents,
+        overflows: this.overflowCount,
+      },
+      events: {
+        pushed: this.eventsPushed,
+        consumed: this.eventsConsumed,
+      },
+      debug: {
+        topStreamsByBufferedEvents: streamData.slice(0, 10),
+      },
+    };
+  }
+
+  /**
+   * Get count of active (non-closed) streams
+   */
+  private getActiveCount(): number {
+    let count = 0;
+    for (const stream of this.streams.values()) {
+      if (!stream.closed) count++;
+    }
+    return count;
+  }
+
+  /**
+   * Validate idempotency key
+   */
+  private validateIdempotencyKey(idempotencyKey: string): void {
+    if (!idempotencyKey || typeof idempotencyKey !== "string") {
+      throw new ValidationError("Invalid idempotency key", "idempotencyKey");
+    }
+  }
+}
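A sketch (not part of the patch) of the intended SSE wiring: a producer pushes events, and a consumer drains the generator into SSE frames until the stream closes or the client disconnects. `publish` and `toSSE` are hypothetical helpers; the AbortSignal is assumed to come from the HTTP request, and the `id:` field carries the sequence number so a client can resume via `lastSeq`.

import { idempotencyKey } from "@/core/branded";
import { StreamManager } from "@/delivery/stream";
import type { TaskEvent } from "@/domain";

const streams = new StreamManager({ streamBufferSize: 100 });

// Producer side: ensure the stream exists, then push sequenced events.
function publish(key: string, event: TaskEvent) {
  streams.getOrCreate(key);
  streams.push(key, event);
}

// Consumer side: one SSE frame per event, ending on close/abort.
async function* toSSE(key: string, signal: AbortSignal) {
  const k = idempotencyKey(key);
  for await (const event of streams.createGenerator(k, { signal })) {
    yield `id: ${event.seq}\ndata: ${JSON.stringify(event)}\n\n`;
  }
}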
diff --git a/packages/taskflow/src/delivery/types.ts b/packages/taskflow/src/delivery/types.ts
new file mode 100644
index 00000000..bfe276b2
--- /dev/null
+++ b/packages/taskflow/src/delivery/types.ts
@@ -0,0 +1,102 @@
+/**
+ * Delivery Layer Types - Stream Types for event delivery
+ */
+
+import type { TaskEvent } from "@/domain";
+import type { RingBuffer } from "./ring-buffer";
+
+/**
+ * Task event with sequence number for ordering and reconnection
+ */
+export type StreamTaskEvent = TaskEvent & { seq: number };
+
+/**
+ * Options for creating a stream generator
+ */
+export interface TaskStreamOptions {
+  /** Last sequence number received (for reconnection) */
+  lastSeq?: number;
+  /** AbortSignal for cancellation */
+  signal?: AbortSignal;
+}
+
+/**
+ * Internal stream state
+ */
+export interface TaskStream {
+  /** Ring buffer holding sequenced events */
+  buffer: RingBuffer<StreamTaskEvent>;
+  /** Set of listener callbacks to notify on new events */
+  listeners: Set<() => void>;
+  /** Whether the stream has been closed */
+  closed: boolean;
+  /** Cleanup timer handle (for delayed deletion after close) */
+  cleanupTimer: ReturnType<typeof setTimeout> | null;
+  /** Next sequence number to assign */
+  nextSeq: number;
+}
+
+/**
+ * Stream configuration
+ */
+export interface StreamConfig {
+  /** How long to retain closed streams before cleanup (ms) */
+  streamRetentionMs: number;
+  /** Maximum events to buffer per stream */
+  streamBufferSize: number;
+}
+
+/**
+ * Default stream configuration
+ */
+export const DEFAULT_STREAM_CONFIG: StreamConfig = {
+  streamRetentionMs: 60_000, // 1 minute
+  streamBufferSize: 100,
+};
+
+/**
+ * Comprehensive stream statistics for monitoring
+ */
+export interface StreamStats {
+  /** Stream counts */
+  streams: {
+    active: number;
+    closed: number;
+    total: number;
+  };
+
+  /** Configuration */
+  config: {
+    retentionMs: number;
+    bufferSize: number;
+  };
+
+  /** Listener statistics */
+  listeners: {
+    total: number;
+    streamsWithListeners: number;
+    maxOnSingleStream: number;
+  };
+
+  /** Buffer statistics */
+  buffer: {
+    totalEvents: number;
+    overflows: number;
+  };
+
+  /** Event flow statistics */
+  events: {
+    pushed: number;
+    consumed: number;
+  };
+
+  /** Debug information */
+  debug: {
+    topStreamsByBufferedEvents: Array<{
+      idempotencyKey: string;
+      bufferedEvents: number;
+      listeners: number;
+      closed: boolean;
+    }>;
+  };
+}
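A reconnection sketch built on `TaskStreamOptions.lastSeq`: resume from the client's last event id, and fall back to a full replay if those events were already evicted from the ring buffer. Not part of the patch; it assumes the producer has already closed the stream (so the loops terminate) and that `StreamOverflowError` exposes the same static `is` guard as the other error classes in this package.

import { idempotencyKey } from "@/core/branded";
import { StreamOverflowError } from "@/core/errors";
import { StreamManager } from "@/delivery/stream";
import type { StreamTaskEvent } from "@/delivery/types";

const streams = new StreamManager();

async function resume(key: string, lastEventId: number) {
  const k = idempotencyKey(key);
  const events: StreamTaskEvent[] = [];
  try {
    for await (const e of streams.createGenerator(k, { lastSeq: lastEventId })) {
      events.push(e);
    }
  } catch (error) {
    if (StreamOverflowError.is(error)) {
      // requested seq was evicted: replay whatever is still buffered
      for await (const e of streams.createGenerator(k)) events.push(e);
    } else {
      throw error;
    }
  }
  return events;
}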
diff --git a/packages/taskflow/src/tests/delivery/ring-buffer.test.ts b/packages/taskflow/src/tests/delivery/ring-buffer.test.ts
new file mode 100644
index 00000000..57a00329
--- /dev/null
+++ b/packages/taskflow/src/tests/delivery/ring-buffer.test.ts
@@ -0,0 +1,235 @@
+import { beforeEach, describe, expect, it } from "vitest";
+import { ValidationError } from "@/core/errors";
+import { RingBuffer } from "@/delivery/ring-buffer";
+
+interface TestItem {
+  id: string;
+  value: number;
+}
+
+const keyExtractor = (item: TestItem) => item.id;
+
+describe("RingBuffer", () => {
+  let buffer: RingBuffer<TestItem>;
+
+  beforeEach(() => {
+    buffer = new RingBuffer(5, keyExtractor);
+  });
+
+  describe("constructor", () => {
+    it("should create a buffer with the specified capacity", () => {
+      const buf = new RingBuffer(10, keyExtractor);
+      expect(buf.getCapacity()).toBe(10);
+      expect(buf.getSize()).toBe(0);
+    });
+
+    it("should throw ValidationError for zero capacity", () => {
+      expect(() => new RingBuffer(0, keyExtractor)).toThrow(
+        ValidationError,
+      );
+    });
+
+    it("should throw ValidationError for negative capacity", () => {
+      expect(() => new RingBuffer(-1, keyExtractor)).toThrow(
+        ValidationError,
+      );
+    });
+  });
+
+  describe("add", () => {
+    it("should add items to buffer", () => {
+      buffer.add({ id: "a", value: 1 });
+      buffer.add({ id: "b", value: 2 });
+
+      expect(buffer.getSize()).toBe(2);
+      expect(buffer.has("a")).toBe(true);
+      expect(buffer.has("b")).toBe(true);
+    });
+
+    it("should update existing item with same key", () => {
+      buffer.add({ id: "a", value: 1 });
+      buffer.add({ id: "a", value: 100 });
+
+      expect(buffer.getSize()).toBe(1);
+      expect(buffer.get("a")?.value).toBe(100);
+    });
+
+    it("should evict oldest when capacity is reached", () => {
+      for (let i = 1; i <= 6; i++) {
+        buffer.add({ id: `item-${i}`, value: i });
+      }
+
+      expect(buffer.getSize()).toBe(5);
+      expect(buffer.has("item-1")).toBe(false); // evicted
+      expect(buffer.has("item-2")).toBe(true);
+      expect(buffer.has("item-6")).toBe(true);
+    });
+
+    it("should track overflow count when items are evicted", () => {
+      for (let i = 1; i <= 7; i++) {
+        buffer.add({ id: `item-${i}`, value: i });
+      }
+
+      expect(buffer.getOverflowCount()).toBe(2);
+    });
+  });
+
+  describe("get", () => {
+    it("should return item if exists", () => {
+      buffer.add({ id: "a", value: 42 });
+
+      const item = buffer.get("a");
+      expect(item).toEqual({ id: "a", value: 42 });
+    });
+
+    it("should return null if item does not exist", () => {
+      expect(buffer.get("nonexistent")).toBeNull();
+    });
+  });
+
+  describe("has", () => {
+    it("should return true if item exists", () => {
+      buffer.add({ id: "a", value: 1 });
+      expect(buffer.has("a")).toBe(true);
+    });
+
+    it("should return false if item does not exist", () => {
+      expect(buffer.has("nonexistent")).toBe(false);
+    });
+  });
+
+  describe("remove", () => {
+    it("should remove item from buffer", () => {
+      buffer.add({ id: "a", value: 1 });
+      buffer.add({ id: "b", value: 2 });
+
+      buffer.remove("a");
+
+      expect(buffer.has("a")).toBe(false);
+
expect(buffer.has("b")).toBe(true); + expect(buffer.getSize()).toBe(1); + }); + + it("should do nothing if item does not exist", () => { + buffer.add({ id: "a", value: 1 }); + + buffer.remove("nonexistent"); + + expect(buffer.getSize()).toBe(1); + }); + }); + + describe("getAll", () => { + it("should return all items in insertion order", () => { + buffer.add({ id: "a", value: 1 }); + buffer.add({ id: "b", value: 2 }); + buffer.add({ id: "c", value: 3 }); + + const items = buffer.getAll(); + + expect(items).toHaveLength(3); + expect(items[0].id).toBe("a"); + expect(items[1].id).toBe("b"); + expect(items[2].id).toBe("c"); + }); + + it("should return items in correct order after evictions", () => { + for (let i = 1; i <= 7; i++) { + buffer.add({ id: `item-${i}`, value: i }); + } + + const items = buffer.getAll(); + + expect(items).toHaveLength(5); + expect(items[0].id).toBe("item-3"); + expect(items[4].id).toBe("item-7"); + }); + + it("should return empty array for empty buffer", () => { + expect(buffer.getAll()).toEqual([]); + }); + + it("should skip null slots from removed items", () => { + buffer.add({ id: "a", value: 1 }); + buffer.add({ id: "b", value: 2 }); + buffer.add({ id: "c", value: 3 }); + + buffer.remove("b"); + + const items = buffer.getAll(); + expect(items).toHaveLength(2); + expect(items.map((i) => i.id)).toEqual(["a", "c"]); + }); + }); + + describe("getSize", () => { + it("should return current number of items in buffer", () => { + expect(buffer.getSize()).toBe(0); + + buffer.add({ id: "a", value: 1 }); + expect(buffer.getSize()).toBe(1); + + buffer.add({ id: "b", value: 2 }); + expect(buffer.getSize()).toBe(2); + }); + + it("should not exceed capacity", () => { + for (let i = 0; i < 10; i++) { + buffer.add({ id: `item-${i}`, value: i }); + } + + expect(buffer.getSize()).toBe(5); + }); + }); + + describe("clear", () => { + it("should remove all items", () => { + buffer.add({ id: "a", value: 1 }); + buffer.add({ id: "b", value: 2 }); + + buffer.clear(); + + expect(buffer.getSize()).toBe(0); + expect(buffer.getAll()).toEqual([]); + expect(buffer.has("a")).toBe(false); + }); + + it("should reset overflow count", () => { + for (let i = 0; i < 10; i++) { + buffer.add({ id: `item-${i}`, value: i }); + } + + buffer.clear(); + + expect(buffer.getOverflowCount()).toBe(0); + }); + }); + + describe("getCapacity", () => { + it("should return the capacity of the buffer", () => { + const buf = new RingBuffer(42, keyExtractor); + expect(buf.getCapacity()).toBe(42); + }); + }); + + describe("getOverflowCount", () => { + it("should return 0 when no evictions have occurred", () => { + buffer.add({ id: "a", value: 1 }); + expect(buffer.getOverflowCount()).toBe(0); + }); + + it("should count each eviction", () => { + const smallBuffer = new RingBuffer(2, keyExtractor); + + smallBuffer.add({ id: "a", value: 1 }); + smallBuffer.add({ id: "b", value: 2 }); + expect(smallBuffer.getOverflowCount()).toBe(0); + + smallBuffer.add({ id: "c", value: 3 }); // evicts 'a' + expect(smallBuffer.getOverflowCount()).toBe(1); + + smallBuffer.add({ id: "d", value: 4 }); // evicts 'b' + expect(smallBuffer.getOverflowCount()).toBe(2); + }); + }); +}); diff --git a/packages/taskflow/src/tests/delivery/stream.test.ts b/packages/taskflow/src/tests/delivery/stream.test.ts new file mode 100644 index 00000000..ffe9285c --- /dev/null +++ b/packages/taskflow/src/tests/delivery/stream.test.ts @@ -0,0 +1,549 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { + eventId, + idempotencyKey, 
+  taskId,
+  taskName,
+  userId,
+} from "@/core/branded";
+import { StreamOverflowError, ValidationError } from "@/core/errors";
+import { StreamManager } from "@/delivery/stream";
+import type { TaskEvent } from "@/domain";
+import { noopHooks, type TaskSystemHooks } from "@/observability";
+
+function createMockEvent(overrides?: Partial<TaskEvent>): TaskEvent {
+  return {
+    id: eventId(overrides?.id ?? crypto.randomUUID()),
+    type: "progress",
+    taskId: taskId("task-123"),
+    name: taskName("test-task"),
+    idempotencyKey: idempotencyKey("a".repeat(64)),
+    userId: userId("user-123"),
+    taskType: "user",
+    message: "Test event",
+    ...overrides,
+  } as TaskEvent;
+}
+
+describe("StreamManager", () => {
+  let manager: StreamManager;
+
+  beforeEach(() => {
+    manager = new StreamManager({
+      streamRetentionMs: 1000,
+      streamBufferSize: 10,
+    });
+  });
+
+  afterEach(() => {
+    manager.clearAll();
+    vi.clearAllMocks();
+    vi.useRealTimers();
+  });
+
+  describe("getOrCreate", () => {
+    it("should create a new stream when it does not exist", () => {
+      const key = idempotencyKey("a".repeat(64));
+      const stream = manager.getOrCreate(key);
+
+      expect(stream).toBeDefined();
+      expect(stream.closed).toBe(false);
+      expect(stream.nextSeq).toBe(1);
+      expect(stream.listeners.size).toBe(0);
+    });
+
+    it("should return existing stream when already created", () => {
+      const key = idempotencyKey("a".repeat(64));
+      const stream1 = manager.getOrCreate(key);
+      const stream2 = manager.getOrCreate(key);
+
+      expect(stream1).toBe(stream2);
+    });
+
+    it("should create separate streams for different keys", () => {
+      const key1 = idempotencyKey("a".repeat(64));
+      const key2 = idempotencyKey("b".repeat(64));
+      const stream1 = manager.getOrCreate(key1);
+      const stream2 = manager.getOrCreate(key2);
+
+      expect(stream1).not.toBe(stream2);
+    });
+  });
+
+  describe("get", () => {
+    it("should return stream if exists", () => {
+      const key = idempotencyKey("a".repeat(64));
+      manager.getOrCreate(key);
+      const stream = manager.get(key);
+
+      expect(stream).toBeDefined();
+    });
+
+    it("should return undefined if stream does not exist", () => {
+      const key = idempotencyKey("z".repeat(64));
+      const stream = manager.get(key);
+
+      expect(stream).toBeUndefined();
+    });
+  });
+
+  describe("push", () => {
+    it("should add event to buffer with seq number", () => {
+      const key = idempotencyKey("a".repeat(64));
+      manager.getOrCreate(key);
+      const event = createMockEvent();
+
+      manager.push(key, event);
+
+      const stream = manager.get(key);
+      const events = stream?.buffer.getAll();
+      expect(events).toHaveLength(1);
+      expect(events?.[0].seq).toBe(1);
+    });
+
+    it("should increment seq on each push", () => {
+      const key = idempotencyKey("a".repeat(64));
+      manager.getOrCreate(key);
+
+      manager.push(key, createMockEvent());
+      manager.push(key, createMockEvent());
+      manager.push(key, createMockEvent());
+
+      const stream = manager.get(key);
+      const events = stream?.buffer.getAll();
+      expect(events?.map((e) => e.seq)).toEqual([1, 2, 3]);
+    });
+
+    it("should notify listeners", () => {
+      const key = idempotencyKey("a".repeat(64));
+      const stream = manager.getOrCreate(key);
+      const listener = vi.fn();
+      stream.listeners.add(listener);
+
+      manager.push(key, createMockEvent());
+
+      expect(listener).toHaveBeenCalledTimes(1);
+    });
+
+    it("should do nothing if stream does not exist", () => {
+      const key = idempotencyKey("z".repeat(64));
+      expect(() => manager.push(key, createMockEvent())).not.toThrow();
+    });
+  });
+
+  describe("close", () => {
+    it("should mark stream as
closed", () => { + const key = idempotencyKey("a".repeat(64)); + manager.getOrCreate(key); + + manager.close(key); + + const stream = manager.get(key); + expect(stream?.closed).toBe(true); + }); + + it("should notify listeners when closing", () => { + const key = idempotencyKey("a".repeat(64)); + const stream = manager.getOrCreate(key); + const listener = vi.fn(); + stream.listeners.add(listener); + + manager.close(key); + + expect(listener).toHaveBeenCalledTimes(1); + }); + + it("should clear listeners after closing", () => { + const key = idempotencyKey("a".repeat(64)); + const stream = manager.getOrCreate(key); + stream.listeners.add(vi.fn()); + stream.listeners.add(vi.fn()); + + manager.close(key); + + expect(stream.listeners.size).toBe(0); + }); + + it("should not close if already closed", () => { + const key = idempotencyKey("a".repeat(64)); + const stream = manager.getOrCreate(key); + const listener = vi.fn(); + + manager.close(key); + stream.listeners.add(listener); + manager.close(key); + + expect(listener).not.toHaveBeenCalled(); + }); + + it("should do nothing if stream does not exist", () => { + const key = idempotencyKey("z".repeat(64)); + expect(() => manager.close(key)).not.toThrow(); + }); + + it("should schedule cleanup after retention period", () => { + vi.useFakeTimers(); + const key = idempotencyKey("a".repeat(64)); + manager.getOrCreate(key); + manager.close(key); + + expect(manager.get(key)).toBeDefined(); + + vi.advanceTimersByTime(1100); + + expect(manager.get(key)).toBeUndefined(); + }); + }); + + describe("createGenerator", () => { + it("should yield events in order", async () => { + const key = idempotencyKey("a".repeat(64)); + manager.getOrCreate(key); + manager.push(key, createMockEvent({ message: "Event 1" })); + manager.push(key, createMockEvent({ message: "Event 2" })); + manager.close(key); + + const generator = manager.createGenerator(key); + const events: TaskEvent[] = []; + + for await (const event of generator) { + events.push(event); + } + + expect(events).toHaveLength(2); + expect(events[0].message).toBe("Event 1"); + expect(events[1].message).toBe("Event 2"); + }); + + it("should wait for new events", async () => { + const key = idempotencyKey("a".repeat(64)); + manager.getOrCreate(key); + + const generator = manager.createGenerator(key); + const events: TaskEvent[] = []; + + const collectPromise = (async () => { + for await (const event of generator) { + events.push(event); + if (events.length === 2) break; + } + })(); + + await new Promise((r) => setTimeout(r, 10)); + manager.push(key, createMockEvent({ message: "Event 1" })); + + await new Promise((r) => setTimeout(r, 10)); + manager.push(key, createMockEvent({ message: "Event 2" })); + + await collectPromise; + + expect(events).toHaveLength(2); + }); + + it("should return when stream is closed", async () => { + const key = idempotencyKey("a".repeat(64)); + manager.getOrCreate(key); + manager.push(key, createMockEvent()); + + const generator = manager.createGenerator(key); + const events: TaskEvent[] = []; + + const collectPromise = (async () => { + for await (const event of generator) { + events.push(event); + } + })(); + + await new Promise((r) => setTimeout(r, 10)); + manager.close(key); + + await collectPromise; + + expect(events).toHaveLength(1); + }); + + it("should return immediately if stream does not exist", async () => { + const key = idempotencyKey("z".repeat(64)); + const generator = manager.createGenerator(key); + const events: TaskEvent[] = []; + + for await (const event of 
generator) { + events.push(event); + } + + expect(events).toHaveLength(0); + }); + + it("should support lastSeq for reconnection", async () => { + const key = idempotencyKey("a".repeat(64)); + manager.getOrCreate(key); + manager.push(key, createMockEvent({ message: "Event 1" })); + manager.push(key, createMockEvent({ message: "Event 2" })); + manager.push(key, createMockEvent({ message: "Event 3" })); + manager.close(key); + + const generator = manager.createGenerator(key, { lastSeq: 1 }); + const events: TaskEvent[] = []; + + for await (const event of generator) { + events.push(event); + } + + expect(events).toHaveLength(2); + expect(events[0].message).toBe("Event 2"); + expect(events[1].message).toBe("Event 3"); + }); + + it("should throw StreamOverflowError when lastSeq is evicted from buffer", async () => { + const smallBufferManager = new StreamManager({ + streamBufferSize: 10, + streamRetentionMs: 1000, + }); + + const key = idempotencyKey("a".repeat(64)); + smallBufferManager.getOrCreate(key); + + for (let i = 1; i <= 15; i++) { + smallBufferManager.push( + key, + createMockEvent({ message: `Event ${i}` }), + ); + } + smallBufferManager.close(key); + + const generator = smallBufferManager.createGenerator(key, { + lastSeq: 1, + }); + + await expect(async () => { + for await (const _ of generator) { + // consume, do nothing + } + }).rejects.toThrow(StreamOverflowError); + + smallBufferManager.clearAll(); + }); + + it("should support AbortSignal", async () => { + const key = idempotencyKey("a".repeat(64)); + manager.getOrCreate(key); + + const controller = new AbortController(); + const generator = manager.createGenerator(key, { + signal: controller.signal, + }); + + const collectPromise = (async () => { + const events: TaskEvent[] = []; + try { + for await (const event of generator) { + events.push(event); + } + } catch { + // expected abort + } + return events; + })(); + + await new Promise((r) => setTimeout(r, 10)); + controller.abort(); + + const events = await collectPromise; + expect(events).toHaveLength(0); + }); + + it("should reject with abort reason when aborted while waiting", async () => { + const key = idempotencyKey("a".repeat(64)); + manager.getOrCreate(key); + + const controller = new AbortController(); + const generator = manager.createGenerator(key, { + signal: controller.signal, + }); + + const collectPromise = (async () => { + for await (const _ of generator) { + // waiting for events + } + })(); + + await new Promise((r) => setTimeout(r, 10)); + controller.abort(new Error("User cancelled")); + + await expect(collectPromise).rejects.toThrow("User cancelled"); + }); + }); + + describe("clearAll", () => { + it("should clear all streams", () => { + const key1 = idempotencyKey("a".repeat(64)); + const key2 = idempotencyKey("b".repeat(64)); + const key3 = idempotencyKey("c".repeat(64)); + manager.getOrCreate(key1); + manager.getOrCreate(key2); + manager.getOrCreate(key3); + + manager.clearAll(); + + expect(manager.get(key1)).toBeUndefined(); + expect(manager.get(key2)).toBeUndefined(); + expect(manager.get(key3)).toBeUndefined(); + }); + + it("should clear cleanup timers", () => { + vi.useFakeTimers(); + const key = idempotencyKey("a".repeat(64)); + manager.getOrCreate(key); + manager.close(key); + + manager.clearAll(); + + vi.advanceTimersByTime(200); + }); + }); + + describe("getListenerCount", () => { + it("should return listener count", () => { + const key = idempotencyKey("a".repeat(64)); + const stream = manager.getOrCreate(key); + stream.listeners.add(vi.fn()); + 
stream.listeners.add(vi.fn()); + + expect(manager.getListenerCount(key)).toBe(2); + }); + + it("should return 0 if stream does not exist", () => { + const key = idempotencyKey("z".repeat(64)); + expect(manager.getListenerCount(key)).toBe(0); + }); + + it("should return 0 after listeners are cleared", () => { + const key = idempotencyKey("a".repeat(64)); + const stream = manager.getOrCreate(key); + stream.listeners.add(vi.fn()); + + manager.close(key); + + expect(manager.getListenerCount(key)).toBe(0); + }); + }); + + describe("getStats", () => { + it("should return comprehensive statistics", () => { + const key1 = idempotencyKey("a".repeat(64)); + const key2 = idempotencyKey("b".repeat(64)); + manager.getOrCreate(key1); + manager.getOrCreate(key2); + manager.push(key1, createMockEvent()); + manager.push(key1, createMockEvent()); + manager.close(key2); + + const stats = manager.getStats(); + + expect(stats.streams.active).toBe(1); + expect(stats.streams.closed).toBe(1); + expect(stats.streams.total).toBe(2); + expect(stats.buffer.totalEvents).toBe(2); + expect(stats.events.pushed).toBe(2); + }); + }); + + describe("configuration", () => { + it("should use default config when not provided", () => { + const defaultManager = new StreamManager(); + const key = idempotencyKey("a".repeat(64)); + const stream = defaultManager.getOrCreate(key); + + expect(stream.buffer).toBeDefined(); + defaultManager.clearAll(); + }); + + it("should respect custom buffer size", () => { + const customManager = new StreamManager({ streamBufferSize: 10 }); + const key = idempotencyKey("a".repeat(64)); + customManager.getOrCreate(key); + + for (let i = 0; i < 20; i++) { + customManager.push(key, createMockEvent()); + } + + const stream = customManager.get(key); + expect(stream?.buffer.getAll()).toHaveLength(10); + + customManager.clearAll(); + }); + }); + + describe("validation", () => { + it("should throw ValidationError for empty idempotencyKey in getOrCreate", () => { + expect(() => manager.getOrCreate(idempotencyKey(""))).toThrow( + ValidationError, + ); + }); + + it("should throw ValidationError for empty idempotencyKey in get", () => { + expect(() => manager.get(idempotencyKey(""))).toThrow(ValidationError); + }); + + it("should throw ValidationError for empty idempotencyKey in push", () => { + expect(() => manager.push(idempotencyKey(""), createMockEvent())).toThrow( + ValidationError, + ); + }); + + it("should throw ValidationError for empty idempotencyKey in close", () => { + expect(() => manager.close(idempotencyKey(""))).toThrow(ValidationError); + }); + + it("should throw ValidationError for empty idempotencyKey in createGenerator", async () => { + const generator = manager.createGenerator(idempotencyKey("")); + await expect(generator.next()).rejects.toThrow(ValidationError); + }); + + it("should throw ValidationError for empty idempotencyKey in getListenerCount", () => { + expect(() => manager.getListenerCount(idempotencyKey(""))).toThrow( + ValidationError, + ); + }); + }); + + describe("observability hooks", () => { + it("should call incrementCounter on push", () => { + const mockHooks: TaskSystemHooks = { + ...noopHooks, + incrementCounter: vi.fn(), + }; + + const hookedManager = new StreamManager( + { streamBufferSize: 10, streamRetentionMs: 1000 }, + mockHooks, + ); + + const key = idempotencyKey("a".repeat(64)); + hookedManager.getOrCreate(key); + hookedManager.push(key, createMockEvent()); + + expect(mockHooks.incrementCounter).toHaveBeenCalled(); + + hookedManager.clearAll(); + }); + it("should 
record gauge for active streams", () => { + const mockHooks: TaskSystemHooks = { + ...noopHooks, + recordGauge: vi.fn(), + }; + + const hookedManager = new StreamManager( + { streamBufferSize: 10, streamRetentionMs: 1000 }, + mockHooks, + ); + + const key = idempotencyKey("a".repeat(64)); + hookedManager.getOrCreate(key); + + expect(mockHooks.recordGauge).toHaveBeenCalled(); + + hookedManager.clearAll(); + }); + }); +}); From f37f2a23f3f5c7a3d35aa95bc0d59d7a4db78962 Mon Sep 17 00:00:00 2001 From: Ditadi Date: Tue, 27 Jan 2026 14:08:58 +0000 Subject: [PATCH 08/13] feat(taskflow): persistence layer with sqlite and lakebase operations --- .npmrc | 1 + packages/taskflow/package.json | 6 +- packages/taskflow/src/core/errors.ts | 111 +++ packages/taskflow/src/observability/hooks.ts | 34 + .../taskflow/src/persistence/event-log.ts | 656 ++++++++++++++++++ packages/taskflow/src/persistence/index.ts | 23 + .../src/persistence/repository/index.ts | 54 ++ .../persistence/repository/lakebase/index.ts | 8 + .../migrations/001_create_tasks_table.sql | 37 + .../002_create_task_events_table.sql | 15 + .../repository/lakebase/repository.ts | 472 +++++++++++++ .../persistence/repository/lakebase/types.ts | 92 +++ .../repository/sqlite/connector.ts | 491 +++++++++++++ .../persistence/repository/sqlite/index.ts | 8 + .../migrations/001_create_tasks_table.sql | 42 ++ .../002_create_task_events_table.sql | 19 + .../repository/sqlite/repository.ts | 85 +++ .../persistence/repository/sqlite/types.ts | 84 +++ .../src/persistence/repository/types.ts | 102 +++ packages/taskflow/src/persistence/types.ts | 80 +++ .../src/tests/persistence/event-log.test.ts | 192 +++++ .../persistence/repository/lakebase.test.ts | 148 ++++ .../persistence/repository/sqlite.test.ts | 347 +++++++++ pnpm-lock.yaml | 157 +++++ pnpm-workspace.yaml | 2 + 25 files changed, 3265 insertions(+), 1 deletion(-) create mode 100644 packages/taskflow/src/persistence/event-log.ts create mode 100644 packages/taskflow/src/persistence/index.ts create mode 100644 packages/taskflow/src/persistence/repository/index.ts create mode 100644 packages/taskflow/src/persistence/repository/lakebase/index.ts create mode 100644 packages/taskflow/src/persistence/repository/lakebase/migrations/001_create_tasks_table.sql create mode 100644 packages/taskflow/src/persistence/repository/lakebase/migrations/002_create_task_events_table.sql create mode 100644 packages/taskflow/src/persistence/repository/lakebase/repository.ts create mode 100644 packages/taskflow/src/persistence/repository/lakebase/types.ts create mode 100644 packages/taskflow/src/persistence/repository/sqlite/connector.ts create mode 100644 packages/taskflow/src/persistence/repository/sqlite/index.ts create mode 100644 packages/taskflow/src/persistence/repository/sqlite/migrations/001_create_tasks_table.sql create mode 100644 packages/taskflow/src/persistence/repository/sqlite/migrations/002_create_task_events_table.sql create mode 100644 packages/taskflow/src/persistence/repository/sqlite/repository.ts create mode 100644 packages/taskflow/src/persistence/repository/sqlite/types.ts create mode 100644 packages/taskflow/src/persistence/repository/types.ts create mode 100644 packages/taskflow/src/persistence/types.ts create mode 100644 packages/taskflow/src/tests/persistence/event-log.test.ts create mode 100644 packages/taskflow/src/tests/persistence/repository/lakebase.test.ts create mode 100644 packages/taskflow/src/tests/persistence/repository/sqlite.test.ts diff --git a/.npmrc b/.npmrc index 
24d9971c..6cbe4e37 100644 --- a/.npmrc +++ b/.npmrc @@ -1,3 +1,4 @@ shamefully-hoist=true auto-install-peers=true public-hoist-pattern[]=* +onlyBuiltDependencies[]=better-sqlite3 diff --git a/packages/taskflow/package.json b/packages/taskflow/package.json index 35f3a5c0..bb2c9a8e 100644 --- a/packages/taskflow/package.json +++ b/packages/taskflow/package.json @@ -25,9 +25,13 @@ "test": "cd ../.. && vitest run --project=taskflow" }, "devDependencies": { + "@types/better-sqlite3": "^7.6.13", + "@types/pg": "^8.15.6", "vitest": "^3.2.4" }, "dependencies": { - "json-canonicalize": "^2.0.0" + "better-sqlite3": "^12.6.2", + "json-canonicalize": "^2.0.0", + "pg": "^8.16.3" } } diff --git a/packages/taskflow/src/core/errors.ts b/packages/taskflow/src/core/errors.ts index 507fc107..43214fd4 100644 --- a/packages/taskflow/src/core/errors.ts +++ b/packages/taskflow/src/core/errors.ts @@ -39,6 +39,14 @@ export const ErrorCodes = { // system errors INITIALIZATION_FAILED: "INITIALIZATION_FAILED", + + // persistence errors + EVENTLOG_WRITE_FAILED: "EVENTLOG_WRITE_FAILED", + EVENTLOG_ROTATION_FAILED: "EVENTLOG_ROTATION_FAILED", + REPOSITORY_MIGRATION_FAILED: "REPOSITORY_MIGRATION_FAILED", + REPOSITORY_QUERY_FAILED: "REPOSITORY_QUERY_FAILED", + REPOSITORY_BATCH_FAILED: "REPOSITORY_BATCH_FAILED", + INVALID_PATH: "INVALID_PATH", } as const; export type ErrorCode = (typeof ErrorCodes)[keyof typeof ErrorCodes]; @@ -406,6 +414,109 @@ export class StreamOverflowError extends TaskSystemError { } } +/** + * EventLog write failure error + */ +export class EventLogError extends TaskSystemError { + readonly operation: "write" | "rotate" | "compact" | "read"; + readonly path?: string; + + constructor( + message: string, + operation: "write" | "rotate" | "compact" | "read", + path?: string, + cause?: Error, + ) { + super( + message, + operation === "write" + ? ErrorCodes.EVENTLOG_WRITE_FAILED + : ErrorCodes.EVENTLOG_ROTATION_FAILED, + { path, operation }, + cause, + ); + this.name = "EventLogError"; + this.operation = operation; + this.path = path; + } + + static is(value: unknown): value is EventLogError { + return ( + value !== null && + typeof value === "object" && + "name" in value && + (value as Error).name === "EventLogError" + ); + } +} + +/** + * Repository operation failure error + */ +export class RepositoryError extends TaskSystemError { + readonly repositoryType: "sqlite" | "lakebase"; + readonly operation: "query" | "batch" | "migration"; + readonly isRetryable: boolean; + + constructor( + message: string, + repositoryType: "sqlite" | "lakebase", + operation: "query" | "batch" | "migration", + isRetryable = false, + cause?: Error, + ) { + const code = + operation === "migration" + ? ErrorCodes.REPOSITORY_MIGRATION_FAILED + : operation === "query" + ? 
ErrorCodes.REPOSITORY_QUERY_FAILED + : ErrorCodes.REPOSITORY_BATCH_FAILED; + + super(message, code, { repositoryType, operation, isRetryable }, cause); + this.name = "RepositoryError"; + this.repositoryType = repositoryType; + this.operation = operation; + this.isRetryable = isRetryable; + } + + static is(value: unknown): value is RepositoryError { + return ( + value !== null && + typeof value === "object" && + "name" in value && + (value as Error).name === "RepositoryError" + ); + } +} + +/** + * Invalid path error for security violations + */ +export class InvalidPathError extends TaskSystemError { + readonly path: string; + readonly reason: "traversal" | "absolute" | "invalid"; + + constructor(path: string, reason: "traversal" | "absolute" | "invalid") { + super( + `Invalid path detected: ${reason} in "${path}"`, + ErrorCodes.INVALID_PATH, + { path, reason }, + ); + this.name = "InvalidPathError"; + this.path = path; + this.reason = reason; + } + + static is(value: unknown): value is InvalidPathError { + return ( + value !== null && + typeof value === "object" && + "name" in value && + (value as Error).name === "InvalidPathError" + ); + } +} + // Known retryable error patterns const RETRYABLE_ERROR_PATTERNS = [ "ECONNRESET", diff --git a/packages/taskflow/src/observability/hooks.ts b/packages/taskflow/src/observability/hooks.ts index 3e281df3..265d1faf 100644 --- a/packages/taskflow/src/observability/hooks.ts +++ b/packages/taskflow/src/observability/hooks.ts @@ -84,6 +84,17 @@ export const TaskMetrics = { DLQ_ADDED: "taskflow.dlq.added", DLQ_RETRIED: "taskflow.dlq.retried", + // persistence counters + EVENTLOG_ENTRIES_WRITTEN: "taskflow.eventlog.entries_written", + EVENTLOG_ROTATIONS: "taskflow.eventlog.rotations", + EVENTLOG_COMPACTIONS: "taskflow.eventlog.compactions", + EVENTLOG_MALFORMED_SKIPPED: "taskflow.eventlog.malformed_skipped", + + REPOSITORY_BATCH_EXECUTED: "taskflow.repository.batch_executed", + REPOSITORY_QUERIES: "taskflow.repository.queries", + REPOSITORY_ERRORS: "taskflow.repository.errors", + REPOSITORY_RETRIES: "taskflow.repository.retries", + // gauges TASKS_RUNNING: "taskflow.tasks.running", TASKS_QUEUED: "taskflow.tasks.queued", @@ -91,11 +102,19 @@ export const TaskMetrics = { DLQ_SIZE: "taskflow.dlq.size", STREAMS_ACTIVE: "taskflow.streams.active", + EVENTLOG_SEQUENCE: "taskflow.eventlog.sequence", + EVENTLOG_SIZE_BYTES: "taskflow.eventlog.size_bytes", + // histograms TASK_DURATION_MS: "taskflow.task.duration_ms", TASK_QUEUE_WAIT_MS: "taskflow.task.queue_wait_ms", FLUSH_DURATION_MS: "taskflow.flush.duration_ms", FLUSH_BATCH_SIZE: "taskflow.flush.batch_size", + + EVENTLOG_WRITE_LATENCY_MS: "taskflow.eventlog.write_latency_ms", + EVENTLOG_ROTATION_DURATION_MS: "taskflow.eventlog.rotation_duration_ms", + REPOSITORY_QUERY_LATENCY_MS: "taskflow.repository.query_latency_ms", + REPOSITORY_BATCH_LATENCY_MS: "taskflow.repository.batch_latency_ms", } as const; /** @@ -118,6 +137,12 @@ export const TaskSpans = { REPOSITORY_QUERY: "taskflow.repository.query", REPOSITORY_WRITE: "taskflow.repository.write", + + EVENTLOG_APPEND: "taskflow.eventlog.append", + EVENTLOG_ROTATE: "taskflow.eventlog.rotate", + EVENTLOG_COMPACT: "taskflow.eventlog.compact", + REPOSITORY_BATCH: "taskflow.repository.batch", + REPOSITORY_MIGRATION: "taskflow.repository.migration", } as const; /** @@ -139,4 +164,13 @@ export const TaskAttributes = { FLUSH_BATCH_SIZE: "taskflow.flush.batch_size", REPOSITORY_TYPE: "taskflow.repository.type", + + EVENTLOG_PATH: "taskflow.eventlog.path", + EVENTLOG_SEQUENCE: 
"taskflow.eventlog.sequence", + EVENTLOG_FSYNC: "taskflow.eventlog.fsync", + EVENTLOG_COMPACTIONS: "taskflow.eventlog.compactions", + EVENTLOG_COMPACTION_DURATION_MS: "taskflow.eventlog.compaction_duration_ms", + EVENT_TYPE: "taskflow.event.type", + BATCH_SIZE: "taskflow.batch.size", + MIGRATION_NAME: "taskflow.migration.name", } as const; diff --git a/packages/taskflow/src/persistence/event-log.ts b/packages/taskflow/src/persistence/event-log.ts new file mode 100644 index 00000000..cec86dec --- /dev/null +++ b/packages/taskflow/src/persistence/event-log.ts @@ -0,0 +1,656 @@ +import { createHash } from "node:crypto"; +import fs from "node:fs/promises"; +import path from "node:path"; +import { canonicalize } from "json-canonicalize"; +import { EventLogError } from "@/core/errors"; +import type { TaskStatus } from "@/core/types"; +import type { EventLogEntry, TaskEvent } from "@/domain"; +import { + noopHooks, + TaskAttributes, + TaskMetrics, + type TaskSystemHooks, +} from "@/observability"; +import { + DEFAULT_EVENT_LOG_CONFIG, + type EventLogConfig, + type EventLogEvent, + type EventLogStats, +} from "./types"; + +/** + * Event Log - File-based Write-Ahead log + * + * Provides durable event storage with: + * - Append-only file operations + * - Optional fsync for critical events + * - Rotation based on size/age + * - Compaction of rotated files + * - Checkpoint-based recovery + */ +export class EventLog { + private config: EventLogConfig; + private hooks: TaskSystemHooks; + + private fileHandle: fs.FileHandle | null = null; + + private rotationInterval?: ReturnType; + private rotationLock: Promise = Promise.resolve(); + private lastRotationAt: number | null = null; + private isRotating = false; + + /** current sequence number */ + currentSeq = 0; + /** number of rotations performed */ + rotationCount = 0; + /** total entries written */ + private entriesWritten = 0; + /** count of malformed entries skipped during reads */ + private malformedEntriesSkipped = 0; + + constructor( + config: Partial, + hooks: TaskSystemHooks = noopHooks, + ) { + this.config = { ...DEFAULT_EVENT_LOG_CONFIG, ...config }; + this.hooks = hooks; + + // validate event log path + this.validatePath(this.config.eventLogPath); + } + + /** + * Initialize the event log + * Creates the log file and checkpoint, schedule rotation + */ + async initialize(): Promise { + // create the directory if it doesn't exist + const dir = path.dirname(this.config.eventLogPath); + await fs.mkdir(dir, { recursive: true }); + this.fileHandle = await fs.open(this.config.eventLogPath, "a"); + + // load or create checkpoint + const previousSeq = await this.loadCheckpoint(); + this.currentSeq = previousSeq; + await fs.writeFile( + `${this.config.eventLogPath}.checkpoint`, + this.currentSeq.toString(), + "utf8", + ); + + this.scheduleRotation(); + + this.hooks?.log({ + severity: "info", + message: "Event log initialized", + attributes: { + [TaskAttributes.EVENTLOG_PATH]: this.config.eventLogPath, + [TaskAttributes.EVENTLOG_SEQUENCE]: this.currentSeq, + }, + }); + + this.hooks?.recordGauge(TaskMetrics.EVENTLOG_SEQUENCE, this.currentSeq, { + [TaskAttributes.EVENTLOG_PATH]: this.config.eventLogPath, + }); + } + + /** + * Append an entry to the log file + * @param entry - The entry to append + * @param fsync - Whether force sync to disk (critical events) + */ + async appendEntry(entry: EventLogEntry, fsync = false): Promise { + if (!this.fileHandle) return; + + const startTime = Date.now(); + + // wait for any ongoing rotation to complete + await 
+
+  /**
+   * Append an entry to the log file
+   * @param entry - The entry to append
+   * @param fsync - Whether to force a sync to disk (critical events)
+   */
+  async appendEntry(entry: EventLogEntry, fsync = false): Promise<void> {
+    if (!this.fileHandle) return;
+
+    const startTime = Date.now();
+
+    // wait for any ongoing rotation to complete
+    await this.rotationLock;
+
+    try {
+      this.currentSeq++;
+      this.entriesWritten++;
+
+      const eventPayload: EventLogEvent = {
+        seq: this.currentSeq,
+        ...entry,
+      };
+
+      // compute checksum
+      eventPayload.checksum = this.computeChecksum(eventPayload);
+
+      // write to file
+      const line = `${JSON.stringify(eventPayload)}\n`;
+      await this.fileHandle.write(line);
+
+      if (fsync) await this.fileHandle.sync();
+
+      // save checkpoint periodically or on fsync
+      if (this.currentSeq % 100 === 0 || fsync) {
+        await this.saveCheckpoint();
+      }
+
+      this.hooks?.incrementCounter(TaskMetrics.EVENTLOG_ENTRIES_WRITTEN, 1, {
+        [TaskAttributes.EVENT_TYPE]: entry.type,
+        [TaskAttributes.EVENTLOG_FSYNC]: fsync,
+      });
+
+      this.hooks?.recordHistogram(
+        TaskMetrics.EVENTLOG_WRITE_LATENCY_MS,
+        Date.now() - startTime,
+        {
+          [TaskAttributes.EVENT_TYPE]: entry.type,
+        },
+      );
+
+      this.hooks?.recordGauge(TaskMetrics.EVENTLOG_SEQUENCE, this.currentSeq);
+    } catch (error) {
+      const eventLogError = new EventLogError(
+        "Failed to append entry to event log",
+        "write",
+        this.config.eventLogPath,
+        error instanceof Error ? error : new Error(String(error)),
+      );
+
+      this.hooks?.log({
+        severity: "error",
+        message: eventLogError.message,
+        error: eventLogError,
+        attributes: {
+          taskId: entry.taskId,
+          [TaskAttributes.EVENT_TYPE]: entry.type,
+        },
+      });
+
+      throw eventLogError;
+    }
+  }
+
+  /**
+   * Append a TaskEvent to the log
+   * Converts TaskEvent to EventLogEntry format
+   */
+  async appendEvent(event: TaskEvent): Promise<void> {
+    const base = {
+      taskId: event.taskId,
+      idempotencyKey: event.idempotencyKey,
+      name: event.name,
+      userId: event.userId ?? null,
+      taskType: event.taskType,
+      timestamp: event.timestamp ?? Date.now(),
+    };
+
+    switch (event.type) {
+      case "created":
+        await this.appendEntry(
+          {
+            ...base,
+            type: "TASK_CREATED",
+            input: event.input,
+            executionOptions: event.executionOptions,
+          },
+          true,
+        );
+        break;
+      case "start":
+        await this.appendEntry(
+          {
+            ...base,
+            type: "TASK_START",
+          },
+          true,
+        );
+        break;
+      case "progress":
+        await this.appendEntry({
+          ...base,
+          type: "TASK_PROGRESS",
+          payload: event.payload,
+        });
+        break;
+      case "complete":
+        await this.appendEntry(
+          {
+            ...base,
+            type: "TASK_COMPLETE",
+            result: event.result,
+          },
+          true,
+        );
+        break;
+      case "heartbeat":
+        await this.appendEntry({
+          ...base,
+          type: "TASK_HEARTBEAT",
+        });
+        break;
+      case "error":
+        await this.appendEntry(
+          {
+            ...base,
+            type: "TASK_CANCELLED",
+            error: event.error ?? "Unknown reason",
+          },
+          true,
+        );
+        break;
+      case "custom":
+        await this.appendEntry({
+          ...base,
+          type: "TASK_CUSTOM",
+          payload: {
+            eventName: event.eventName,
+            ...event.payload,
+          },
+        });
+        break;
+    }
+  }
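A lifecycle sketch (not part of the patch) of the write path above. The log path, `events`, and `lastFlushedSeq` are placeholders; the comments reflect the fsync behavior of `appendEvent` as implemented.

import type { TaskEvent } from "@/domain";
import { EventLog } from "@/persistence/event-log";

async function demo(events: TaskEvent[], lastFlushedSeq: number) {
  const log = new EventLog({ eventLogPath: "data/taskflow.log" });
  await log.initialize();

  // Critical events (created/start/complete/error) are fsynced internally;
  // progress and heartbeat entries are buffered appends.
  for (const event of events) {
    await log.appendEvent(event);
  }

  // A flush worker drains everything written after its checkpoint.
  const pending = await log.readEntriesFromCheckpoint(lastFlushedSeq);

  await log.close(); // saves the checkpoint and fsyncs before closing
  return pending;
}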
"Unknown reason", + }, + true, + ); + break; + case "custom": + await this.appendEntry({ + ...base, + type: "TASK_CUSTOM", + payload: { + eventName: event.eventName, + ...event.payload, + }, + }); + break; + } + } + + /** + * Read entries from a checkpoint position + * Used by flush worker to get entries to flush + */ + async readEntriesFromCheckpoint( + checkpoint: number, + ): Promise { + const entries = await this.readEntries(this.config.eventLogPath); + return entries.filter((entry) => { + const eventLogPath = entry as EventLogEvent; + return eventLogPath.seq > checkpoint; + }); + } + + /** + * Get the current sequence number from checkpoint file + */ + async getSequenceNumber(): Promise { + const seqFilePath = `${this.config.eventLogPath}.checkpoint`; + try { + const seq = await fs.readFile(seqFilePath, "utf8"); + return parseInt(seq, 10); + } catch { + return 0; + } + } + + /** + * Check if log file should be rotated + */ + async shouldRotateEventLog(): Promise { + try { + const stats = await fs.stat(this.config.eventLogPath); + const age = Date.now() - stats.mtime.getTime(); + return ( + stats.size >= this.config.maxSizeBytesPerFile || + age >= this.config.maxAgePerFile + ); + } catch { + return false; + } + } + + /** + * Perform rotation if needed + */ + async performRotation(): Promise { + if (this.isRotating) return; + + if (await this.shouldRotateEventLog()) { + await this.rotateEventLog(); + } + } + + /** + * Compact a rotated log file + * Removes heartbeats and entries for completed/old failed tasks + */ + async compactRotatedFile(filePath: string): Promise { + const startTime = Date.now(); + + try { + const entries = await this.readEntries(filePath); + + const tasksState = this.buildTaskState(entries); + const failedThresholdMs = Date.now() - 1000 * 60 * 60 * 24; // 24 hours + + // filter entries + const compactedEntries = entries.filter((entry) => { + const taskId = entry.taskId; + const finalState = tasksState.get(taskId); + + if (!finalState) return false; + + // remove completed task entries + if (finalState === "completed") return false; + + // remove old failed/cancelled entries + if ( + finalState === "failed" || + (finalState === "cancelled" && entry.timestamp < failedThresholdMs) + ) + return false; + + // remove heartbeats + if (entry.type === "TASK_HEARTBEAT") return false; + + return true; + }); + + // write compacted entries + const content = compactedEntries + .map((entry) => JSON.stringify(entry)) + .join("\n"); + await fs.writeFile(filePath, content, "utf8"); + + this.hooks?.incrementCounter(TaskMetrics.EVENTLOG_COMPACTIONS, 1); + + this.hooks?.log({ + severity: "info", + message: "Compacted rotated file", + attributes: { + [TaskAttributes.EVENTLOG_PATH]: filePath, + [TaskAttributes.EVENTLOG_COMPACTIONS]: compactedEntries.length, + [TaskAttributes.EVENTLOG_COMPACTION_DURATION_MS]: + Date.now() - startTime, + }, + }); + } catch (error) { + throw new EventLogError( + "Failed to compact rotated file", + "compact", + filePath, + error instanceof Error ? error : new Error(String(error)), + ); + } + } + + /** + * Get event log statistics + */ + getStats(): EventLogStats { + return { + status: { + initialized: this.fileHandle !== null, + path: this.config.eventLogPath, + }, + sequence: { + current: this.currentSeq, + }, + rotation: { + count: this.rotationCount, + isActive: this.isRotating, + lastAt: this.lastRotationAt ?? 
diff --git a/packages/taskflow/src/persistence/repository/lakebase/index.ts b/packages/taskflow/src/persistence/repository/lakebase/index.ts
new file mode 100644
index 00000000..e79c9fb1
--- /dev/null
+++ b/packages/taskflow/src/persistence/repository/lakebase/index.ts
@@ -0,0 +1,8 @@
+export { LakebaseTaskRepository } from "./repository";
+export type {
+  LakebaseConnector,
+  LakebaseRepositoryConfig,
+  LakebaseTaskEventRecord,
+  LakebaseTaskRecord,
+  LakebaseTransactionClient,
+} from "./types";
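
The Lakebase backend never opens connections itself; the consumer supplies a `LakebaseConnector` (interface defined later in this patch). A minimal sketch of one backed by a node-postgres pool, assuming the `pg` package; pool tuning, retries, and error shaping are elided:

```ts
import { Pool } from "pg";
import type { LakebaseConnector, LakebaseTransactionClient } from "./types";

// hypothetical helper: adapts a pg Pool to the LakebaseConnector interface
export function createPgConnector(connectionString: string): LakebaseConnector {
  const pool = new Pool({ connectionString });
  return {
    async query(sql, params) {
      return pool.query(sql, params);
    },
    async transaction(callback) {
      const client = await pool.connect();
      try {
        await client.query("BEGIN");
        const result = await callback(client as LakebaseTransactionClient);
        await client.query("COMMIT");
        return result;
      } catch (error) {
        await client.query("ROLLBACK");
        throw error;
      } finally {
        client.release();
      }
    },
    async healthCheck() {
      try {
        await pool.query("SELECT 1");
        return true;
      } catch {
        return false;
      }
    },
    async close() {
      await pool.end();
    },
  };
}
```
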
diff --git a/packages/taskflow/src/persistence/repository/lakebase/migrations/001_create_tasks_table.sql b/packages/taskflow/src/persistence/repository/lakebase/migrations/001_create_tasks_table.sql
new file mode 100644
index 00000000..78584870
--- /dev/null
+++ b/packages/taskflow/src/persistence/repository/lakebase/migrations/001_create_tasks_table.sql
@@ -0,0 +1,37 @@
+CREATE TABLE IF NOT EXISTS tasks (
+  task_id TEXT PRIMARY KEY,
+  name TEXT NOT NULL,
+  status TEXT NOT NULL CHECK (status IN ('created', 'pending', 'running', 'completed', 'failed', 'cancelled')),
+  type TEXT NOT NULL CHECK (type IN ('background', 'user')),
+  input_data TEXT,
+  idempotency_key TEXT NOT NULL,
+  user_id TEXT,
+  created_at TIMESTAMP NOT NULL,
+  started_at TIMESTAMP,
+  completed_at TIMESTAMP,
+  duration_ms INTEGER,
+  result_truncated INTEGER DEFAULT 0,
+  last_heartbeat_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+  result TEXT,
+  error TEXT,
+  attempt INTEGER DEFAULT 0,
+  execution_options TEXT
+);
+
+CREATE UNIQUE INDEX IF NOT EXISTS idx_tasks_idempotency_active
+ON tasks(idempotency_key)
+WHERE status IN ('created', 'pending', 'running');
+
+CREATE INDEX IF NOT EXISTS idx_tasks_type ON tasks(type);
+
+CREATE INDEX IF NOT EXISTS idx_tasks_alive_check
+ON tasks(idempotency_key, last_heartbeat_at, status)
+WHERE status = 'running';
+
+CREATE INDEX IF NOT EXISTS idx_tasks_stale
+ON tasks(status, last_heartbeat_at)
+WHERE status = 'running';
+
+CREATE INDEX IF NOT EXISTS idx_tasks_pending_recovery
+ON tasks(status, created_at)
+WHERE status IN ('created', 'pending');
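
Note that `idx_tasks_idempotency_active` is a *partial* unique index: a key is reserved only while its task is in an active status, so a finished task frees the key for reuse. A hedged sketch of the dedup check this enables, assuming a `repository` from the factory above and that `Task` exposes its `status`:

```ts
import type { IdempotencyKey } from "@/core/branded";

const ACTIVE_STATUSES = ["created", "pending", "running"];

// reuse an in-flight task rather than inserting a duplicate row
const existing = await repository.findByIdempotencyKey(
  "idem-1" as IdempotencyKey,
);
const canReuse = existing !== null && ACTIVE_STATUSES.includes(existing.status);
```
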
diff --git a/packages/taskflow/src/persistence/repository/lakebase/migrations/002_create_task_events_table.sql b/packages/taskflow/src/persistence/repository/lakebase/migrations/002_create_task_events_table.sql
new file mode 100644
index 00000000..f53289b8
--- /dev/null
+++ b/packages/taskflow/src/persistence/repository/lakebase/migrations/002_create_task_events_table.sql
@@ -0,0 +1,15 @@
+CREATE TABLE IF NOT EXISTS task_events (
+  entry_id TEXT PRIMARY KEY,
+  task_id TEXT NOT NULL,
+  seq INTEGER NOT NULL,
+  type TEXT NOT NULL,
+  timestamp TIMESTAMP NOT NULL,
+  created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+  payload TEXT,
+
+  FOREIGN KEY (task_id) REFERENCES tasks(task_id) ON DELETE CASCADE
+);
+
+CREATE UNIQUE INDEX IF NOT EXISTS idx_task_events_unique_seq ON task_events(task_id, seq);
+
+CREATE INDEX IF NOT EXISTS idx_task_events_streaming ON task_events(task_id, seq, created_at);
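
Both tables receive their rows through `executeBatch`, shown next. A hedged sketch of the flush path implied by `readEntriesFromCheckpoint`; `eventLog`, `repository`, and the durable `lastFlushedSeq` are assumed to be in scope, and batching limits are elided:

```ts
import type { EventLogEvent } from "@/persistence";

let lastFlushedSeq = 0; // would be loaded from durable storage in practice

// drain WAL entries past the repository checkpoint and apply them in one
// transaction, then advance the checkpoint to the last applied sequence
const pending = await eventLog.readEntriesFromCheckpoint(lastFlushedSeq);
if (pending.length > 0) {
  await repository.executeBatch(pending);
  lastFlushedSeq = (pending[pending.length - 1] as EventLogEvent).seq;
}
```
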
diff --git a/packages/taskflow/src/persistence/repository/lakebase/repository.ts b/packages/taskflow/src/persistence/repository/lakebase/repository.ts
new file mode 100644
index 00000000..5753ce5e
--- /dev/null
+++ b/packages/taskflow/src/persistence/repository/lakebase/repository.ts
@@ -0,0 +1,472 @@
+import fs from "node:fs";
+import path from "node:path";
+import { fileURLToPath } from "node:url";
+import type { IdempotencyKey, TaskId } from "@/core/branded";
+import type { TaskStatus } from "@/core/types";
+import { type EventLogEntry, type EventLogEntryType, Task } from "@/domain";
+import type { StoredEvent, TaskRepository } from "../types";
+import type {
+  LakebaseConnector,
+  LakebaseRepositoryConfig,
+  LakebaseTaskEventRecord,
+  LakebaseTaskRecord,
+  LakebaseTransactionClient,
+} from "./types";
+import {
+  noopHooks,
+  TaskAttributes,
+  TaskMetrics,
+  type TaskSystemHooks,
+} from "@/observability";
+import { RepositoryError } from "@/core/errors";
+
+/**
+ * Lakebase Task Repository
+ *
+ * Implements the TaskRepository interface using Lakebase for storage.
+ * The consumer provides the connector.
+ */
+export class LakebaseTaskRepository implements TaskRepository {
+  readonly type = "lakebase" as const;
+  private connector: LakebaseConnector;
+  private _isInitialized = false;
+  private hooks: TaskSystemHooks;
+
+  constructor(
+    config: LakebaseRepositoryConfig,
+    hooks: TaskSystemHooks = noopHooks,
+  ) {
+    this.connector = config.connector;
+    this.hooks = hooks;
+  }
+
+  get isInitialized(): boolean {
+    return this._isInitialized;
+  }
+
+  async initialize(): Promise<void> {
+    await this.runMigrations();
+    this._isInitialized = true;
+
+    this.hooks.log({
+      severity: "info",
+      message: "Lakebase repository initialized",
+      attributes: {
+        [TaskAttributes.REPOSITORY_TYPE]: "lakebase",
+      },
+    });
+  }
+
+  async close(): Promise<void> {
+    await this.connector.close();
+    this._isInitialized = false;
+  }
+
+  async executeBatch(entries: EventLogEntry[]): Promise<void> {
+    if (entries.length === 0) return;
+
+    const startTime = Date.now();
+
+    try {
+      await this.connector.transaction(
+        async (client: LakebaseTransactionClient) => {
+          for (const entry of entries) {
+            await this.executeEntry(client, entry);
+          }
+        },
+      );
+      this.hooks?.incrementCounter(TaskMetrics.REPOSITORY_BATCH_EXECUTED, 1, {
+        [TaskAttributes.REPOSITORY_TYPE]: "lakebase",
+        [TaskAttributes.BATCH_SIZE]:
entries.length, + }); + + this.hooks?.recordHistogram( + TaskMetrics.REPOSITORY_BATCH_LATENCY_MS, + Date.now() - startTime, + { [TaskAttributes.REPOSITORY_TYPE]: "lakebase" }, + ); + } catch (error) { + this.hooks?.incrementCounter(TaskMetrics.REPOSITORY_ERRORS, 1, { + [TaskAttributes.REPOSITORY_TYPE]: "lakebase", + operation: "batch", + }); + + throw new RepositoryError( + "Failed to execute batch", + "lakebase", + "batch", + true, + error instanceof Error ? error : new Error(String(error)), + ); + } + } + + async findById(taskId: TaskId): Promise { + const result = await this.connector.query( + "SELECT * FROM tasks WHERE task_id = $1", + [taskId], + ); + return result.rows[0] ? this.mapTaskRecord(result.rows[0]) : null; + } + + async findByIdempotencyKey( + idempotencyKey: IdempotencyKey, + ): Promise { + const result = await this.connector.query( + "SELECT * FROM tasks WHERE idempotency_key = $1", + [idempotencyKey], + ); + return result.rows[0] ? this.mapTaskRecord(result.rows[0]) : null; + } + + async findStaleTasks(threshold: number): Promise { + const thresholdDateMs = new Date(Date.now() - threshold).toISOString(); + const result = await this.connector.query( + "SELECT * FROM tasks WHERE status = 'running' AND last_heartbeat_at < $1", + [thresholdDateMs], + ); + return result.rows.map((row) => this.mapTaskRecord(row)); + } + + async getEvents(taskId: TaskId): Promise { + const result = await this.connector.query( + "SELECT * FROM task_events WHERE task_id = $1 ORDER BY seq", + [taskId], + ); + return result.rows.map((row) => this.mapTaskEventRecord(row)); + } + + async healthCheck(): Promise { + return this.connector.healthCheck(); + } + + private async runMigrations(): Promise { + const __dirname = path.dirname(fileURLToPath(import.meta.url)); + const migrationsFolder = path.join(__dirname, "migrations"); + + if (!fs.existsSync(migrationsFolder)) { + // fallback for test environment + const srcMigrationsFolder = path.resolve( + process.cwd(), + "packages/taskflow/src/persistence/repository/lakebase/migrations", + ); + + if (fs.existsSync(srcMigrationsFolder)) { + const migrations = fs.readdirSync(srcMigrationsFolder).sort(); + for (const migration of migrations) { + const migrationContent = fs.readFileSync( + path.join(srcMigrationsFolder, migration), + "utf8", + ); + await this.connector.query(migrationContent); + } + return; + } + + throw new RepositoryError( + `Migrations folder not found at ${migrationsFolder} or ${srcMigrationsFolder}`, + "lakebase", + "migration", + ); + } + + const migrations = fs.readdirSync(migrationsFolder).sort(); + + for (const migration of migrations) { + try { + const migrationContent = fs.readFileSync( + path.join(migrationsFolder, migration), + "utf8", + ); + await this.connector.query(migrationContent); + + this.hooks?.log({ + severity: "info", + message: "Applied migration", + attributes: { + [TaskAttributes.MIGRATION_NAME]: migration, + [TaskAttributes.REPOSITORY_TYPE]: "lakebase", + }, + }); + } catch (error) { + throw new RepositoryError( + `Failed to apply migration ${migration}`, + "lakebase", + "migration", + false, + error instanceof Error ? 
error : new Error(String(error)), + ); + } + } + } + + private async executeEntry( + client: LakebaseTransactionClient, + entry: EventLogEntry, + ): Promise { + switch (entry.type) { + case "TASK_CREATED": + await this.executeTaskCreated(client, entry); + break; + case "TASK_START": + await this.executeTaskStart(client, entry); + break; + case "TASK_COMPLETE": + await this.executeTaskComplete(client, entry); + break; + case "TASK_ERROR": + await this.executeTaskError(client, entry); + break; + case "TASK_PROGRESS": + await this.executeTaskProgress(client, entry); + break; + case "TASK_CANCELLED": + await this.executeTaskCancelled(client, entry); + break; + case "TASK_HEARTBEAT": + await this.executeTaskHeartbeat(client, entry); + break; + case "TASK_CUSTOM": + await this.executeTaskCustom(client, entry); + break; + default: + throw new Error(`Unknown entry type: ${entry.type}`); + } + } + + private async executeTaskCreated( + client: LakebaseTransactionClient, + entry: EventLogEntry, + ): Promise { + await client.query( + `INSERT INTO tasks (task_id, name, status, type, idempotency_key, user_id, input_data, execution_options, created_at, last_heartbeat_at) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)`, + [ + entry.taskId, + entry.name, + "created", + entry.taskType, + entry.idempotencyKey, + entry.userId ?? null, + entry.input ? JSON.stringify(entry.input) : null, + entry.executionOptions ? JSON.stringify(entry.executionOptions) : null, + new Date(entry.timestamp).toISOString(), + new Date(entry.timestamp).toISOString(), + ], + ); + await this.insertTaskEvent( + client, + entry.taskId, + "TASK_CREATED", + entry.timestamp, + { + name: entry.name, + taskType: entry.taskType, + idempotencyKey: entry.idempotencyKey, + userId: entry.userId, + input: entry.input, + }, + ); + } + + private async executeTaskStart( + client: LakebaseTransactionClient, + entry: EventLogEntry, + ): Promise { + await client.query( + `UPDATE tasks SET status = $1, started_at = $2, last_heartbeat_at = $3 WHERE task_id = $4`, + [ + "running", + new Date(entry.timestamp).toISOString(), + new Date(entry.timestamp).toISOString(), + entry.taskId, + ], + ); + await this.insertTaskEvent( + client, + entry.taskId, + "TASK_START", + entry.timestamp, + ); + } + + private async executeTaskComplete( + client: LakebaseTransactionClient, + entry: EventLogEntry, + ): Promise { + await client.query( + `UPDATE tasks SET status = $1, completed_at = $2, result = $3 WHERE task_id = $4`, + [ + "completed", + new Date(entry.timestamp).toISOString(), + entry.result ? JSON.stringify(entry.result) : null, + entry.taskId, + ], + ); + await this.insertTaskEvent( + client, + entry.taskId, + "TASK_COMPLETE", + entry.timestamp, + { + result: entry.result, + }, + ); + } + + private async executeTaskError( + client: LakebaseTransactionClient, + entry: EventLogEntry, + ): Promise { + await client.query( + `UPDATE tasks SET status = $1, completed_at = $2, error = $3, attempt = attempt + 1 WHERE task_id = $4`, + [ + "failed", + new Date(entry.timestamp).toISOString(), + entry.error ?? null, + entry.taskId, + ], + ); + await this.insertTaskEvent( + client, + entry.taskId, + "TASK_ERROR", + entry.timestamp, + { + error: entry.error, + }, + ); + } + + private async executeTaskCancelled( + client: LakebaseTransactionClient, + entry: EventLogEntry, + ): Promise { + await client.query( + `UPDATE tasks SET status = $1, completed_at = $2, error = $3 WHERE task_id = $4`, + [ + "cancelled", + new Date(entry.timestamp).toISOString(), + entry.error ?? 
null, + entry.taskId, + ], + ); + await this.insertTaskEvent( + client, + entry.taskId, + "TASK_CANCELLED", + entry.timestamp, + { + error: entry.error, + }, + ); + } + + private async executeTaskProgress( + client: LakebaseTransactionClient, + entry: EventLogEntry, + ): Promise { + await client.query( + `UPDATE tasks SET last_heartbeat_at = $1 WHERE task_id = $2`, + [new Date(entry.timestamp).toISOString(), entry.taskId], + ); + await this.insertTaskEvent( + client, + entry.taskId, + "TASK_PROGRESS", + entry.timestamp, + { + ...entry.payload, + }, + ); + } + + private async executeTaskHeartbeat( + client: LakebaseTransactionClient, + entry: EventLogEntry, + ): Promise { + // only update heartbeat, do NOT insert into task_events + await client.query( + `UPDATE tasks SET last_heartbeat_at = $1 WHERE task_id = $2`, + [new Date(entry.timestamp).toISOString(), entry.taskId], + ); + } + + private async executeTaskCustom( + client: LakebaseTransactionClient, + entry: EventLogEntry, + ): Promise { + await client.query( + `UPDATE tasks SET last_heartbeat_at = $1 WHERE task_id = $2`, + [new Date(entry.timestamp).toISOString(), entry.taskId], + ); + await this.insertTaskEvent( + client, + entry.taskId, + "TASK_CUSTOM", + entry.timestamp, + { + ...entry.payload, + }, + ); + } + + private async insertTaskEvent( + client: LakebaseTransactionClient, + taskId: string, + type: EventLogEntryType, + timestampMs: number, + payload?: Record, + ): Promise { + // get next sequence number + const seqResult = await client.query<{ nextseq: number }>( + `SELECT COALESCE(MAX(seq), 0) + 1 as nextseq FROM task_events WHERE task_id = $1`, + [taskId], + ); + const nextSeq = seqResult.rows[0]?.nextseq ?? 1; + + await client.query( + `INSERT INTO task_events (entry_id, task_id, seq, type, timestamp, payload) + VALUES ($1, $2, $3, $4, $5, $6)`, + [ + crypto.randomUUID(), + taskId, + nextSeq, + type, + new Date(timestampMs).toISOString(), + payload ? JSON.stringify(payload) : null, + ], + ); + } + + private mapTaskRecord(record: LakebaseTaskRecord): Task { + return Task.fromRecord({ + id: record.task_id, + name: record.name, + idempotency_key: record.idempotency_key, + user_id: record.user_id, + task_type: record.type as "background" | "user", + status: record.status as TaskStatus, + input: record.input_data ?? "{}", + result: record.result, + error: record.error, + attempt: record.attempt, + created_at: record.created_at, + started_at: record.started_at, + completed_at: record.completed_at, + last_heartbeat_at: record.last_heartbeat_at, + execution_options: record.execution_options, + }); + } + + private mapTaskEventRecord(record: LakebaseTaskEventRecord): StoredEvent { + return { + id: record.entry_id, + taskId: record.task_id, + seq: record.seq, + type: record.type as StoredEvent["type"], + timestamp: new Date(record.timestamp), + payload: record.payload ? 
JSON.parse(record.payload) : null, + }; + } +} diff --git a/packages/taskflow/src/persistence/repository/lakebase/types.ts b/packages/taskflow/src/persistence/repository/lakebase/types.ts new file mode 100644 index 00000000..fa34c614 --- /dev/null +++ b/packages/taskflow/src/persistence/repository/lakebase/types.ts @@ -0,0 +1,92 @@ +import type pg from "pg"; +import type { BaseRepositoryConfig } from "../types"; + +/** + * Lakebase Repository Types + * + * Types for Lakebase/Postgres repository implementation + * The consumer provides the connector (no pg_deps) + */ +export interface LakebaseConnector { + /** + * Execute a SQL query + * @param sql - SQL query string $1, $2, etc. placeholders + * @params params -Query parameters + * @returns Query result with rows + */ + query( + sql: string, + params?: unknown[], + ): Promise>; + + /** + * Execute a function within a transaction + * @param callback - Function to execute within the transaction context + */ + transaction( + callback: (client: LakebaseTransactionClient) => Promise, + ): Promise; + + /** + * Check if the connector is healthy + */ + healthCheck(): Promise; + + /** + * Close the connection + */ + close(): Promise; +} + +/** + * Transaction client interface + */ +export interface LakebaseTransactionClient { + query( + sql: string, + params?: unknown[], + ): Promise>; +} + +/** + * Lakebase repository configuration + */ +export interface LakebaseRepositoryConfig extends BaseRepositoryConfig { + type: "lakebase"; + connector: LakebaseConnector; +} + +/** + * Raw task record from lakebase database + */ +export interface LakebaseTaskRecord { + task_id: string; + name: string; + status: string; + type: string; + idempotency_key: string; + user_id: string | null; + input_data: string | null; + execution_options: string | null; + created_at: string; + started_at: string | null; + completed_at: string | null; + duration_ms: number | null; + result: string | null; + error: string | null; + attempt: number; + last_heartbeat_at: string; +} + +/** + * raw task event record from Lakebase database + */ +export interface LakebaseTaskEventRecord { + entry_id: string; + task_id: string; + seq: number; + type: string; + timestamp: string; + created_at: string; + payload: string | null; +} diff --git a/packages/taskflow/src/persistence/repository/sqlite/connector.ts b/packages/taskflow/src/persistence/repository/sqlite/connector.ts new file mode 100644 index 00000000..a6618aa3 --- /dev/null +++ b/packages/taskflow/src/persistence/repository/sqlite/connector.ts @@ -0,0 +1,491 @@ +import fs from "node:fs"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; +import Database from "better-sqlite3"; +import type { IdempotencyKey, TaskId } from "@/core/branded"; +import { RepositoryError } from "@/core/errors"; +import type { TaskStatus } from "@/core/types"; +import { type EventLogEntry, type EventLogEntryType, Task } from "@/domain"; +import { + noopHooks, + TaskAttributes, + TaskMetrics, + type TaskSystemHooks, +} from "@/observability"; +import type { StoredEvent } from "../types"; +import type { + SQLiteConfig, + SQLiteTaskEventRecord, + SQLiteTaskRecord, +} from "./types"; + +/** + * Default retry configuration for SQLite operations + */ +const DEFAULT_RETRY_CONFIG = { + maxRetries: 3, + baseDelayMs: 100, + maxDelayMs: 1000, +}; + +/** + * SQLite Connector + * + * Low-level SQLite operations for task persistence + * Handles schema migrations, batch execution, and queries. 
+ */ +export class SQLiteConnector { + private db: Database.Database; + private _isInitialized = false; + private hooks: TaskSystemHooks; + + constructor(config: SQLiteConfig, hooks: TaskSystemHooks = noopHooks) { + this.db = new Database(config.database ?? "./.taskflow/sqlite.db"); + this.hooks = hooks; + } + + get isInitialized(): boolean { + return this._isInitialized; + } + + /** + * Initialize the database + * Enables WAL mode and run migrations + */ + async initialize(): Promise { + // enable WAL mode for better performance + this.db.pragma("journal_mode = WAL"); + + // run migrations + await this.runMigrations(); + this._isInitialized = true; + + this.hooks.log({ + severity: "info", + message: "SQLite connector initialized", + attributes: { + [TaskAttributes.REPOSITORY_TYPE]: "sqlite", + }, + }); + } + + /** + * Execute a batch of events in a transaction + */ + async executeBatch(batch: EventLogEntry[]): Promise { + if (batch.length === 0) return; + + const startTime = Date.now(); + + await this.withRetry(async () => { + const transaction = this.db.transaction((entries: EventLogEntry[]) => { + for (const entry of entries) { + this.executeEntry(entry); + } + }); + transaction(batch); + }, "executeBatch"); + + this.hooks.incrementCounter(TaskMetrics.REPOSITORY_BATCH_EXECUTED, 1, { + [TaskAttributes.REPOSITORY_TYPE]: "sqlite", + [TaskAttributes.BATCH_SIZE]: batch.length, + }); + + this.hooks?.recordHistogram( + TaskMetrics.REPOSITORY_BATCH_LATENCY_MS, + Date.now() - startTime, + { [TaskAttributes.REPOSITORY_TYPE]: "sqlite" }, + ); + } + + /** + * Close the database connection + */ + async close(): Promise { + this.db.close(); + this._isInitialized = false; + } + + healthCheck(): boolean { + try { + this.db.prepare("SELECT 1").get(); + return true; + } catch { + return false; + } + } + + /** + * Find a task by ID + */ + findTaskById(taskId: TaskId): Task | null { + const stmt = this.db.prepare(` + SELECT * from tasks where task_id = ? + `); + + const record = stmt.get(taskId); + if (!record) return null; + + return this.mapTaskRecord(record as SQLiteTaskRecord); + } + + /** + * Find a task by idempotency key + */ + findTaskByIdempotencyKey(idempotencyKey: IdempotencyKey): Task | null { + const stmt = this.db.prepare(` + SELECT * from tasks where idempotency_key = ? + `); + + const record = stmt.get(idempotencyKey); + if (!record) return null; + + return this.mapTaskRecord(record as SQLiteTaskRecord); + } + + /** + * Find stale running tasks + * Tasks whose last heartbeat is older than the threshold + */ + findStaleTasks(staleThresholdMs: number): Task[] { + const thresholdDateMs = new Date( + Date.now() - staleThresholdMs, + ).toISOString(); + const stmt = this.db.prepare(` + SELECT * from tasks where status = 'running' and last_heartbeat_at < ? + `); + + const records = stmt.all(thresholdDateMs); + return records.map((record) => + this.mapTaskRecord(record as SQLiteTaskRecord), + ); + } + + /** + * Get task events by task ID, ordered by sequence + */ + getTaskEvents(taskId: TaskId): StoredEvent[] { + const stmt = this.db.prepare(` + SELECT * from task_events where task_id = ? 
order by seq + `); + + const records = stmt.all(taskId); + return records.map((record) => + this.mapTaskEventRecord(record as SQLiteTaskEventRecord), + ); + } + + private async withRetry( + fn: () => T | Promise, + operation: string, + ): Promise { + let lastError: Error | undefined; + const { maxRetries, baseDelayMs, maxDelayMs } = DEFAULT_RETRY_CONFIG; + + for (let attempt = 0; attempt <= maxRetries; attempt++) { + try { + return await fn(); + } catch (error) { + lastError = error instanceof Error ? error : new Error(String(error)); + + // check if error is retryable + const isRetryable = + lastError.message.includes("SQLITE_BUSY") || + lastError.message.includes("SQLITE_LOCKED"); + + if (!isRetryable || attempt === maxRetries) { + break; + } + + // exponential backoff with jitter + const delay = Math.min( + baseDelayMs * 2 ** attempt + Math.random() * 100, + maxDelayMs, + ); + + this.hooks.incrementCounter(TaskMetrics.REPOSITORY_RETRIES, 1, { + [TaskAttributes.REPOSITORY_TYPE]: "sqlite", + operation, + }); + + this.hooks?.log({ + severity: "warn", + message: `SQLite operation failed, retrying`, + attributes: { + operation, + attempt: attempt + 1, + maxRetries, + delayMs: delay, + }, + error: lastError, + }); + + await new Promise((resolve) => setTimeout(resolve, delay)); + } + } + + this.hooks?.incrementCounter(TaskMetrics.REPOSITORY_ERRORS, 1, { + [TaskAttributes.REPOSITORY_TYPE]: "sqlite", + [TaskAttributes.ERROR_TYPE]: + lastError?.message.split(":")[0] ?? "unknown", + }); + + const isRetryable = + lastError?.message.includes("SQLITE_BUSY") || + lastError?.message.includes("SQLITE_LOCKED"); + + throw new RepositoryError( + `SQLite ${operation} failed after ${maxRetries} retries`, + "sqlite", + operation === "executeBatch" ? "batch" : "query", + isRetryable, + lastError, + ); + } + + private executeEntry(entry: EventLogEntry): void { + switch (entry.type) { + case "TASK_CREATED": + this.executeTaskCreated(entry); + break; + case "TASK_START": + this.executeTaskStart(entry); + break; + case "TASK_COMPLETE": + this.executeTaskComplete(entry); + break; + case "TASK_ERROR": + this.executeTaskError(entry); + break; + case "TASK_PROGRESS": + this.executeTaskProgress(entry); + break; + case "TASK_CANCELLED": + this.executeTaskCancelled(entry); + break; + case "TASK_HEARTBEAT": + this.executeTaskHeartbeat(entry); + break; + case "TASK_CUSTOM": + this.executeTaskCustom(entry); + break; + default: + throw new Error(`Unsupported event type: ${entry.type}`); + } + } + + private executeTaskCreated(entry: EventLogEntry): void { + const stmt = this.db.prepare(` + INSERT INTO tasks (task_id, name, status, type, idempotency_key, user_id, input_data, execution_options, created_at, last_heartbeat_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + `); + stmt.run( + entry.taskId, + entry.name, + "created", + entry.taskType, + entry.idempotencyKey, + entry.userId ?? null, + entry.input ? JSON.stringify(entry.input) : null, + entry.executionOptions ? JSON.stringify(entry.executionOptions) : null, + new Date(entry.timestamp).toISOString(), + new Date(entry.timestamp).toISOString(), + ); + + this.insertTaskEvent(entry.taskId, "TASK_CREATED", entry.timestamp, { + name: entry.name, + taskType: entry.taskType, + idempotencyKey: entry.idempotencyKey, + userId: entry.userId, + input: entry.input, + }); + } + + private executeTaskStart(entry: EventLogEntry): void { + const stmt = this.db.prepare(` + UPDATE tasks SET status = ?, started_at = ?, last_heartbeat_at = ? WHERE task_id = ? 
+ `); + stmt.run( + "running", + new Date(entry.timestamp).toISOString(), + new Date(entry.timestamp).toISOString(), + entry.taskId, + ); + + this.insertTaskEvent(entry.taskId, "TASK_START", entry.timestamp); + } + + private executeTaskComplete(entry: EventLogEntry): void { + const stmt = this.db.prepare(` + UPDATE tasks SET status = ?, completed_at = ?, result = ? WHERE task_id = ? + `); + stmt.run( + "completed", + new Date(entry.timestamp).toISOString(), + entry.result ? JSON.stringify(entry.result) : null, + entry.taskId, + ); + + this.insertTaskEvent(entry.taskId, "TASK_COMPLETE", entry.timestamp, { + result: entry.result, + }); + } + + private executeTaskError(entry: EventLogEntry): void { + const stmt = this.db.prepare(` + UPDATE tasks SET status = ?, completed_at = ?, error = ?, attempt = attempt + 1 WHERE task_id = ? + `); + stmt.run( + "failed", + new Date(entry.timestamp).toISOString(), + entry.error ?? null, + entry.taskId, + ); + + this.insertTaskEvent(entry.taskId, "TASK_ERROR", entry.timestamp, { + error: entry.error, + }); + } + + private executeTaskCancelled(entry: EventLogEntry): void { + const stmt = this.db.prepare(` + UPDATE tasks SET status = ?, completed_at = ?, error = ? WHERE task_id = ? + `); + stmt.run( + "cancelled", + new Date(entry.timestamp).toISOString(), + entry.error ?? null, + entry.taskId, + ); + + this.insertTaskEvent(entry.taskId, "TASK_CANCELLED", entry.timestamp, { + error: entry.error, + }); + } + + private executeTaskProgress(entry: EventLogEntry): void { + const stmt = this.db.prepare(` + UPDATE tasks SET last_heartbeat_at = ? WHERE task_id = ? + `); + stmt.run(new Date(entry.timestamp).toISOString(), entry.taskId); + + this.insertTaskEvent(entry.taskId, "TASK_PROGRESS", entry.timestamp, { + ...entry.payload, + }); + } + + private executeTaskHeartbeat(entry: EventLogEntry): void { + // only update heartbeat, do NOT insert into task_events + const stmt = this.db.prepare(` + UPDATE tasks SET last_heartbeat_at = ? WHERE task_id = ? + `); + stmt.run(new Date(entry.timestamp).toISOString(), entry.taskId); + } + + private executeTaskCustom(entry: EventLogEntry): void { + const stmt = this.db.prepare(` + UPDATE tasks SET last_heartbeat_at = ? WHERE task_id = ? + `); + stmt.run(new Date(entry.timestamp).toISOString(), entry.taskId); + + this.insertTaskEvent(entry.taskId, "TASK_CUSTOM", entry.timestamp, { + ...entry.payload, + }); + } + + private insertTaskEvent( + taskId: string, + type: EventLogEntryType, + timestampMs: number, + payload?: Record, + ): void { + // get next sequence number for this task + const seqStmt = this.db.prepare(` + SELECT COALESCE(MAX(seq), 0) + 1 as nextSeq FROM task_events WHERE task_id = ? + `); + const { nextSeq } = seqStmt.get(taskId) as { nextSeq: number }; + + const eventStmt = this.db.prepare(` + INSERT INTO task_events (entry_id, task_id, seq, type, timestamp, payload) + VALUES (?, ?, ?, ?, ?, ?) + `); + eventStmt.run( + crypto.randomUUID(), + taskId, + nextSeq, + type, + new Date(timestampMs).toISOString(), + payload ? JSON.stringify(payload) : null, + ); + } + + private mapTaskRecord(record: SQLiteTaskRecord): Task { + return Task.fromRecord({ + id: record.task_id, + name: record.name, + idempotency_key: record.idempotency_key, + user_id: record.user_id, + task_type: record.type as "background" | "user", + status: record.status as TaskStatus, + input: record.input_data ?? 
"{}", + result: record.result, + error: record.error, + attempt: record.attempt, + created_at: record.created_at, + started_at: record.started_at, + completed_at: record.completed_at, + last_heartbeat_at: record.last_heartbeat_at, + execution_options: record.execution_options, + }); + } + + private mapTaskEventRecord(record: SQLiteTaskEventRecord): StoredEvent { + return { + id: record.entry_id, + taskId: record.task_id, + seq: record.seq, + type: record.type as StoredEvent["type"], + timestamp: new Date(record.timestamp), + payload: record.payload ? JSON.parse(record.payload) : null, + }; + } + + private async runMigrations(): Promise { + const __dirname = path.dirname(fileURLToPath(import.meta.url)); + const migrationsFolder = path.join(__dirname, "migrations"); + + if (!fs.existsSync(migrationsFolder)) { + // fallback, try to find migrations relative to the source + const srcMigrationsFolder = path.resolve( + process.cwd(), + "packages/taskflow/src/persistence/repository/sqlite/migrations", + ); + + if (fs.existsSync(srcMigrationsFolder)) { + const migrations = fs.readdirSync(srcMigrationsFolder).sort(); + for (const migration of migrations) { + const migrationContent = fs.readFileSync( + path.join(srcMigrationsFolder, migration), + "utf8", + ); + this.db.exec(migrationContent); + } + return; + } + + throw new Error( + `Migrations folder not found at ${migrationsFolder} or ${srcMigrationsFolder}`, + ); + } + + const migrations = fs.readdirSync(migrationsFolder).sort(); + + for (const migration of migrations) { + const migrationContent = fs.readFileSync( + path.join(migrationsFolder, migration), + "utf8", + ); + this.db.exec(migrationContent); + } + } +} diff --git a/packages/taskflow/src/persistence/repository/sqlite/index.ts b/packages/taskflow/src/persistence/repository/sqlite/index.ts new file mode 100644 index 00000000..6d86a2f5 --- /dev/null +++ b/packages/taskflow/src/persistence/repository/sqlite/index.ts @@ -0,0 +1,8 @@ +export { SQLiteConnector } from "./connector"; +export { SQLiteTaskRepository } from "./repository"; +export type { + SQLiteConfig, + SQLiteRepositoryConfig, + SQLiteTaskEventRecord, + SQLiteTaskRecord, +} from "./types"; diff --git a/packages/taskflow/src/persistence/repository/sqlite/migrations/001_create_tasks_table.sql b/packages/taskflow/src/persistence/repository/sqlite/migrations/001_create_tasks_table.sql new file mode 100644 index 00000000..5f913024 --- /dev/null +++ b/packages/taskflow/src/persistence/repository/sqlite/migrations/001_create_tasks_table.sql @@ -0,0 +1,42 @@ +CREATE TABLE IF NOT EXISTS tasks ( + task_id TEXT PRIMARY KEY, + name TEXT NOT NULL, + status TEXT NOT NULL CHECK (status IN ('created', 'pending', 'running', 'completed', 'failed', 'cancelled')), + type TEXT NOT NULL CHECK (type IN ('background', 'user')), + input_data TEXT, + idempotency_key TEXT NOT NULL, + user_id TEXT, + created_at TEXT NOT NULL, + started_at TEXT, + completed_at TEXT, + duration_ms INTEGER, + result_truncated INTEGER DEFAULT 0, + last_heartbeat_at TEXT DEFAULT CURRENT_TIMESTAMP, + result TEXT, + error TEXT, + attempt INTEGER DEFAULT 0, + execution_options TEXT +); + +-- unique constraint on idempotency key for active tasks only +CREATE UNIQUE INDEX IF NOT EXISTS idx_tasks_idempotency_active +ON tasks(idempotency_key) +WHERE status IN ('created', 'pending', 'running'); + +-- index for type filtering +CREATE INDEX IF NOT EXISTS idx_tasks_type ON tasks(type); + +-- index for checking if a task is alive +CREATE INDEX IF NOT EXISTS idx_tasks_alive_check +ON 
tasks(idempotency_key, last_heartbeat_at, status) +WHERE status = 'running'; + +-- index for finding stale tasks +CREATE INDEX IF NOT EXISTS idx_tasks_stale +ON tasks(status, last_heartbeat_at) +WHERE status = 'running'; + +-- index for pending task recovery +CREATE INDEX IF NOT EXISTS idx_tasks_pending_recovery +ON tasks(status, created_at) +WHERE status IN ('created', 'pending'); diff --git a/packages/taskflow/src/persistence/repository/sqlite/migrations/002_create_task_events_table.sql b/packages/taskflow/src/persistence/repository/sqlite/migrations/002_create_task_events_table.sql new file mode 100644 index 00000000..f3dde738 --- /dev/null +++ b/packages/taskflow/src/persistence/repository/sqlite/migrations/002_create_task_events_table.sql @@ -0,0 +1,19 @@ +CREATE TABLE IF NOT EXISTS task_events ( + entry_id TEXT PRIMARY KEY, + task_id TEXT NOT NULL, + seq INTEGER NOT NULL, + type TEXT NOT NULL, + timestamp TEXT NOT NULL, + created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, + payload TEXT, + + FOREIGN KEY (task_id) REFERENCES tasks(task_id) ON DELETE CASCADE +); + +-- unique constraint on task_id + seq +CREATE UNIQUE INDEX IF NOT EXISTS idx_task_events_unique_seq +ON task_events(task_id, seq); + +-- index for streaming events in order +CREATE INDEX IF NOT EXISTS idx_task_events_streaming +ON task_events(task_id, seq, created_at); diff --git a/packages/taskflow/src/persistence/repository/sqlite/repository.ts b/packages/taskflow/src/persistence/repository/sqlite/repository.ts new file mode 100644 index 00000000..74fd4a91 --- /dev/null +++ b/packages/taskflow/src/persistence/repository/sqlite/repository.ts @@ -0,0 +1,85 @@ +import type { IdempotencyKey, TaskId } from "@/core/branded"; +import type { EventLogEntry, Task } from "@/domain"; +import { + noopHooks, + TaskAttributes, + type TaskSystemHooks, +} from "@/observability"; +import type { StoredEvent, TaskRepository } from "../types"; +import { SQLiteConnector } from "./connector"; +import type { SQLiteRepositoryConfig } from "./types"; + +/** + * SQLite Task Repository + * + * Implements TaskRepository interface using SQLite for storage + */ +export class SQLiteTaskRepository implements TaskRepository { + readonly type = "sqlite" as const; + private connector: SQLiteConnector; + private hooks: TaskSystemHooks; + + constructor( + config: SQLiteRepositoryConfig, + hooks: TaskSystemHooks = noopHooks, + ) { + this.connector = new SQLiteConnector( + { + database: config.database ?? 
"./.taskflow/sqlite.db", + }, + hooks, + ); + this.hooks = hooks; + } + + get isInitialized(): boolean { + return this.connector.isInitialized; + } + + async initialize(): Promise { + await this.connector.initialize(); + + this.hooks.log({ + severity: "info", + message: "SQLite repository initialized", + attributes: { + [TaskAttributes.REPOSITORY_TYPE]: "sqlite", + }, + }); + } + + async executeBatch(entries: EventLogEntry[]): Promise { + await this.connector.executeBatch(entries); + } + + async findById(taskId: TaskId): Promise { + return this.connector.findTaskById(taskId); + } + + async findByIdempotencyKey( + idempotencyKey: IdempotencyKey, + ): Promise { + return this.connector.findTaskByIdempotencyKey(idempotencyKey); + } + + async findStaleTasks(staleThresholdMs: number): Promise { + return this.connector.findStaleTasks(staleThresholdMs); + } + + async getEvents(taskId: TaskId): Promise { + return this.connector.getTaskEvents(taskId); + } + + async healthCheck(): Promise { + try { + this.connector.findTaskById("__health__check__" as TaskId); + return true; + } catch { + return false; + } + } + + async close(): Promise { + await this.connector.close(); + } +} diff --git a/packages/taskflow/src/persistence/repository/sqlite/types.ts b/packages/taskflow/src/persistence/repository/sqlite/types.ts new file mode 100644 index 00000000..e957c1de --- /dev/null +++ b/packages/taskflow/src/persistence/repository/sqlite/types.ts @@ -0,0 +1,84 @@ +/** + * SQLite repository Types + * + * Types specific to the SQLite repository implementation + */ + +import type { TaskStatus, TaskType } from "@/core/types"; +import type { BaseRepositoryConfig } from "../types"; + +/** + * SQLite repository configuration + */ +export interface SQLiteRepositoryConfig extends BaseRepositoryConfig { + type: "sqlite"; + /** path to the SQLite database file */ + database: string; +} + +/** + * SQLite connector configuration + */ +export interface SQLiteConfig { + /** path to the SQLite database file */ + database: string; +} + +/** + * Raw SQLite task record from the database + * Column names use snake_case to match SQL conventions + */ +export interface SQLiteTaskRecord { + /** task id (primary_key) */ + task_id: string; + /** task name/template */ + name: string; + /** current status */ + status: TaskStatus; + /** task type: 'user' or 'background' */ + type: TaskType; + /** idempotency key for deduplication */ + idempotency_key: string; + /** user id (null for background tasks) */ + user_id: string | null; + /** JSON-stringified input data */ + input_data: string | null; + /** JSON-stringified execution options */ + execution_options: string | null; + /** ISO timestamp when created */ + created_at: string; + /** ISO timestamp when started */ + started_at: string | null; + /** ISO timestamp when completed */ + completed_at: string | null; + /** duration in milliseconds */ + duration_ms: number | null; + /** JSON-stringified result */ + result: string | null; + /** error message */ + error: string | null; + /** attempt count */ + attempt: number; + /** ISO timestamp of last heartbeat */ + last_heartbeat_at: string; +} + +/** + * Raw SQLite task event record from the database + */ +export interface SQLiteTaskEventRecord { + /** event entry id (primary_key) */ + entry_id: string; + /** task id (foreign_key) */ + task_id: string; + /** sequence number within the task */ + seq: number; + /** event type */ + type: TaskType; + /** ISO timestamp of the event */ + timestamp: string; + /** ISO timestamp when inserted */ + 
created_at: string;
+  /** JSON-stringified payload */
+  payload: string | null;
+}
diff --git a/packages/taskflow/src/persistence/repository/types.ts b/packages/taskflow/src/persistence/repository/types.ts
new file mode 100644
index 00000000..33902958
--- /dev/null
+++ b/packages/taskflow/src/persistence/repository/types.ts
@@ -0,0 +1,102 @@
+/**
+ * Repository types
+ *
+ * Defines the abstract repository interface that all persistence
+ * implementations must follow
+ */
+
+import type { IdempotencyKey, TaskId } from "@/core/branded";
+import type { EventLogEntry, StoredEventType, Task } from "@/domain";
+
+/**
+ * Supported repository types
+ */
+export type RepositoryType = "sqlite" | "lakebase";
+
+/**
+ * Event stored in the database
+ */
+export interface StoredEvent {
+  /** unique event id */
+  id: string;
+  /** task this event belongs to */
+  taskId: string;
+  /** sequence number within the task */
+  seq: number;
+  /** event type */
+  type: StoredEventType;
+  /** when the event occurred */
+  timestamp: Date;
+  /** event payload (parsed json) */
+  payload: Record<string, unknown> | null;
+}
+
+/**
+ * Abstract repository interface
+ *
+ * All repository implementations must implement this interface
+ * The repository is responsible for:
+ * - Executing batches of events from the event log
+ * - Querying tasks by ID or idempotency key
+ * - Finding stale tasks for recovery
+ * - Retrieving task events for replay
+ */
+export interface TaskRepository {
+  /** repository type identifier */
+  readonly type: RepositoryType;
+  /** whether the repository is initialized */
+  readonly isInitialized: boolean;
+  /**
+   * Initialize the repository
+   * Creates tables if they don't exist
+   */
+  initialize(): Promise<void>;
+
+  /**
+   * Execute a batch of event log entries
+   * Applies events to update task state in a transaction
+   */
+  executeBatch(entries: EventLogEntry[]): Promise<void>;
+
+  /**
+   * Find a task by its ID
+   * @returns Task or null if not found
+   */
+  findById(taskId: TaskId): Promise<Task | null>;
+
+  /**
+   * Find a task by its idempotency key
+   * @returns Task or null if not found
+   */
+  findByIdempotencyKey(idempotencyKey: IdempotencyKey): Promise<Task | null>;
+
+  /**
+   * Find stale running tasks
+   * Tasks are stale if their last heartbeat is older than the threshold
+   */
+  findStaleTasks(threshold: number): Promise<Task[]>;
+
+  /**
+   * Get all events for a task
+   * Events are ordered by sequence number
+   */
+  getEvents(taskId: TaskId): Promise<StoredEvent[]>;
+
+  /**
+   * Check if the repository is healthy
+   * @returns true if the repository is healthy
+   */
+  healthCheck(): Promise<boolean>;
+
+  /**
+   * Close the repository connection
+   */
+  close(): Promise<void>;
+}
+
+/**
+ * Base repository configuration
+ */
+export interface BaseRepositoryConfig {
+  type: RepositoryType;
+}
diff --git a/packages/taskflow/src/persistence/types.ts b/packages/taskflow/src/persistence/types.ts
new file mode 100644
index 00000000..23558fa6
--- /dev/null
+++ b/packages/taskflow/src/persistence/types.ts
@@ -0,0 +1,80 @@
+/**
+ * Persistence layer types
+ *
+ * Types for event log and persistence configuration
+ */
+
+import type { EventLogEntry } from "@/domain";
+
+/**
+ * configuration for the event log
+ */
+export interface EventLogConfig {
+  /** path to the event log file */
+  eventLogPath: string;
+  /** maximum size of a single log file in bytes before rotation */
+  maxSizeBytesPerFile: number;
+  /** maximum age of a log file in milliseconds before rotation */
+  maxAgePerFile: number;
+  /** interval in milliseconds to check for rotation */
+  rotationInterval: number;
+  /** number of
rotated files to retain */ + retentionCount: number; +} + +/** + * default event log configuration + */ +export const DEFAULT_EVENT_LOG_CONFIG: EventLogConfig = { + eventLogPath: "./.taskflow/event.log", + maxSizeBytesPerFile: 1024 * 1024 * 10, // 10MB + maxAgePerFile: 1000 * 60 * 60, // 1 hour + rotationInterval: 1000 * 60, // 1 minute + retentionCount: 5, +}; + +/** + * statistics about the event log + */ +export interface EventLogStats { + /** status information */ + status: { + /** whether the event log is initialized */ + initialized: boolean; + /** Path to the event log file */ + path: string; + }; + /** sequence tracking */ + sequence: { + /** current sequence number */ + current: number; + }; + /** Rotation information */ + rotation: { + /** number of rotation performed */ + count: number; + /** whether a rotation is currently in progress */ + isActive: boolean; + /** timestamp of the last rotation */ + lastAt?: number; + }; + + /** volume tracking */ + volume: { + /** total number of entries written */ + entriesWritten: number; + /** count of malformed entries skipped during reads */ + malformedSkipped?: number; + }; +} + +/** + * Event log entry with sequence number and checksum + * This is the format written to the WAL file + */ +export interface EventLogEvent extends EventLogEntry { + /** monotonically increasing sequence number */ + seq: number; + /** SHA-256 checksum of the canonicalized entry */ + checksum?: string; +} diff --git a/packages/taskflow/src/tests/persistence/event-log.test.ts b/packages/taskflow/src/tests/persistence/event-log.test.ts new file mode 100644 index 00000000..7b379df2 --- /dev/null +++ b/packages/taskflow/src/tests/persistence/event-log.test.ts @@ -0,0 +1,192 @@ +import fs from "node:fs/promises"; +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import type { EventLogEntry, TaskEvent } from "@/domain/events"; +import { EventLog } from "@/persistence/event-log"; + +describe("EventLog", () => { + let eventLog: EventLog; + const eventLogPath = "./test-event-log"; + + beforeEach(async () => { + eventLog = new EventLog({ eventLogPath, maxSizeBytesPerFile: 1024 }); + await eventLog.initialize(); + }); + + afterEach(async () => { + await eventLog.close(true); + }); + + it("should create event log file when initialized", async () => { + const fileExists = await fs + .access(eventLogPath) + .then(() => true) + .catch(() => false); + expect(fileExists).toBe(true); + }); + + it("should append entry to event log file", async () => { + const entry: EventLogEntry = { + timestamp: Date.now(), + taskId: "task-123", + type: "TASK_CREATED", + name: "test-task", + idempotencyKey: "idem-123", + userId: "user-123", + taskType: "user", + }; + + await eventLog.appendEntry(entry); + const content = await fs.readFile(eventLogPath, "utf8"); + const firstLine = content.split("\n")[0]; + const parsed = JSON.parse(firstLine); + + expect(parsed.type).toBe("TASK_CREATED"); + expect(parsed.taskId).toBe("task-123"); + expect(parsed.seq).toBe(1); + }); + + it("should check if rotation is needed", async () => { + let shouldRotate = await eventLog.shouldRotateEventLog(); + expect(shouldRotate).toBe(false); + + // add enough entries to exceed maxSizeBytesPerFile + for (let i = 0; i < 10; i++) { + await eventLog.appendEntry({ + timestamp: Date.now(), + taskId: `task-${i}`, + type: "TASK_CREATED", + name: "test-task", + idempotencyKey: `key-${i}`, + userId: "user-123", + taskType: "user", + input: { data: "padding".repeat(20) }, + }); + } + + shouldRotate = await 
eventLog.shouldRotateEventLog(); + expect(shouldRotate).toBe(true); + }); + + it("should compact rotated files removing heartbeats", async () => { + const base = { + taskId: "task-123", + name: "test-task", + idempotencyKey: "idem-123", + taskType: "user" as const, + userId: "user-123", + }; + + await eventLog.appendEntry({ + ...base, + timestamp: Date.now(), + type: "TASK_CREATED", + }); + await eventLog.appendEntry({ + ...base, + timestamp: Date.now(), + type: "TASK_HEARTBEAT", + }); + await eventLog.appendEntry({ + ...base, + timestamp: Date.now(), + type: "TASK_HEARTBEAT", + }); + await eventLog.appendEntry({ + ...base, + timestamp: Date.now(), + type: "TASK_HEARTBEAT", + }); + + await eventLog.compactRotatedFile(eventLogPath); + const content = await fs.readFile(eventLogPath, "utf8"); + const lines = content.split("\n").filter(Boolean); + + // only TASK_CREATED should remain + expect(lines.length).toBe(1); + }); + + it("should rotate event log file", async () => { + const base = { + taskId: "task-123", + name: "test-task", + idempotencyKey: "idem-123", + taskType: "user" as const, + userId: "user-123", + }; + + // add enough entries to exceed maxSizeBytesPerFile + for (let i = 0; i < 10; i++) { + await eventLog.appendEntry({ + ...base, + timestamp: Date.now(), + type: "TASK_CREATED", + input: { data: "padding".repeat(20) }, + }); + } + + await eventLog.performRotation(); + + // check rotated file exists + const rotatedExists = await fs + .access(`${eventLogPath}.1`) + .then(() => true) + .catch(() => false); + expect(rotatedExists).toBe(true); + }); + + it("should save and restore sequence number", async () => { + expect(eventLog.currentSeq).toBe(0); + + await eventLog.appendEntry( + { + timestamp: Date.now(), + taskId: "task-123", + type: "TASK_CREATED", + name: "test-task", + idempotencyKey: "idem-123", + userId: "user-123", + taskType: "user", + }, + true, + ); + + expect(eventLog.currentSeq).toBe(1); + + const savedSeq = await fs.readFile(`${eventLogPath}.checkpoint`, "utf8"); + expect(savedSeq).toBe("1"); + }); + + it("should append TaskEvent and convert to EventLogEntry", async () => { + const event = { + id: "evt-123", + taskId: "task-123", + type: "created" as const, + input: { test: "data" }, + taskType: "user" as const, + name: "test-task", + idempotencyKey: "idem-123", + userId: "user-123", + } as TaskEvent; + + await eventLog.appendEvent(event); + + const content = await fs.readFile(eventLogPath, "utf8"); + const lines = content.split("\n").filter(Boolean); + expect(lines.length).toBe(1); + + const parsed = JSON.parse(lines[0]); + expect(parsed.type).toBe("TASK_CREATED"); + expect(parsed.input).toEqual({ test: "data" }); + }); + + it("should return stats", () => { + const stats = eventLog.getStats(); + + expect(stats.status.initialized).toBe(true); + expect(stats.status.path).toBe(eventLogPath); + expect(stats.sequence.current).toBe(0); + expect(stats.rotation.count).toBe(0); + expect(stats.rotation.isActive).toBe(false); + expect(stats.volume.entriesWritten).toBe(0); + }); +}); diff --git a/packages/taskflow/src/tests/persistence/repository/lakebase.test.ts b/packages/taskflow/src/tests/persistence/repository/lakebase.test.ts new file mode 100644 index 00000000..ac30690c --- /dev/null +++ b/packages/taskflow/src/tests/persistence/repository/lakebase.test.ts @@ -0,0 +1,148 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; +import type { EventLogEntry } from "@/domain/events"; +import { LakebaseTaskRepository } from 
"@/persistence/repository/lakebase/repository"; +import type { LakebaseConnector } from "@/persistence/repository/lakebase/types"; + +describe("LakebaseTaskRepository", () => { + let repository: LakebaseTaskRepository; + let mockConnector: LakebaseConnector; + let queryResults: Map; + + beforeEach(() => { + queryResults = new Map(); + + // create a properly typed mock query function + const mockQueryFn = vi + .fn() + .mockImplementation( + async >( + sql: string, + _params?: unknown[], + ): Promise<{ rows: T[] }> => { + // handle sequence query + if (sql.includes("MAX(seq)")) { + return { rows: [{ nextseq: 1 }] as unknown as T[] }; + } + // return configured results or empty + const key = sql.trim().split(" ")[0]; + return { rows: (queryResults.get(key) ?? []) as T[] }; + }, + ); + + // create mock transaction function + const mockTransactionFn = vi + .fn() + .mockImplementation( + async ( + fn: (client: LakebaseConnector) => Promise, + ): Promise => { + return fn(mockConnector); + }, + ); + + mockConnector = { + query: mockQueryFn, + transaction: mockTransactionFn, + healthCheck: vi.fn().mockResolvedValue(true), + close: vi.fn().mockResolvedValue(undefined), + } as unknown as LakebaseConnector; + + repository = new LakebaseTaskRepository({ + type: "lakebase", + connector: mockConnector, + }); + }); + + describe("initialization", () => { + it("should initialize and run migrations", async () => { + await repository.initialize(); + expect(repository.isInitialized).toBe(true); + + // should have called query for CREATE TABLE statements + expect(mockConnector.query).toHaveBeenCalled(); + }); + }); + + describe("executeBatch", () => { + it("should execute entries in a transaction", async () => { + await repository.initialize(); + + const entries: EventLogEntry[] = [ + { + type: "TASK_CREATED", + taskId: "task-001", + name: "test-task", + idempotencyKey: "idem-001", + userId: "user-123", + timestamp: Date.now(), + taskType: "user", + }, + ]; + + await repository.executeBatch(entries); + expect(mockConnector.transaction).toHaveBeenCalled(); + }); + + it("should skip empty batch", async () => { + await repository.initialize(); + await repository.executeBatch([]); + expect(mockConnector.transaction).not.toHaveBeenCalled(); + }); + }); + + describe("findById", () => { + it("should query and return task", async () => { + await repository.initialize(); + + queryResults.set("SELECT", [ + { + task_id: "task-001", + name: "test-task", + status: "created", + type: "user", + idempotency_key: "idem-001", + user_id: "user-123", + input_data: "{}", + created_at: new Date().toISOString(), + last_heartbeat_at: new Date().toISOString(), + attempt: 0, + execution_options: null, + result: null, + error: null, + started_at: null, + completed_at: null, + }, + ]); + + const task = await repository.findById("task-001" as any); + expect(task).not.toBeNull(); + expect(task?.id).toBe("task-001"); + }); + + it("should return null for non-existent task", async () => { + await repository.initialize(); + queryResults.set("SELECT", []); + + const task = await repository.findById("non-existent" as any); + expect(task).toBeNull(); + }); + }); + + describe("healthCheck", () => { + it("should delegate to connector", async () => { + const result = await repository.healthCheck(); + expect(result).toBe(true); + expect(mockConnector.healthCheck).toHaveBeenCalled(); + }); + }); + + describe("close", () => { + it("should close connector and update state", async () => { + await repository.initialize(); + await repository.close(); + + 
expect(repository.isInitialized).toBe(false); + expect(mockConnector.close).toHaveBeenCalled(); + }); + }); +}); diff --git a/packages/taskflow/src/tests/persistence/repository/sqlite.test.ts b/packages/taskflow/src/tests/persistence/repository/sqlite.test.ts new file mode 100644 index 00000000..1e60f488 --- /dev/null +++ b/packages/taskflow/src/tests/persistence/repository/sqlite.test.ts @@ -0,0 +1,347 @@ +import fs from "node:fs"; +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { idempotencyKey, taskId } from "@/core/branded"; +import type { EventLogEntry } from "@/domain/events"; +import { SQLiteConnector } from "@/persistence/repository/sqlite/connector"; + +describe("SQLiteConnector", () => { + let connector: SQLiteConnector; + const testDbPath = "./test-sqlite-connector.db"; + + beforeEach(async () => { + connector = new SQLiteConnector({ database: testDbPath }); + await connector.initialize(); + }); + + afterEach(async () => { + await connector.close(); + + // cleanup database files + for (const suffix of ["", "-wal", "-shm"]) { + if (fs.existsSync(`${testDbPath}${suffix}`)) { + fs.unlinkSync(`${testDbPath}${suffix}`); + } + } + }); + + describe("initialization", () => { + it("should initialize the connector", () => { + expect(connector.isInitialized).toBe(true); + }); + + it("should close the connector", async () => { + await connector.close(); + expect(connector.isInitialized).toBe(false); + }); + }); + + describe("executeTaskCreated", () => { + it("should insert a task into tasks table", async () => { + const entry: EventLogEntry = { + type: "TASK_CREATED", + taskId: "task-001", + name: "test-task", + idempotencyKey: "idem-001", + userId: "user-123", + timestamp: Date.now(), + input: { foo: "bar" }, + taskType: "user", + }; + + await connector.executeBatch([entry]); + + const task = connector.findTaskById(taskId("task-001")); + expect(task).not.toBeNull(); + expect(task?.name).toBe("test-task"); + expect(task?.status).toBe("created"); + }); + + it("should insert task event with seq 1", async () => { + const entry: EventLogEntry = { + type: "TASK_CREATED", + taskId: "task-001", + name: "test-task", + idempotencyKey: "idem-001", + userId: "user-123", + timestamp: Date.now(), + taskType: "user", + }; + + await connector.executeBatch([entry]); + + const events = connector.getTaskEvents(taskId("task-001")); + expect(events).toHaveLength(1); + expect(events[0].type).toBe("TASK_CREATED"); + expect(events[0].seq).toBe(1); + }); + }); + + describe("executeTaskRunning", () => { + it("should update task status to running", async () => { + const now = Date.now(); + const entries: EventLogEntry[] = [ + { + type: "TASK_CREATED", + taskId: "task-001", + name: "test-task", + idempotencyKey: "idem-001", + userId: "user-123", + timestamp: now, + taskType: "user", + }, + { + type: "TASK_START", + taskId: "task-001", + name: "test-task", + idempotencyKey: "idem-001", + userId: "user-123", + timestamp: now + 100, + taskType: "user", + }, + ]; + + await connector.executeBatch(entries); + + const task = connector.findTaskById(taskId("task-001")); + expect(task?.status).toBe("running"); + expect(task?.startedAt).not.toBeNull(); + }); + }); + + describe("executeTaskCompleted", () => { + it("should update task status to completed with result", async () => { + const now = Date.now(); + const entries: EventLogEntry[] = [ + { + type: "TASK_CREATED", + taskId: "task-001", + name: "test-task", + idempotencyKey: "idem-001", + userId: "user-123", + timestamp: now, + taskType: 
"user", + }, + { + type: "TASK_START", + taskId: "task-001", + name: "test-task", + idempotencyKey: "idem-001", + userId: "user-123", + timestamp: now + 100, + taskType: "user", + }, + { + type: "TASK_COMPLETE", + taskId: "task-001", + name: "test-task", + idempotencyKey: "idem-001", + userId: "user-123", + timestamp: now + 1000, + taskType: "user", + result: { success: true }, + }, + ]; + + await connector.executeBatch(entries); + + const task = connector.findTaskById(taskId("task-001")); + expect(task?.status).toBe("completed"); + expect(task?.result).toEqual({ success: true }); + }); + }); + + describe("executeTaskHeartbeat", () => { + it("should update heartbeat but NOT insert task event", async () => { + const now = Date.now(); + const entries: EventLogEntry[] = [ + { + type: "TASK_CREATED", + taskId: "task-001", + name: "test-task", + idempotencyKey: "idem-001", + userId: "user-123", + timestamp: now, + taskType: "user", + }, + { + type: "TASK_START", + taskId: "task-001", + name: "test-task", + idempotencyKey: "idem-001", + userId: "user-123", + timestamp: now + 100, + taskType: "user", + }, + { + type: "TASK_HEARTBEAT", + taskId: "task-001", + name: "test-task", + idempotencyKey: "idem-001", + userId: "user-123", + timestamp: now + 500, + taskType: "user", + }, + ]; + + await connector.executeBatch(entries); + + // only 2 events, created and start + const events = connector.getTaskEvents(taskId("task-001")); + expect(events).toHaveLength(2); + + // heartbeat is not stored in task_events table + const task = connector.findTaskById(taskId("task-001")); + expect(task?.lastHeartbeatAt).toBeDefined(); + }); + }); + + describe("findStaleTasks", () => { + it("should find stale running tasks", async () => { + const oldTimestamp = Date.now() - 60000; // 1 minute ago + + const entries: EventLogEntry[] = [ + { + type: "TASK_CREATED", + taskId: "task-stale", + name: "stale-task", + idempotencyKey: "idem-stale", + userId: "user-123", + timestamp: oldTimestamp, + taskType: "user", + }, + { + type: "TASK_START", + taskId: "task-stale", + name: "stale-task", + idempotencyKey: "idem-stale", + userId: "user-123", + timestamp: oldTimestamp + 100, + taskType: "user", + }, + ]; + + await connector.executeBatch(entries); + + const staleTasks = connector.findStaleTasks(30000); + expect(staleTasks).toHaveLength(1); + expect(staleTasks[0]?.id).toBe("task-stale"); + }); + + it("should not return fresh running tasks", async () => { + const now = Date.now(); + + const entries: EventLogEntry[] = [ + { + type: "TASK_CREATED", + taskId: "task-fresh", + name: "fresh-task", + idempotencyKey: "idem-fresh", + userId: "user-123", + timestamp: now, + taskType: "user", + }, + { + type: "TASK_START", + taskId: "task-fresh", + name: "fresh-task", + idempotencyKey: "idem-fresh", + userId: "user-123", + timestamp: now + 100, + taskType: "user", + }, + ]; + + await connector.executeBatch(entries); + + const staleTasks = connector.findStaleTasks(30000); + expect(staleTasks).toHaveLength(0); + }); + }); + + describe("query methods", () => { + beforeEach(async () => { + await connector.executeBatch([ + { + type: "TASK_CREATED", + taskId: "task-001", + name: "test-task", + idempotencyKey: "idem-001", + userId: "user-123", + timestamp: Date.now(), + taskType: "user", + }, + ]); + }); + + it("should find task by id", () => { + const task = connector.findTaskById(taskId("task-001")); + expect(task).not.toBeNull(); + expect(task?.id).toBe("task-001"); + }); + + it("should return null for non-existent task", () => { + const task = 
connector.findTaskById(taskId("non-existent")); + expect(task).toBeNull(); + }); + + it("should find task by idempotency key", () => { + const task = connector.findTaskByIdempotencyKey( + idempotencyKey("idem-001"), + ); + expect(task).not.toBeNull(); + expect(task?.idempotencyKey).toBe("idem-001"); + }); + }); + + describe("getTaskEvents", () => { + it("should return events in order by seq", async () => { + const now = Date.now(); + const entries: EventLogEntry[] = [ + { + type: "TASK_CREATED", + taskId: "task-001", + name: "t", + idempotencyKey: "i", + userId: "u", + timestamp: now, + taskType: "user", + }, + { + type: "TASK_START", + taskId: "task-001", + name: "t", + idempotencyKey: "i", + userId: "u", + timestamp: now + 100, + taskType: "user", + }, + { + type: "TASK_PROGRESS", + taskId: "task-001", + name: "t", + idempotencyKey: "i", + userId: "u", + timestamp: now + 200, + taskType: "user", + payload: { step: 1 }, + }, + { + type: "TASK_COMPLETE", + taskId: "task-001", + name: "t", + idempotencyKey: "i", + userId: "u", + timestamp: now + 300, + taskType: "user", + result: { done: true }, + }, + ]; + + await connector.executeBatch(entries); + + const events = connector.getTaskEvents(taskId("task-001")); + expect(events).toHaveLength(4); + expect(events.map((e) => e.seq)).toEqual([1, 2, 3, 4]); + }); + }); +}); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 371f9e9d..e8b46859 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -509,10 +509,22 @@ importers: packages/taskflow: dependencies: + '@types/better-sqlite3': + specifier: ^7.6.13 + version: 7.6.13 + better-sqlite3: + specifier: ^12.6.2 + version: 12.6.2 json-canonicalize: specifier: ^2.0.0 version: 2.0.0 + pg: + specifier: ^8.16.3 + version: 8.16.3 devDependencies: + '@types/pg': + specifier: ^8.15.6 + version: 8.15.6 vitest: specifier: ^3.2.4 version: 3.2.4(@types/debug@4.1.12)(@types/node@24.10.1)(jiti@2.6.1)(jsdom@27.0.0(bufferutil@4.0.9)(postcss@8.5.6))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.20.6)(yaml@2.8.1) @@ -4305,6 +4317,9 @@ packages: '@types/babel__traverse@7.28.0': resolution: {integrity: sha512-8PvcXf70gTDZBgt9ptxJ8elBeBjcLOAcOtoO/mPJjtji1+CdGbHgm77om1GrsPxsiE+uXIpNSK64UYaIwQXd4Q==} + '@types/better-sqlite3@7.6.13': + resolution: {integrity: sha512-NMv9ASNARoKksWtsq/SHakpYAYnhBrQgGD8zkLYk/jaK8jUGn08CfEdTRgYhMypUQAfzSP8W6gNLe0q19/t4VA==} + '@types/body-parser@1.19.6': resolution: {integrity: sha512-HLFeCYgz89uk22N5Qg3dvGvsv46B8GLvKKo1zKG4NybA8U2DiEO3w9lqGg29t/tfLRJpJ6iQxnVw4OnB7MoM9g==} @@ -5101,6 +5116,10 @@ packages: before-after-hook@4.0.0: resolution: {integrity: sha512-q6tR3RPqIB1pMiTRMFcZwuG5T8vwp+vUvEG0vuI6B+Rikh5BfPp2fQ82c925FOs+b0lcFQ8CFrL+KbilfZFhOQ==} + better-sqlite3@12.6.2: + resolution: {integrity: sha512-8VYKM3MjCa9WcaSAI3hzwhmyHVlH8tiGFwf0RlTsZPWJ1I5MkzjiudCo4KC4DxOaL/53A5B1sI/IbldNFDbsKA==} + engines: {node: 20.x || 22.x || 23.x || 24.x || 25.x} + bidi-js@1.0.3: resolution: {integrity: sha512-RKshQI1R3YQ+n9YJz2QQ147P66ELpa1FQEg20Dk8oW9t2KgLbpDLLp9aGZ7y8WHSshDknG0bknqGw5/tyCs5tw==} @@ -5114,6 +5133,9 @@ packages: resolution: {integrity: sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==} engines: {node: '>=8'} + bindings@1.5.0: + resolution: {integrity: sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==} + birpc@2.6.1: resolution: {integrity: sha512-LPnFhlDpdSH6FJhJyn4M0kFO7vtQ5iPw24FnG0y21q09xC7e8+1LeR31S1MAIrDAHp4m7aas4bEkTDTvMAtebQ==} @@ -5314,6 +5336,9 @@ packages: resolution: {integrity: 
sha512-TQMmc3w+5AxjpL8iIiwebF73dRDF4fBIieAqGn9RGCWaEVwQ6Fb2cGe31Yns0RRIzii5goJ1Y7xbMwo1TxMplw==} engines: {node: '>= 20.19.0'} + chownr@1.1.4: + resolution: {integrity: sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==} + chrome-trace-event@1.0.4: resolution: {integrity: sha512-rNjApaLzuwaOTjCiT8lSDdGN1APCiqkChLMJxJPWLunPAt5fy8xgU9/jNOchV84wfIxrA0lRQB7oCT8jrn/wrQ==} engines: {node: '>=6.0'} @@ -6298,6 +6323,9 @@ packages: resolution: {integrity: sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==} engines: {node: '>= 0.8'} + end-of-stream@1.4.5: + resolution: {integrity: sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==} + enhanced-resolve@5.18.3: resolution: {integrity: sha512-d4lC8xfavMeBjzGr2vECC3fsGXziXZQyJxD868h2M/mBI3PwAuODxAkLkq5HYuvrPYcUtiLzsTo8U3PgX3Ocww==} engines: {node: '>=10.13.0'} @@ -6509,6 +6537,10 @@ packages: resolution: {integrity: sha512-VyhnebXciFV2DESc+p6B+y0LjSm0krU4OgJN44qFAhBY0TJ+1V61tYD2+wHusZ6F9n5K+vl8k0sTy7PEfV4qpg==} engines: {node: '>=16.17'} + expand-template@2.0.3: + resolution: {integrity: sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==} + engines: {node: '>=6'} + expand-tilde@2.0.2: resolution: {integrity: sha512-A5EmesHW6rfnZ9ysHQjPdJRni0SRar0tjtG5MNtm9n5TUvsYU8oozprtRD4AqHxcZWWlVuAmQo2nWKfN9oyjTw==} engines: {node: '>=0.10.0'} @@ -6598,6 +6630,9 @@ packages: peerDependencies: webpack: ^4.0.0 || ^5.0.0 + file-uri-to-path@1.0.0: + resolution: {integrity: sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==} + filing-cabinet@5.0.3: resolution: {integrity: sha512-PlPcMwVWg60NQkhvfoxZs4wEHjhlOO/y7OAm4sKM60o1Z9nttRY4mcdQxp/iZ+kg/Vv6Hw1OAaTbYVM9DA9pYg==} engines: {node: '>=18'} @@ -6709,6 +6744,9 @@ packages: resolution: {integrity: sha512-zJ2mQYM18rEFOudeV4GShTGIQ7RbzA7ozbU9I/XBpm7kqgMywgmylMwXHxZJmkVoYkna9d2pVXVXPdYTP9ej8Q==} engines: {node: '>= 0.6'} + fs-constants@1.0.0: + resolution: {integrity: sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==} + fs-extra@11.3.2: resolution: {integrity: sha512-Xr9F6z6up6Ws+NjzMCZc6WXg2YFRlrLP9NQDO3VQrWrfiojdhS56TzueT88ze0uBdCTwEIhQ3ptnmKeWGFAe0A==} engines: {node: '>=14.14'} @@ -6806,6 +6844,9 @@ packages: git-url-parse@16.1.0: resolution: {integrity: sha512-cPLz4HuK86wClEW7iDdeAKcCVlWXmrLpb2L+G9goW0Z1dtpNS6BXXSOckUTlJT/LDQViE1QZKstNORzHsLnobw==} + github-from-package@0.0.0: + resolution: {integrity: sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==} + github-slugger@1.5.0: resolution: {integrity: sha512-wIh+gKBI9Nshz2o46B0B3f5k/W+WI9ZAv6y5Dn5WJ5SK1t0TnDimB4WE5rmTD05ZAIn8HALCZVmCsvj0w0v0lw==} @@ -8218,6 +8259,9 @@ packages: resolution: {integrity: sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==} engines: {node: '>=16 || 14 >=14.17'} + mkdirp-classic@0.5.3: + resolution: {integrity: sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==} + mkdirp@0.3.0: resolution: {integrity: sha512-OHsdUcVAQ6pOtg5JYWpCBo9W/GySVuwvP9hueRMW7UqshC0tbfzLv8wjySTPm3tfUZ/21CE9E1pJagOA91Pxew==} deprecated: Legacy versions of mkdirp are no longer supported. Please update to mkdirp 1.x. (Note that the API surface has changed to use Promises in 1.x.) 
@@ -8272,6 +8316,9 @@ packages: nanospinner@1.2.2: resolution: {integrity: sha512-Zt/AmG6qRU3e+WnzGGLuMCEAO/dAu45stNbHY223tUxldaDAeE+FxSPsd9Q+j+paejmm0ZbrNVs5Sraqy3dRxA==} + napi-build-utils@2.0.0: + resolution: {integrity: sha512-GEbrYkbfF7MoNaoh2iGG84Mnf/WZfB0GdGEsM8wz7Expx/LlWf5U8t9nvJKXSp3qr5IsEbK04cBGhol/KwOsWA==} + natural-compare@1.4.0: resolution: {integrity: sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==} @@ -8303,6 +8350,10 @@ packages: no-case@3.0.4: resolution: {integrity: sha512-fgAN3jGAh+RoxUGZHTSOLJIqUc2wmoBwGR4tbpNAKmmovFoWq0OdRkb0VkldReO2a2iBT/OEulG9XSUc10r3zg==} + node-abi@3.87.0: + resolution: {integrity: sha512-+CGM1L1CgmtheLcBuleyYOn7NWPVu0s0EJH2C4puxgEZb9h8QpR9G2dBfZJOAUhi7VQxuBPMd0hiISWcTyiYyQ==} + engines: {node: '>=10'} + node-domexception@1.0.0: resolution: {integrity: sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==} engines: {node: '>=10.5.0'} @@ -9129,6 +9180,11 @@ packages: resolution: {integrity: sha512-9ZhXKM/rw350N1ovuWHbGxnGh/SNJ4cnxHiM0rxE4VN41wsg8P8zWn9hv/buK00RP4WvlOyr/RBDiptyxVbkZQ==} engines: {node: '>=0.10.0'} + prebuild-install@7.1.3: + resolution: {integrity: sha512-8Mf2cbV7x1cXPUILADGI3wuhfqWvtiLA1iclTDbFRZkgRQS0NqsPZphna9V+HyTEadheuPmjaJMsbzKQFOzLug==} + engines: {node: '>=10'} + hasBin: true + precinct@12.2.0: resolution: {integrity: sha512-NFBMuwIfaJ4SocE9YXPU/n4AcNSoFMVFjP72nvl3cx69j/ke61/hPOWFREVxLkFhhEGnA8ZuVfTqJBa+PK3b5w==} engines: {node: '>=18'} @@ -9200,6 +9256,9 @@ packages: engines: {node: '>=18'} hasBin: true + pump@3.0.3: + resolution: {integrity: sha512-todwxLMY7/heScKmntwQG8CXVkWUOdYxIvY2s0VWAAMh/nd8SoYiRaKjlr7+iCs984f2P8zvrfWcDDYVb73NfA==} + punycode.js@2.3.1: resolution: {integrity: sha512-uxFIHU0YlHYhDQtV4R9J6a52SLx28BCjT+4ieh7IGbgwVJWO+km431c4yRlREUAsAmt/uMjQUyQHNEPf0M39CA==} engines: {node: '>=6'} @@ -9836,6 +9895,12 @@ packages: resolution: {integrity: sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==} engines: {node: '>=14'} + simple-concat@1.0.1: + resolution: {integrity: sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==} + + simple-get@4.0.1: + resolution: {integrity: sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==} + sirv@2.0.4: resolution: {integrity: sha512-94Bdh3cC2PKrbgSOUqTiGPWVZeSiXfKOVZNJniWoqrWrRkB1CJzBU3NEbiTsPcYy1lDsANA/THzS+9WBiy5nfQ==} engines: {node: '>= 10'} @@ -10110,6 +10175,13 @@ packages: resolution: {integrity: sha512-g9ljZiwki/LfxmQADO3dEY1CbpmXT5Hm2fJ+QaGKwSXUylMybePR7/67YW7jOrrvjEgL1Fmz5kzyAjWVWLlucg==} engines: {node: '>=6'} + tar-fs@2.1.4: + resolution: {integrity: sha512-mDAjwmZdh7LTT6pNleZ05Yt65HC3E+NiQzl672vQG38jIrehtJk/J3mNwIg+vShQPcLF/LV7CMnDW6vjj6sfYQ==} + + tar-stream@2.2.0: + resolution: {integrity: sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==} + engines: {node: '>=6'} + terser-webpack-plugin@5.3.16: resolution: {integrity: sha512-h9oBFCWrq78NyWWVcSwZarJkZ01c2AyGrzs1crmHZO3QUg9D61Wu4NPjBy69n7JqylFF5y+CsUZYmYEIZ3mR+Q==} engines: {node: '>= 10.13.0'} @@ -10298,6 +10370,9 @@ packages: engines: {node: '>=18.0.0'} hasBin: true + tunnel-agent@0.6.0: + resolution: {integrity: sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==} + turbo-darwin-64@2.6.1: resolution: {integrity: sha512-Dm0HwhyZF4J0uLqkhUyCVJvKM9Rw7M03v3J9A7drHDQW0qAbIGBrUijQ8g4Q9Cciw/BXRRd8Uzkc3oue+qn+ZQ==} 
cpu: [x64] @@ -15793,6 +15868,10 @@ snapshots: dependencies: '@babel/types': 7.28.5 + '@types/better-sqlite3@7.6.13': + dependencies: + '@types/node': 24.10.1 + '@types/body-parser@1.19.6': dependencies: '@types/connect': 3.4.38 @@ -16798,6 +16877,11 @@ snapshots: before-after-hook@4.0.0: {} + better-sqlite3@12.6.2: + dependencies: + bindings: 1.5.0 + prebuild-install: 7.1.3 + bidi-js@1.0.3: dependencies: require-from-string: 2.0.2 @@ -16808,6 +16892,10 @@ snapshots: binary-extensions@2.3.0: {} + bindings@1.5.0: + dependencies: + file-uri-to-path: 1.0.0 + birpc@2.6.1: {} bl@4.1.0: @@ -17075,6 +17163,8 @@ snapshots: dependencies: readdirp: 5.0.0 + chownr@1.1.4: {} + chrome-trace-event@1.0.4: {} ci-info@3.9.0: {} @@ -18078,6 +18168,10 @@ snapshots: encodeurl@2.0.0: {} + end-of-stream@1.4.5: + dependencies: + once: 1.4.0 + enhanced-resolve@5.18.3: dependencies: graceful-fs: 4.2.11 @@ -18344,6 +18438,8 @@ snapshots: signal-exit: 4.1.0 strip-final-newline: 3.0.0 + expand-template@2.0.3: {} + expand-tilde@2.0.2: dependencies: homedir-polyfill: 1.0.3 @@ -18457,6 +18553,8 @@ snapshots: schema-utils: 3.3.0 webpack: 5.103.0 + file-uri-to-path@1.0.0: {} + filing-cabinet@5.0.3: dependencies: app-module-path: 2.2.0 @@ -18567,6 +18665,8 @@ snapshots: fresh@0.5.2: {} + fs-constants@1.0.0: {} + fs-extra@11.3.2: dependencies: graceful-fs: 4.2.11 @@ -18702,6 +18802,8 @@ snapshots: dependencies: git-up: 8.1.1 + github-from-package@0.0.0: {} + github-slugger@1.5.0: {} glob-parent@5.1.2: @@ -20491,6 +20593,8 @@ snapshots: minipass@7.1.2: {} + mkdirp-classic@0.5.3: {} + mkdirp@0.3.0: {} mlly@1.8.0: @@ -20537,6 +20641,8 @@ snapshots: dependencies: picocolors: 1.1.1 + napi-build-utils@2.0.0: {} + natural-compare@1.4.0: {} negotiator@0.6.3: {} @@ -20561,6 +20667,10 @@ snapshots: lower-case: 2.0.2 tslib: 2.8.1 + node-abi@3.87.0: + dependencies: + semver: 7.7.3 + node-domexception@1.0.0: {} node-emoji@2.2.0: @@ -21468,6 +21578,21 @@ snapshots: dependencies: xtend: 4.0.2 + prebuild-install@7.1.3: + dependencies: + detect-libc: 2.1.2 + expand-template: 2.0.3 + github-from-package: 0.0.0 + minimist: 1.2.8 + mkdirp-classic: 0.5.3 + napi-build-utils: 2.0.0 + node-abi: 3.87.0 + pump: 3.0.3 + rc: 1.2.8 + simple-get: 4.0.1 + tar-fs: 2.1.4 + tunnel-agent: 0.6.0 + precinct@12.2.0: dependencies: '@dependents/detective-less': 5.0.1 @@ -21576,6 +21701,11 @@ snapshots: picocolors: 1.1.1 sade: 1.8.1 + pump@3.0.3: + dependencies: + end-of-stream: 1.4.5 + once: 1.4.0 + punycode.js@2.3.1: {} punycode@2.3.1: {} @@ -22396,6 +22526,14 @@ snapshots: signal-exit@4.1.0: {} + simple-concat@1.0.1: {} + + simple-get@4.0.1: + dependencies: + decompress-response: 6.0.0 + once: 1.4.0 + simple-concat: 1.0.1 + sirv@2.0.4: dependencies: '@polka/url': 1.0.0-next.29 @@ -22667,6 +22805,21 @@ snapshots: tapable@2.3.0: {} + tar-fs@2.1.4: + dependencies: + chownr: 1.1.4 + mkdirp-classic: 0.5.3 + pump: 3.0.3 + tar-stream: 2.2.0 + + tar-stream@2.2.0: + dependencies: + bl: 4.1.0 + end-of-stream: 1.4.5 + fs-constants: 1.0.0 + inherits: 2.0.4 + readable-stream: 3.6.2 + terser-webpack-plugin@5.3.16(webpack@5.103.0): dependencies: '@jridgewell/trace-mapping': 0.3.31 @@ -22818,6 +22971,10 @@ snapshots: optionalDependencies: fsevents: 2.3.3 + tunnel-agent@0.6.0: + dependencies: + safe-buffer: 5.2.1 + turbo-darwin-64@2.6.1: optional: true diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index 3b88e350..f2847d8d 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -2,3 +2,5 @@ packages: - "packages/*" - "apps/*" - "docs" +onlyBuiltDependencies: + - 
better-sqlite3

From 0d99fb5fbe6e459fafdd00309787ff8f1b20577f Mon Sep 17 00:00:00 2001
From: Ditadi
Date: Tue, 27 Jan 2026 16:50:29 +0000
Subject: [PATCH 09/13] feat(taskflow): execution layer with orchestrator,
 executor and recovery

---
 packages/taskflow/src/domain/handler.ts           |   8 +-
 packages/taskflow/src/domain/task.ts              |   2 +-
 packages/taskflow/src/execution/executor.ts       | 458 +++++++++
 packages/taskflow/src/execution/index.ts          |  28 +
 packages/taskflow/src/execution/recovery.ts       | 438 ++++++++
 packages/taskflow/src/execution/system.ts         | 579 +++++++++++
 packages/taskflow/src/execution/types.ts          | 374 +++++++
 packages/taskflow/src/flush/flush-manager.ts      | 333 ++++++
 packages/taskflow/src/flush/flush-worker-entry.ts | 103 ++
 packages/taskflow/src/flush/flush-worker.ts       | 364 +++++++
 packages/taskflow/src/flush/index.ts              |  12 +
 packages/taskflow/src/flush/types.ts              | 131 +++
 packages/taskflow/src/guard/types.ts              |   2 +-
 packages/taskflow/src/index.test.ts               |   8 -
 packages/taskflow/src/index.ts                    | 196 +++-
 packages/taskflow/src/persistence/event-log.ts    | 117 +--
 .../repository/lakebase/repository.ts             |  14 +-
 .../src/tests/execution/executor.test.ts          | 592 +++++++++++
 .../src/tests/execution/recovery.test.ts          | 525 ++++++++++
 .../src/tests/execution/system.test.ts            | 448 ++++++++
 packages/taskflow/src/tests/flush/flush.test.ts   | 960 ++++++++++++++++++
 21 files changed, 5592 insertions(+), 100 deletions(-)
 create mode 100644 packages/taskflow/src/execution/executor.ts
 create mode 100644 packages/taskflow/src/execution/index.ts
 create mode 100644 packages/taskflow/src/execution/recovery.ts
 create mode 100644 packages/taskflow/src/execution/system.ts
 create mode 100644 packages/taskflow/src/execution/types.ts
 create mode 100644 packages/taskflow/src/flush/flush-manager.ts
 create mode 100644 packages/taskflow/src/flush/flush-worker-entry.ts
 create mode 100644 packages/taskflow/src/flush/flush-worker.ts
 create mode 100644 packages/taskflow/src/flush/index.ts
 create mode 100644 packages/taskflow/src/flush/types.ts
 delete mode 100644 packages/taskflow/src/index.test.ts
 create mode 100644 packages/taskflow/src/tests/execution/executor.test.ts
 create mode 100644 packages/taskflow/src/tests/execution/recovery.test.ts
 create mode 100644 packages/taskflow/src/tests/execution/system.test.ts
 create mode 100644 packages/taskflow/src/tests/flush/flush.test.ts

diff --git a/packages/taskflow/src/domain/handler.ts b/packages/taskflow/src/domain/handler.ts
index 374f5d88..4a8bccbb 100644
--- a/packages/taskflow/src/domain/handler.ts
+++ b/packages/taskflow/src/domain/handler.ts
@@ -1,3 +1,5 @@
+import type { ZodType } from "zod";
+import type { TaskType } from "@/core/types";
 import type { TaskEvent, TaskEventInput } from "./events";
 import type { TaskExecutionOptions } from "./types";
 
@@ -110,8 +112,12 @@
   recover?: RecoveryHandler;
   /** Task description for documentation */
   description?: string;
+  /** Task type: user or background (defaults to "user") */
+  type?: TaskType;
+  /** Optional Zod schema for input validation */
+  inputSchema?: ZodType;
   /** Default execution options */
-  defaultOptions: TaskExecutionOptions;
+  defaultOptions?: TaskExecutionOptions;
 }
 
 /**
diff --git a/packages/taskflow/src/domain/task.ts b/packages/taskflow/src/domain/task.ts
index 69735d7b..e99377b0 100644
--- a/packages/taskflow/src/domain/task.ts
+++ b/packages/taskflow/src/domain/task.ts
@@ -286,7 +286,7 @@ export class Task {
    * Generate a deterministic idempotency key from task parameters
    * Uses json-canonicalize for consistent key ordering
    */
-  static generateIdempotencyKey(params: TaskCreationParams): string {
+  static generateIdempotencyKey(params: TaskCreationParams): IdempotencyKey {
     const payload = {
       name: params.name,
       input: params.input,
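+/*
+ * Sketch of the canonicalize-then-hash idea behind generateIdempotencyKey
+ * (illustrative; the exact payload fields and digest format are assumptions,
+ * only the use of json-canonicalize is confirmed by this diff):
+ *
+ *   import { createHash } from "node:crypto";
+ *   import { canonicalize } from "json-canonicalize";
+ *
+ *   // same input yields the same canonical JSON, and therefore the same
+ *   // key, regardless of property insertion order
+ *   const payload = canonicalize({ name, input, userId });
+ *   const key = createHash("sha256").update(payload).digest("hex");
+ */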
diff --git a/packages/taskflow/src/execution/executor.ts b/packages/taskflow/src/execution/executor.ts
new file mode 100644
index 00000000..43b4cdf8
--- /dev/null
+++ b/packages/taskflow/src/execution/executor.ts
@@ -0,0 +1,458 @@
+import type { IdempotencyKey } from "@/core/branded";
+import { isRetryableError } from "@/core/errors";
+import {
+  createTaskEvent,
+  isAsyncGenerator,
+  type Task,
+  type TaskDefinition,
+  type TaskEventContext,
+  type TaskEventInput,
+  type TaskHandlerContext,
+} from "@/domain";
+import {
+  noopHooks,
+  TaskAttributes,
+  TaskMetrics,
+  TaskSpans,
+  type TaskSystemHooks,
+} from "@/observability";
+import type { EventLog } from "@/persistence";
+import {
+  type ExecutorConfig,
+  type ExecutorStats,
+  mergeExecutorConfig,
+  type TaskEventSubscriber,
+} from "./types";
+
+/**
+ * Dependencies for TaskExecutor
+ */
+export interface TaskExecutorDeps {
+  /** event log for WAL persistence */
+  eventLog: EventLog;
+  /** optional event subscribers */
+  subscribers?: TaskEventSubscriber;
+}
+
+/**
+ * TaskExecutor handles the execution of task handlers with:
+ * - Retry logic with exponential backoff
+ * - Periodic heartbeat emission
+ * - AbortController for cancellation
+ * - WAL-first event persistence
+ */
+export class TaskExecutor {
+  private readonly config: ExecutorConfig;
+  private readonly hooks: TaskSystemHooks;
+  private readonly eventLog: EventLog;
+  private readonly subscribers?: TaskEventSubscriber;
+
+  /** active abort controllers keyed by idempotency key */
+  private readonly abortControllers: Map<IdempotencyKey, AbortController>;
+  /** active heartbeat intervals keyed by idempotency key */
+  private readonly heartbeatIntervals: Map<
+    IdempotencyKey,
+    ReturnType<typeof setInterval>
+  >;
+
+  /** event sequence numbers per task */
+  private readonly eventSeqMap: Map<string, number>;
+
+  /** statistics counters */
+  private completedCount = 0;
+  private failedCount = 0;
+  private cancelledCount = 0;
+  private handlerMissingCount = 0;
+  private retriesAttemptedCount = 0;
+  private retriesSucceededCount = 0;
+  private retriesExhaustedCount = 0;
+  private lastStartAt: number | null = null;
+  private lastCompleteAt: number | null = null;
+
+  constructor(
+    config: Partial<ExecutorConfig> | undefined,
+    deps: TaskExecutorDeps,
+    hooks: TaskSystemHooks = noopHooks,
+  ) {
+    this.config = mergeExecutorConfig(config);
+    this.hooks = hooks;
+    this.eventLog = deps.eventLog;
+    this.subscribers = deps.subscribers;
+
+    this.abortControllers = new Map();
+    this.heartbeatIntervals = new Map();
+    this.eventSeqMap = new Map();
+  }
+
+  /**
+   * Execute a task with the given handler
+   */
+  async execute(task: Task, definition?: TaskDefinition): Promise<void> {
+    const handler = definition?.handler;
+
+    // start the task
+    task.start();
+    this.lastStartAt = Date.now();
+
+    // create event context for this task
+    const context: TaskEventContext = {
+      taskId: task.id,
+      name: task.name,
+      idempotencyKey: task.idempotencyKey,
+      userId: task.userId,
+      taskType: task.type,
+      executionOptions: task.executionOptions,
+    };
+
+    // handle missing handler
+    if (!handler) {
+      task.fail(`Handler for task ${task.name} not found`);
+      this.handlerMissingCount++;
+      this.failedCount++;
+      this.lastCompleteAt = Date.now();
+
+      this.emit(context, {
+        type: "error",
+        message: `Handler for task ${task.name} not found`,
+      });
+
+      this.subscribers?.onComplete?.(task);
+      return;
+    }
+
+    // emit start event
+    this.emit(context, {
+      type: "start",
+      input: task.input,
+      message: `Starting task ${task.name}`,
+    });
+
+    // create abort controller for this task
+    const controller = new AbortController();
+    this.abortControllers.set(task.idempotencyKey, controller);
+
+    // start heartbeat
+    const stopHeartbeat = this.startHeartbeat(task, context);
+
+    try {
+      await this.hooks.withSpan(
+        TaskSpans.TASK_EXECUTE,
+        {
+          [TaskAttributes.TASK_ID]: task.id,
+          [TaskAttributes.TASK_NAME]: task.name,
+          [TaskAttributes.TASK_TYPE]: task.type,
+        },
+        async (span) => {
+          try {
+            await this.executeWithRetry(
+              task,
+              definition,
+              controller.signal,
+              context,
+            );
+            span.setStatus("ok");
+          } catch (error) {
+            span.recordException(error as Error);
+            span.setStatus("error", (error as Error).message);
+            throw error;
+          }
+        },
+      );
+    } finally {
+      stopHeartbeat();
+      this.abortControllers.delete(task.idempotencyKey);
+      this.eventSeqMap.delete(task.id);
+      this.subscribers?.onComplete?.(task);
+    }
+  }
+
+  /**
+   * Abort a running task by idempotency key
+   */
+  abort(idempotencyKey: IdempotencyKey): void {
+    const controller = this.abortControllers.get(idempotencyKey);
+    if (controller) controller.abort("Task aborted");
+  }
+
+  /**
+   * Abort all running tasks
+   */
+  abortAll(): void {
+    for (const controller of this.abortControllers.values()) {
+      controller.abort("Task aborted");
+    }
+    this.abortControllers.clear();
+
+    // clear all heartbeat intervals and drop their handles so that
+    // getStats() does not keep reporting them as active
+    for (const interval of this.heartbeatIntervals.values()) {
+      clearInterval(interval);
+    }
+    this.heartbeatIntervals.clear();
+  }
+
+  /**
+   * Check if a task is currently executing
+   */
+  isExecuting(idempotencyKey: IdempotencyKey): boolean {
+    return this.abortControllers.has(idempotencyKey);
+  }
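+  /*
+   * Caller-side sketch (illustrative; `executor` and `key` are stand-ins):
+   *
+   *   executor.abort(key);          // flips the task's AbortSignal
+   *   executor.isExecuting(key);    // false once the finally block has run
+   *   executor.getStats().outcomes; // { completed, failed, cancelled, ... }
+   */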
+  /**
+   * Get executor statistics
+   */
+  getStats(): ExecutorStats {
+    const total =
+      this.completedCount +
+      this.failedCount +
+      this.cancelledCount +
+      this.handlerMissingCount;
+
+    return {
+      current: {
+        executing: this.abortControllers.size,
+        heartbeatsActive: this.heartbeatIntervals.size,
+      },
+      outcomes: {
+        completed: this.completedCount,
+        failed: this.failedCount,
+        cancelled: this.cancelledCount,
+        handlerMissing: this.handlerMissingCount,
+        total,
+      },
+      retries: {
+        attempted: this.retriesAttemptedCount,
+        succeeded: this.retriesSucceededCount,
+        exhausted: this.retriesExhaustedCount,
+      },
+      timing: {
+        lastStartAt: this.lastStartAt ?? undefined,
+        lastCompleteAt: this.lastCompleteAt ?? undefined,
+      },
+      debug: {
+        executingTaskKeys: Array.from(this.abortControllers.keys()),
+      },
+    };
+  }
+
+  /**
+   * Execute handler with retry logic
+   */
+  private async executeWithRetry(
+    task: Task,
+    definition: TaskDefinition,
+    signal: AbortSignal,
+    context: TaskEventContext,
+  ): Promise<void> {
+    const { maxAttempts, initialDelayMs, maxDelayMs, backoffMultiplier } =
+      this.config.retry;
+
+    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
+      const isRetryAttempt = attempt > 1;
+
+      if (isRetryAttempt) {
+        task.incrementAttempt();
+        this.retriesAttemptedCount++;
+      }
+
+      // check for abort before each attempt
+      if (signal.aborted) {
+        task.cancel("Task aborted");
+        this.cancelledCount++;
+        this.lastCompleteAt = Date.now();
+
+        this.emit(context, {
+          type: "cancelled",
+          message: "Task cancelled",
+        });
+
+        this.hooks.incrementCounter(TaskMetrics.TASKS_CANCELLED, 1, {
+          [TaskAttributes.TASK_NAME]: task.name,
+        });
+
+        return;
+      }
+
+      const handlerResult: TaskEventInput[] = [];
+
+      try {
+        // create handler context
+        const handlerContext: TaskHandlerContext = {
+          taskId: task.id,
+          name: task.name,
+          userId: task.userId,
+          idempotencyKey: task.idempotencyKey,
+          attempt,
+          signal,
+        };
+
+        // execute handler
+        const execution = definition.handler(task.input, handlerContext);
+
+        if (isAsyncGenerator(execution)) {
+          // handle async generator
+          for await (const event of execution) {
+            if (signal.aborted) {
+              task.cancel("Task aborted");
+              this.cancelledCount++;
+              this.lastCompleteAt = Date.now();
+              throw new Error("Task aborted");
+            }
+
+            handlerResult.push(event);
+            this.emit(context, event);
+          }
+        } else {
+          // handle promise
+          const resultEvent = await execution;
+          if (resultEvent) {
+            handlerResult.push({ type: "progress", result: resultEvent });
+            this.emit(context, { type: "progress", result: resultEvent });
+          }
+        }
+
+        // task completed successfully
+        task.complete(handlerResult);
+        this.completedCount++;
+        this.lastCompleteAt = Date.now();
+
+        // track successful retries
+        if (isRetryAttempt) {
+          this.retriesSucceededCount++;
+        }
+
+        this.emit(context, {
+          type: "complete",
+          message: `Task ${task.name} completed`,
+          durationMs: task.durationMs,
+          result: handlerResult,
+        });
+
+        this.hooks.incrementCounter(TaskMetrics.TASKS_COMPLETED, 1, {
+          [TaskAttributes.TASK_NAME]: task.name,
+        });
+
+        this.hooks.recordHistogram(
+          TaskMetrics.TASK_DURATION_MS,
+          task.durationMs ?? 0,
+          {
+            [TaskAttributes.TASK_NAME]: task.name,
+          },
+        );
+
+        return;
+      } catch (error: unknown) {
+        const errorMessage =
+          error instanceof Error ? error.message : String(error);
+        const isRetryable = isRetryableError(error);
+        const isLastAttempt = attempt === maxAttempts;
+
+        if (isLastAttempt || !isRetryable) {
+          // track exhausted retries
+          if (isLastAttempt && isRetryable) {
+            this.retriesExhaustedCount++;
+          }
+
+          task.fail(errorMessage);
+          this.failedCount++;
+          this.lastCompleteAt = Date.now();
+
+          this.emit(context, {
+            type: "error",
+            message: errorMessage,
+            error: errorMessage,
+            retryable: isRetryable,
+            attempt,
+            maxAttempts,
+          });
+
+          this.emit(context, {
+            type: "complete",
+            message: `Task ${task.name} failed after ${attempt} attempts`,
+          });
+
+          this.hooks.incrementCounter(TaskMetrics.TASKS_FAILED, 1, {
+            [TaskAttributes.TASK_NAME]: task.name,
+            [TaskAttributes.ERROR_TYPE]:
+              error instanceof Error ?
error.name : "UnknownError", + }); + + return; + } + + // calculate retry delay with exponential backoff + const delay = Math.min( + initialDelayMs * backoffMultiplier ** (attempt - 1), + maxDelayMs, + ); + + this.emit(context, { + type: "retry", + message: `Retrying task ${task.name} in ${delay}ms (attempt ${attempt}/${maxAttempts})`, + attempt, + maxAttempts, + nextRetryDelayMs: delay, + }); + + this.hooks.incrementCounter(TaskMetrics.TASKS_RETRIED, 1, { + [TaskAttributes.TASK_NAME]: task.name, + [TaskAttributes.TASK_ATTEMPT]: attempt, + }); + + // wait before retry + await new Promise((resolve) => setTimeout(resolve, delay)); + } + } + } + + /** + * Start periodic heartbeat emission + */ + private startHeartbeat(task: Task, context: TaskEventContext): () => void { + const interval = setInterval(() => { + if (task.status !== "running") return; + + task.recordHeartbeat(); + + this.emit(context, { + type: "heartbeat", + message: "Task heartbeat", + timestamp: task.lastHeartbeatAt?.getTime() ?? Date.now(), + }); + }, this.config.heartbeatIntervalMs); + + this.heartbeatIntervals.set(task.idempotencyKey, interval); + + return () => { + clearInterval(interval); + this.heartbeatIntervals.delete(task.idempotencyKey); + }; + } + + /** + * Emit an event to EventLog and subscribers + * WAL-first: persist to EventLog before notifying subscribers + */ + private emit(context: TaskEventContext, input: TaskEventInput): void { + const seq = (this.eventSeqMap.get(context.taskId) ?? 0) + 1; + this.eventSeqMap.set(context.taskId, seq); + + // generate event ID if not provided + const id = input.id ?? this.generateEventId(context.taskId, seq); + + // create full TaskEvent + const event = createTaskEvent({ ...input, id }, context); + + // persist to EventLog first (WAL-first) + this.eventLog.appendEvent(event); + + // notify subscribers (StreamManager) + this.subscribers?.onEvent(context.idempotencyKey, event); + } + + /** + * Generate a unique event ID + */ + private generateEventId(taskId: string, seq: number): string { + return `${taskId}:${seq}:${Date.now()}`; + } +} diff --git a/packages/taskflow/src/execution/index.ts b/packages/taskflow/src/execution/index.ts new file mode 100644 index 00000000..845a9c25 --- /dev/null +++ b/packages/taskflow/src/execution/index.ts @@ -0,0 +1,28 @@ +export { TaskExecutor, type TaskExecutorDeps } from "./executor"; +export { TaskRecovery, type TaskRecoveryDeps } from "./recovery"; +export { TaskSystem, type TaskSystemConfig } from "./system"; + +export { + DEFAULT_EXECUTOR_CONFIG, + DEFAULT_RECOVERY_CONFIG, + DEFAULT_RETRY_CONFIG, + DEFAULT_SHUTDOWN_CONFIG, + type ExecutorConfig, + type ExecutorStats, + mergeExecutorConfig, + mergeRecoveryConfig, + mergeShutdownConfig, + type RecoveryConfig, + type RecoveryStats, + type RetryConfig, + type ShutdownConfig, + type ShutdownOptions, + type TaskEventSubscriber, + type TaskRecoveryParams, + type TaskRunParams, + type TaskStreamOptions, + type TaskSystemStats, + // system types + type TaskSystemStatus, + type TaskTemplate, +} from "./types"; diff --git a/packages/taskflow/src/execution/recovery.ts b/packages/taskflow/src/execution/recovery.ts new file mode 100644 index 00000000..65a9443c --- /dev/null +++ b/packages/taskflow/src/execution/recovery.ts @@ -0,0 +1,438 @@ +import { eventId, type IdempotencyKey } from "@/core/branded"; +import { NotFoundError } from "@/core/errors"; +import type { StreamManager } from "@/delivery/stream"; +import { + createTaskEvent, + isAsyncGenerator, + type RecoveryContext, + type Task, + type 
TaskDefinition,
+  type TaskEvent,
+  type TaskEventContext,
+  type TaskEventInput,
+  type TaskHandlerContext,
+  toTaskEventType,
+} from "@/domain";
+import type { Guard } from "@/guard/guard";
+import {
+  noopHooks,
+  TaskAttributes,
+  TaskMetrics,
+  type TaskSystemHooks,
+} from "@/observability";
+import type { StoredEvent, TaskRepository } from "@/persistence";
+import type { TaskExecutor } from "./executor";
+import {
+  mergeRecoveryConfig,
+  type RecoveryConfig,
+  type RecoveryStats,
+} from "./types";
+
+/**
+ * Dependencies for TaskRecovery
+ */
+export interface TaskRecoveryDeps {
+  /** guard for slot management */
+  guard: Guard;
+  /** task repository for database access */
+  repository: TaskRepository;
+  /** stream manager for event delivery */
+  streamManager: StreamManager;
+  /** executor for re-execution */
+  executor: TaskExecutor;
+  /** function to get task definition by name */
+  getDefinition: (taskName: string) => TaskDefinition | undefined;
+}
+
+/**
+ * TaskRecovery - Recover stale tasks
+ * - Background polling for stale tasks
+ * - Smart recovery using recovery handlers
+ * - Re-execution fallback
+ * - Database reconnection for clients
+ */
+export class TaskRecovery {
+  private readonly config: RecoveryConfig;
+  private readonly hooks: TaskSystemHooks;
+  private readonly deps: TaskRecoveryDeps;
+
+  private backgroundTimer: ReturnType<typeof setInterval> | null = null;
+  private isRecovering = false;
+
+  // outcome counters
+  private backgroundTasksRecovered = 0;
+  private userTasksRecovered = 0;
+  private tasksFailed = 0;
+  private smartRecoveryCount = 0;
+  private reexecuteCount = 0;
+
+  // timing
+  private lastBackgroundScanAt: number | null = null;
+  private lastScanDurationMs: number | null = null;
+  private lastScanErrorAt: number | null = null;
+
+  constructor(
+    config: Partial<RecoveryConfig> | undefined,
+    deps: TaskRecoveryDeps,
+    hooks: TaskSystemHooks = noopHooks,
+  ) {
+    this.config = mergeRecoveryConfig(config);
+    this.deps = deps;
+    this.hooks = hooks;
+  }
+
+  /**
+   * Start background recovery polling
+   */
+  startBackgroundRecovery(): void {
+    if (!this.config.enabled || this.backgroundTimer) return;
+
+    this.backgroundTimer = setInterval(async () => {
+      await this.recoverBackgroundTasks();
+    }, this.config.backgroundPollIntervalMs);
+
+    // don't keep process alive just for recovery
+    this.backgroundTimer.unref();
+  }
+
+  /**
+   * Stop background recovery polling
+   */
+  stopBackgroundRecovery(): void {
+    if (this.backgroundTimer) {
+      clearInterval(this.backgroundTimer);
+      this.backgroundTimer = null;
+    }
+  }
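+  /*
+   * Timing sketch (illustrative numbers, not this package's defaults): with
+   * the config below, a background task whose heartbeat is older than 30s is
+   * picked up by a scan that runs every 15s, at most 10 tasks per scan.
+   *
+   *   new TaskRecovery(
+   *     {
+   *       enabled: true,
+   *       backgroundPollIntervalMs: 15_000,
+   *       staleThresholdMs: 30_000,
+   *       batchSize: 10,
+   *     },
+   *     deps,
+   *   );
+   */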
+  /**
+   * Recover stale background tasks
+   */
+  async recoverBackgroundTasks(): Promise<void> {
+    if (!this.config.enabled || this.isRecovering) return;
+
+    this.isRecovering = true;
+    const scanStartTime = Date.now();
+    this.lastBackgroundScanAt = scanStartTime;
+
+    try {
+      const staleTasks = await this.deps.repository.findStaleTasks(
+        this.config.staleThresholdMs,
+      );
+
+      // only recover background tasks
+      const backgroundTasks = staleTasks.filter(
+        (task) => task.type === "background",
+      );
+
+      for (const task of backgroundTasks.slice(0, this.config.batchSize)) {
+        try {
+          this.deps.guard.acquireRecoverySlot();
+
+          try {
+            for await (const event of this.recoverStaleTask(task)) {
+              this.deps.streamManager.push(task.idempotencyKey, event);
+            }
+            this.backgroundTasksRecovered++;
+
+            this.hooks.incrementCounter(TaskMetrics.TASKS_RECOVERED, 1, {
+              [TaskAttributes.TASK_TYPE]: "background",
+            });
+          } finally {
+            this.deps.guard.releaseRecoverySlot();
+          }
+        } catch (error) {
+          this.tasksFailed++;
+          this.hooks.log({
+            severity: "error",
+            message: `Failed to recover background task ${task.id}`,
+            error: error instanceof Error ? error : new Error(String(error)),
+          });
+        }
+      }
+
+      this.lastScanDurationMs = Date.now() - scanStartTime;
+    } catch (error) {
+      this.lastScanErrorAt = Date.now();
+      this.lastScanDurationMs = Date.now() - scanStartTime;
+
+      this.hooks.log({
+        severity: "error",
+        message: "Background recovery scan failed",
+        error: error instanceof Error ? error : new Error(String(error)),
+      });
+    } finally {
+      this.isRecovering = false;
+    }
+  }
+
+  /**
+   * Recover a user task (via reconnection)
+   */
+  async *recoverUserTask(task: Task): AsyncGenerator<TaskEvent> {
+    try {
+      for await (const event of this.recoverStaleTask(task)) {
+        yield event;
+      }
+      this.userTasksRecovered++;
+
+      this.hooks.incrementCounter(TaskMetrics.TASKS_RECOVERED, 1, {
+        [TaskAttributes.TASK_TYPE]: "user",
+      });
+    } catch (error) {
+      this.tasksFailed++;
+      throw error;
+    }
+  }
+
+  /**
+   * Recover a stale task using smart recovery or re-execution
+   */
+  async *recoverStaleTask(
+    task: Task,
+  ): AsyncGenerator<TaskEvent> {
+    const definition = this.deps.getDefinition(task.name);
+
+    if (!definition) {
+      throw new NotFoundError(
+        `Handler for task ${task.name} not found`,
+        "handler",
+        { taskId: task.id, templateName: task.name },
+      );
+    }
+
+    // stream previous events from database
+    const previousEvents: TaskEvent[] = [];
+    for await (const event of this.streamFromDB(task)) {
+      previousEvents.push(event);
+      yield event;
+    }
+
+    // create event context
+    const context: TaskEventContext = {
+      taskId: task.id,
+      name: task.name,
+      idempotencyKey: task.idempotencyKey,
+      userId: task.userId,
+      taskType: task.type,
+      executionOptions: task.executionOptions,
+    };
+
+    // determine recovery method
+    const hasRecoverHandler = !!definition.recover;
+    let result:
+      | AsyncGenerator<TaskEventInput>
+      | Promise<unknown>;
+
+    if (hasRecoverHandler && definition.recover) {
+      // smart recovery with previous events
+      this.smartRecoveryCount++;
+
+      const recoveryContext: RecoveryContext = {
+        taskId: task.id,
+        name: task.name,
+        userId: task.userId,
+        idempotencyKey: task.idempotencyKey,
+        attempt: task.attempt,
+        signal: new AbortController().signal,
+        previousEvents,
+        recoveryReason: "stale",
+        timeSinceLastEventMs: this.getTimeSinceLastEvent(previousEvents),
+      };
+
+      result = definition.recover(task.input, recoveryContext);
+    } else {
+      // re-execute the handler
+      this.reexecuteCount++;
+      const handlerContext: TaskHandlerContext = {
+        taskId: task.id,
+        name: task.name,
+        userId: task.userId,
+        idempotencyKey: task.idempotencyKey,
+        attempt: task.attempt + 1,
+        signal: new AbortController().signal,
+      };
+
+      result = definition.handler(task.input, handlerContext);
+    }
+
+    // yield events from recovery/re-execution
+    if (isAsyncGenerator(result)) {
+      for await (const event of result) {
+        yield this.enrichEvent(event, context);
+      }
+    } else {
+      const value = await result;
+      if (value) {
+        yield this.enrichEvent({ type: "complete", result: value }, context);
+      }
+    }
+  }
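+  /*
+   * Reconnection sketch (illustrative; `recovery`, `key`, `uid` and `send`
+   * are stand-ins). Because handleDatabaseCheck both yields events and
+   * returns the task, the generator is driven manually to capture the
+   * return value:
+   *
+   *   const gen = recovery.handleDatabaseCheck(key, uid);
+   *   let step = await gen.next();
+   *   while (!step.done) { send(step.value); step = await gen.next(); }
+   *   const task = step.value; // Task when found and authorized, else null
+   */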
+  /**
+   * Handle database check for reconnecting clients
+   *
+   * Returns the task if found and authorized, null otherwise
+   * yields events from db or recovery as appropriate
+   */
+  async *handleDatabaseCheck(
+    idempotencyKey: IdempotencyKey,
+    requestingUserId: string | null,
+  ): AsyncGenerator<TaskEvent, Task | null> {
+    // check if repository is initialized
+    if (!this.deps.repository.isInitialized) return null;
+
+    // find task by idempotency key
+    const task =
+      await this.deps.repository.findByIdempotencyKey(idempotencyKey);
+    if (!task) return null;
+
+    // verify requesting user owns the task (null userId means background task)
+    if (task.userId !== requestingUserId) return null;
+
+    // handle based on task status
+    if (task.status === "completed" || task.status === "failed") {
+      // stream stored events from db
+      yield* this.streamFromDB(task);
+      return task;
+    }
+
+    if (task.status === "running") {
+      if (this.isTaskAlive(task)) {
+        // task is still running, wait for completion
+        yield* this.waitForTaskCompletion(task);
+      } else {
+        // task is stale, recover it
+        yield* this.recoverUserTask(task);
+      }
+
+      // fetch updated task status
+      const updatedTask = await this.deps.repository.findById(task.id);
+      return updatedTask ?? task;
+    }
+
+    return null;
+  }
+
+  /**
+   * Get recovery statistics
+   */
+  getStats(): RecoveryStats {
+    return {
+      config: {
+        enabled: this.config.enabled,
+        pollIntervalMs: this.config.backgroundPollIntervalMs,
+        staleThresholdMs: this.config.staleThresholdMs,
+        batchSize: this.config.batchSize,
+      },
+      background: {
+        isScanning: this.isRecovering,
+        lastScanAt: this.lastBackgroundScanAt ?? undefined,
+        lastScanDurationMs: this.lastScanDurationMs ?? undefined,
+        lastErrorAt: this.lastScanErrorAt ?? undefined,
+      },
+      outcomes: {
+        background: this.backgroundTasksRecovered,
+        user: this.userTasksRecovered,
+        failed: this.tasksFailed,
+        byMethod: {
+          smartRecovery: this.smartRecoveryCount,
+          reexecution: this.reexecuteCount,
+        },
+      },
+    };
+  }
+
+  /**
+   * Check if a task is still alive based on its last heartbeat
+   */
+  private isTaskAlive(task: Task): boolean {
+    if (!task.lastHeartbeatAt) return false;
+    const age = Date.now() - task.lastHeartbeatAt.getTime();
+    return age < this.config.staleThresholdMs;
+  }
+
+  /**
+   * Wait for a running task to complete
+   */
+  private async *waitForTaskCompletion(
+    task: Task,
+  ): AsyncGenerator<TaskEvent> {
+    const pollIntervalMs = 1000;
+    const startTime = Date.now();
+
+    while (Date.now() - startTime < this.config.completionTimeoutMs) {
+      const updatedTask = await this.deps.repository.findById(task.id);
+
+      if (
+        updatedTask?.status === "completed" ||
+        updatedTask?.status === "failed"
+      ) {
+        yield* this.streamFromDB(updatedTask);
+        return;
+      }
+
+      await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
+    }
+  }
+
+  /**
+   * Stream events from database
+   */
+  private async *streamFromDB(
+    task: Task,
+  ): AsyncGenerator<TaskEvent> {
+    const events = await this.deps.repository.getEvents(task.id);
+
+    for (const entry of events) {
+      const event = this.storedEventToTaskEvent(entry, task);
+      if (event) yield event;
+    }
+  }
+
+  /**
+   * Convert stored event to TaskEvent
+   */
+  private storedEventToTaskEvent(
+    entry: StoredEvent,
+    task: Task,
+  ): TaskEvent | null {
+    // skip TASK_CREATED events (already known to client)
+    if (entry.type === "TASK_CREATED") return null;
+
+    // map event type from db format (TASK_PROGRESS) to stream format (progress)
+    const eventType = toTaskEventType(entry.type);
+
+    return {
+      id: eventId(entry.id),
+      taskId: task.id,
+      name: task.name,
+      idempotencyKey: task.idempotencyKey,
+      userId: task.userId,
+      taskType: task.type,
+      type: eventType,
+      message: entry.payload?.message as string | undefined,
+      result: entry.payload?.result,
+      error: entry.payload?.error as string | undefined,
+      payload: entry.payload ?? undefined,
+      timestamp: entry.timestamp.getTime(),
+    };
+  }
+
+  /**
+   * Enrich event input with task context
+   */
+  private enrichEvent(event: TaskEventInput, context: TaskEventContext) {
+    return createTaskEvent(event, context);
+  }
+
+  /**
+   * Calculate time since last event
+   */
+  private getTimeSinceLastEvent(events: TaskEvent[]): number {
+    if (events.length === 0) return 0;
+
+    const lastEvent = events[events.length - 1];
+    return Date.now() - (lastEvent.timestamp ?? Date.now());
+  }
+}
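+
+/*
+ * Definition-side sketch (illustrative): a recover handler that uses
+ * previousEvents to resume instead of re-running from scratch. Handler and
+ * recover shapes beyond what this file consumes are assumptions.
+ *
+ *   const definition = {
+ *     name: "import-rows",
+ *     handler: async function* (input, ctx) { ... },
+ *     recover: async function* (input, ctx) {
+ *       const done = ctx.previousEvents.filter((e) => e.type === "progress").length;
+ *       yield { type: "progress", message: `resuming after ${done} steps` };
+ *       // continue from step `done` instead of starting over
+ *     },
+ *   };
+ */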
diff --git a/packages/taskflow/src/execution/system.ts b/packages/taskflow/src/execution/system.ts
new file mode 100644
index 00000000..09f7a7ff
--- /dev/null
+++ b/packages/taskflow/src/execution/system.ts
@@ -0,0 +1,579 @@
+import { createHash } from "node:crypto";
+import { canonicalize } from "json-canonicalize";
+import type { ZodType } from "zod";
+import {
+  eventId,
+  type IdempotencyKey,
+  idempotencyKey,
+  taskName,
+  userId,
+} from "@/core/branded";
+import { TaskSystemError, ValidationError } from "@/core/errors";
+import { StreamManager } from "@/delivery/stream";
+import type { StreamConfig } from "@/delivery/types";
+import {
+  Task,
+  type TaskCreationParams,
+  type TaskDefinition,
+  type TaskEvent,
+} from "@/domain";
+import { Flush, type FlushConfig, type FlushManagerConfig } from "@/flush";
+import { Guard } from "@/guard/guard";
+import type { GuardConfig } from "@/guard/types";
+import { validateInputSchema } from "@/guard/validator";
+import { noopHooks, type TaskSystemHooks } from "@/observability";
+import {
+  createRepository,
+  EventLog,
+  type EventLogConfig,
+  type RepositoryConfig,
+} from "@/persistence";
+import { TaskExecutor } from "./executor";
+import { TaskRecovery } from "./recovery";
+import {
+  type ExecutorConfig,
+  mergeShutdownConfig,
+  type RecoveryConfig,
+  type ShutdownConfig,
+  type ShutdownOptions,
+  type TaskRecoveryParams,
+  type TaskRunParams,
+  type TaskStreamOptions,
+  type TaskSystemStats,
+  type TaskSystemStatus,
+  type TaskTemplate,
+} from "./types";
+
+/**
+ * Configuration for the TaskSystem
+ */
+export interface TaskSystemConfig {
+  /** event log configuration */
+  eventLog?: Partial<EventLogConfig>;
+  /** guard configuration */
+  guard?: Partial<GuardConfig>;
+  /** stream manager configuration */
+  stream?: Partial<StreamConfig>;
+  /** flush configuration */
+  flush?: Partial<FlushConfig>;
+  /** executor configuration */
+  executor?: Partial<ExecutorConfig>;
+  /** recovery configuration */
+  recovery?: Partial<RecoveryConfig>;
+  /** repository configuration */
+  repository?: RepositoryConfig;
+  /** shutdown configuration */
+  shutdown?: Partial<ShutdownConfig>;
+}
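+
+/*
+ * Wiring sketch (illustrative; the sqlite path mirrors the fallback used in
+ * this file, the other values are placeholders, not defaults):
+ *
+ *   const system = new TaskSystem({
+ *     repository: { type: "sqlite", database: "./.taskflow/sqlite.db" },
+ *     recovery: { staleThresholdMs: 30_000 },
+ *     shutdown: { gracePeriodMs: 10_000 },
+ *   });
+ *   await system.initialize();
+ */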
+
+  // components
+  private readonly eventLog: EventLog;
+  private readonly flush: Flush;
+  private readonly guard: Guard;
+  private readonly streamManager: StreamManager;
+  private readonly executor: TaskExecutor;
+  private recovery!: TaskRecovery;
+
+  constructor(config?: TaskSystemConfig, hooks: TaskSystemHooks = noopHooks) {
+    this.config = config ?? {};
+    this.shutdownConfig = mergeShutdownConfig(config?.shutdown);
+    this.hooks = hooks;
+
+    // initialize components
+    this.eventLog = new EventLog(this.config.eventLog ?? {}, hooks);
+    this.flush = new Flush(
+      {
+        ...this.config.flush,
+        repository: this.config.repository ?? {
+          type: "sqlite",
+          database: "./.taskflow/sqlite.db",
+        },
+      } as FlushManagerConfig,
+      hooks,
+    );
+
+    this.guard = new Guard(this.config.guard ?? {}, hooks);
+    this.streamManager = new StreamManager(this.config.stream ?? {}, hooks);
+    this.executor = new TaskExecutor(
+      this.config.executor,
+      {
+        eventLog: this.eventLog,
+        subscribers: {
+          onEvent: (key, event) => this.streamManager.push(key, event),
+          onComplete: (task) => this.completeTask(task),
+        },
+      },
+      hooks,
+    );
+  }
+
+  /**
+   * Initialize the task system
+   */
+  async initialize(): Promise<void> {
+    await this.eventLog.initialize();
+    await this.flush.initialize();
+
+    // create repository for recovery (async)
+    const repository = await createRepository(
+      this.config.repository ?? {
+        type: "sqlite",
+        database: "./.taskflow/sqlite.db",
+      },
+      this.hooks,
+    );
+
+    this.recovery = new TaskRecovery(
+      this.config.recovery,
+      {
+        guard: this.guard,
+        repository,
+        streamManager: this.streamManager,
+        executor: this.executor,
+        getDefinition: (name) => this.definitions.get(name),
+      },
+      this.hooks,
+    );
+
+    this.startExecutorTick();
+    this.recovery.startBackgroundRecovery();
+
+    this._isInitialized = true;
+    this.startedAt = Date.now();
+
+    this.hooks.log({
+      severity: "info",
+      message: "TaskSystem initialized",
+    });
+  }
+
+  /**
+   * Register a task definition
+   */
+  registerTask(
+    definition: TaskDefinition,
+  ): TaskTemplate {
+    if (this.templates.has(definition.name)) {
+      throw new ValidationError(
+        `Task ${definition.name} already registered`,
+        "name",
+      );
+    }
+
+    this.definitions.set(definition.name, definition as TaskDefinition);
+
+    const template: TaskTemplate = {
+      name: definition.name,
+      run: async (params: TaskRunParams) => {
+        return this.runTask(definition as TaskDefinition, params);
+      },
+      recover: async (params: TaskRecoveryParams) => {
+        return this.recoverTask(definition as TaskDefinition, params);
+      },
+    };
+
+    this.templates.set(definition.name, template);
+    return template;
+  }
+
+  /**
+   * Get a registered template
+   */
+  getTemplate(name: string): TaskTemplate | null {
+    return this.templates.get(name) ?? null;
+  }
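+
+  // Hypothetical usage sketch (names are illustrative, not part of this patch):
+  //   const system = new TaskSystem();
+  //   await system.initialize();
+  //   const sendEmail = system.registerTask({
+  //     name: "send-email",
+  //     handler: async (input) => ({ delivered: true }),
+  //   });
+  //   const task = await sendEmail.run({ userId: null, input: { to: "a@example.com" } });
+  //   for await (const event of task.stream()) { /* created, start, ..., complete */ }
+  // (stream() is attached to the returned Task at runtime by attachStream below.)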
+
+  /**
+   * Check if a task is currently running
+   */
+  getTaskRunning(idempotencyKey: IdempotencyKey): boolean {
+    return this.runningTasks.has(idempotencyKey);
+  }
+
+  /**
+   * Gracefully shut down the task system
+   */
+  async shutdown(options: ShutdownOptions = {}): Promise<void> {
+    const { deleteFiles = false, force = false } = options;
+
+    if (this._isShuttingDown) return;
+    this._isShuttingDown = true;
+
+    // stop accepting new tasks
+    this.recovery.stopBackgroundRecovery();
+
+    if (this.executorInterval) {
+      clearInterval(this.executorInterval);
+      this.executorInterval = null;
+    }
+    // clear pending queue
+    this.pendingQueue.clear();
+
+    // wait for running tasks (unless force)
+    if (!force && this.runningTasks.size > 0) {
+      const { gracePeriodMs, pollIntervalMs } = this.shutdownConfig;
+      const startTime = Date.now();
+
+      while (this.runningTasks.size > 0) {
+        const elapsed = Date.now() - startTime;
+
+        if (elapsed >= gracePeriodMs) {
+          this.hooks.log({
+            severity: "warn",
+            message: `Graceful shutdown timed out after ${gracePeriodMs}ms, ${this.runningTasks.size} tasks still running. Forcing abort.`,
+          });
+          break;
+        }
+        await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
+      }
+    }
+
+    // force abort all running tasks
+    this.executor.abortAll();
+
+    // close all streams
+    for (const task of this.runningTasks.values()) {
+      this.streamManager.close(task.idempotencyKey);
+    }
+
+    this.runningTasks.clear();
+    this.streamManager.clearAll();
+
+    // shutdown persistence layer
+    await this.flush.shutdown();
+    await this.eventLog.close(deleteFiles);
+
+    this.guard.shutdown();
+
+    this.hooks.log({
+      severity: "info",
+      message: "TaskSystem shutdown complete",
+    });
+  }
+
+  /**
+   * Whether the system is shutting down
+   */
+  get shuttingDown(): boolean {
+    return this._isShuttingDown;
+  }
+
+  /**
+   * Get comprehensive system statistics
+   */
+  getStats(): TaskSystemStats {
+    const guardStats = this.guard.getStats();
+    const executorStats = this.executor.getStats();
+
+    const queued = this.pendingQueue.size;
+    const waiting = guardStats.slots.current.waiting;
+    const executing = executorStats.current.executing;
+    const inDLQ = guardStats.dlq.size;
+
+    const { completed, failed, cancelled } = executorStats.outcomes;
+    const totalTerminal = completed + failed;
+    const successRate =
+      totalTerminal > 0 ? completed / totalTerminal : undefined;
+
+    // determine system status
+    let status: TaskSystemStatus = "stopped";
+    if (this._isShuttingDown) {
+      status = "shutting_down";
+    } else if (this._isInitialized) {
+      // check for degraded state
+      const isDegraded =
+        guardStats.dlq.size > 0 ||
+        (executorStats.retries.exhausted > 0 &&
+          executorStats.retries.exhausted > executorStats.retries.succeeded);
+
+      status = isDegraded ? "degraded" : "running";
+    } else if (this.startedAt === null) {
+      status = "stopped";
+    } else {
+      status = "starting";
+    }
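+
+    // illustrative reading (hypothetical numbers): one task parked in the DLQ
+    // reports "degraded" even while new work still executes; an empty DLQ with
+    // retries.exhausted <= retries.succeeded reports "running"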
+
+    return {
+      system: {
+        status,
+        startedAt: this.startedAt ?? undefined,
+        uptimeMs: this.startedAt ? Date.now() - this.startedAt : undefined,
+      },
+      tasks: {
+        queued,
+        waiting,
+        executing,
+        inDLQ,
+        inFlight: queued + waiting + executing,
+        totalCompleted: completed,
+        totalFailed: failed,
+        totalCancelled: cancelled,
+        successRate,
+      },
+      scheduler: {
+        tickIntervalMs: 100,
+        isTickActive: this.isExecutorTickRunning,
+      },
+      registry: {
+        templates: this.templates.size,
+        handlers: this.definitions.size,
+      },
+      components: {
+        guard: guardStats,
+        executor: executorStats,
+        stream: this.streamManager.getStats(),
+        eventLog: this.eventLog.getStats(),
+        flush: this.flush.getStats(),
+        recovery: this.recovery.getStats(),
+      },
+    };
+  }
+
+  /**
+   * Run a task
+   */
+  private async runTask(
+    definition: TaskDefinition,
+    params: TaskRunParams,
+  ): Promise<Task> {
+    // check if shutting down
+    if (this._isShuttingDown) {
+      throw new TaskSystemError("Task system is shutting down", undefined, {
+        taskName: definition.name,
+      });
+    }
+
+    // validate input schema if provided
+    let validatedInput = params.input;
+    if (definition.inputSchema) {
+      validatedInput = validateInputSchema(
+        params.input,
+        definition.inputSchema as ZodType,
+      );
+    }
+
+    // generate idempotency key
+    const taskIdempotencyKey = params.idempotencyKey
+      ? idempotencyKey(params.idempotencyKey)
+      : this.generateIdempotencyKey(definition, params);
+
+    // check for existing running task (deduplication)
+    const existingTask = this.runningTasks.get(taskIdempotencyKey);
+    if (existingTask) {
+      this.streamManager.getOrCreate(taskIdempotencyKey);
+      return this.attachStream(existingTask, taskIdempotencyKey);
+    }
+
+    // check pending queue
+    const pendingTask = this.pendingQueue.get(taskIdempotencyKey);
+    if (pendingTask) {
+      this.streamManager.getOrCreate(taskIdempotencyKey);
+      return this.attachStream(pendingTask, taskIdempotencyKey);
+    }
+
+    // check database for recovery - client-side retry
+    if (params.idempotencyKey) {
+      this.streamManager.getOrCreate(taskIdempotencyKey);
+      const generator = this.recovery.handleDatabaseCheck(
+        taskIdempotencyKey,
+        params.userId,
+      );
+      let recoveredTask: Task | null = null;
+      let iteratorResult = await generator.next();
+
+      while (!iteratorResult.done) {
+        this.streamManager.push(taskIdempotencyKey, iteratorResult.value);
+        iteratorResult = await generator.next();
+      }
+
+      recoveredTask = iteratorResult.value;
+      if (recoveredTask)
+        return this.attachStream(recoveredTask, taskIdempotencyKey);
+    }
+
+    // create new task
+    const taskParams: TaskCreationParams = {
+      name: taskName(definition.name),
+      input: validatedInput,
+      userId: userId(params.userId),
+      type: definition.type ?? "user",
+      executionOptions: definition.defaultOptions,
+      idempotencyKey: idempotencyKey(taskIdempotencyKey),
+    };
+
+    const task = new Task(taskParams);
+
+    // validate through guard (rate limiting, etc.)
+    this.guard.acceptTask(task);
+
+    // create stream and emit created event
+    this.streamManager.getOrCreate(taskIdempotencyKey);
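+
+    // at this point the key is genuinely new: concurrent run() calls with the
+    // same idempotency key were attached to the existing task's stream above
+    // (hypothetical example: two browser tabs submitting the same form share
+    // one execution and one event stream)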
+
+    const createdEvent: TaskEvent = {
+      id: eventId(
+        `evt_${Date.now()}_${Math.random().toString(36).substring(2, 9)}`,
+      ),
+      type: "created",
+      taskId: task.id,
+      name: taskName(definition.name),
+      idempotencyKey: idempotencyKey(taskIdempotencyKey),
+      userId: task.userId ? userId(task.userId) : null,
+      input: validatedInput,
+      taskType: task.type,
+      executionOptions: task.executionOptions,
+      timestamp: Date.now(),
+    };
+
+    this.streamManager.push(taskIdempotencyKey, createdEvent);
+
+    // add to pending queue
+    this.pendingQueue.set(task.idempotencyKey, task);
+
+    return this.attachStream(task, taskIdempotencyKey);
+  }
+
+  /**
+   * Recover a task from database
+   */
+  private async recoverTask(
+    _definition: TaskDefinition,
+    params: TaskRecoveryParams,
+  ): Promise<Task | null> {
+    const { idempotencyKey: key, userId: uid } = params;
+    const idemKey = idempotencyKey(key);
+
+    // create stream and check for existing task
+    this.streamManager.getOrCreate(idemKey);
+
+    // check for existing task in database
+    const generator = this.recovery.handleDatabaseCheck(idemKey, uid);
+    let recoveredTask: Task | null = null;
+    let iteratorResult = await generator.next();
+
+    while (!iteratorResult.done) {
+      this.streamManager.push(idemKey, iteratorResult.value);
+      iteratorResult = await generator.next();
+    }
+
+    recoveredTask = iteratorResult.value;
+
+    if (recoveredTask) return this.attachStream(recoveredTask, idemKey);
+    return null;
+  }
+
+  /**
+   * Attach a stream method to the task
+   */
+  private attachStream(task: Task, key: IdempotencyKey): Task {
+    // add a stream method to the task
+    (
+      task as Task & {
+        stream: (
+          options?: TaskStreamOptions,
+        ) => AsyncGenerator<TaskEvent>;
+      }
+    ).stream = (options?: TaskStreamOptions) =>
+      this.streamManager.createGenerator(key, options);
+    return task;
+  }
+
+  /**
+   * Start executor tick interval
+   */
+  private startExecutorTick(): void {
+    this.executorInterval = setInterval(async () => {
+      if (this.isExecutorTickRunning) return;
+
+      this.isExecutorTickRunning = true;
+
+      try {
+        // get first task from queue
+        const task = this.pendingQueue.values().next().value as
+          | Task
+          | undefined;
+        if (!task) return;
+
+        // remove from pending queue
+        this.pendingQueue.delete(task.idempotencyKey);
+
+        // skip if already running (race condition)
+        if (this.runningTasks.has(task.idempotencyKey)) return;
+
+        // acquire execution slot
+        try {
+          await this.guard.acquireExecutionSlot(task);
+        } catch (error) {
+          this.guard.addToDLQ(task, "Slot acquisition failed", String(error));
+          return;
+        }
+
+        // add to running tasks
+        this.runningTasks.set(task.idempotencyKey, task);
+        this.streamManager.getOrCreate(task.idempotencyKey);
+
+        // execute task
+        const definition = this.definitions.get(task.name);
+        await this.executor.execute(task, definition);
+      } finally {
+        this.isExecutorTickRunning = false;
+      }
+    }, 100);
+  }
+
+  /**
+   * Handle task completion
+   */
+  private completeTask(task: Task): void {
+    this.guard.releaseExecutionSlot(task);
+    this.runningTasks.delete(task.idempotencyKey);
+    this.streamManager.close(task.idempotencyKey);
+  }
+
+  /**
+   * Generate idempotency key from task params
+   */
+  private generateIdempotencyKey(
+    definition: TaskDefinition,
+    params: TaskRunParams,
+  ): IdempotencyKey {
+    const payload = {
+      name: definition.name,
+      input: params.input,
+      userId: params.userId,
+    };
+    return idempotencyKey(
+      createHash("sha256").update(canonicalize(payload)).digest("hex"),
+    );
+  }
+}
diff --git a/packages/taskflow/src/execution/types.ts b/packages/taskflow/src/execution/types.ts
new file mode 100644
index 00000000..b212111b
--- /dev/null
+++ b/packages/taskflow/src/execution/types.ts
@@ -0,0 +1,374 @@
+import type { IdempotencyKey, UserId } from "@/core/branded";
+import type { StreamStats } from "@/delivery/types";
+import type { Task,
TaskEvent, TaskExecutionOptions } from "@/domain"; +import type { FlushStats } from "@/flush"; +import type { GuardStats } from "@/guard/types"; +import type { EventLogStats } from "@/persistence"; + +/** + * Retry configuration for task execution + */ +export interface RetryConfig { + /** maximum number of retry attempts */ + maxAttempts: number; + /** initial delay in milliseconds before the first retry */ + initialDelayMs: number; + /** maximum delay in milliseconds between retries */ + maxDelayMs: number; + /** multiplier for exponential backoff */ + backoffMultiplier: number; +} + +/** + * Default retry configuration + */ + +export const DEFAULT_RETRY_CONFIG: RetryConfig = { + maxAttempts: 3, + initialDelayMs: 1000, + maxDelayMs: 30000, + backoffMultiplier: 2, +}; + +/** + * Configuration for TaskExecutor + */ +export interface ExecutorConfig { + /** interval between heartbeat emissions in milliseconds */ + heartbeatIntervalMs: number; + /** retry configuration */ + retry: RetryConfig; +} + +/** + * Default executor configuration + */ +export const DEFAULT_EXECUTOR_CONFIG: ExecutorConfig = { + heartbeatIntervalMs: 30_000, // 30 seconds + retry: DEFAULT_RETRY_CONFIG, +}; + +/** + * Subscriber interface for task events + */ +export interface TaskEventSubscriber { + /** called when a task event is emitted */ + onEvent(idempotencyKey: IdempotencyKey, event: TaskEvent): void; + /** called when a task completes (success, failure, or cancellation) */ + onComplete?(task: Task): void; +} + +/** + * Statistics for TaskExecutor + */ +export interface ExecutorStats { + /** current state */ + current: { + /** number of tasks currently executing */ + executing: number; + /** number of active heartbeat timers */ + heartbeatsActive: number; + }; + + /** outcome counters */ + outcomes: { + /** tasks completed successfully */ + completed: number; + /** tasks that failed */ + failed: number; + /** tasks that were cancelled */ + cancelled: number; + /** tasks where handler was not found */ + handlerMissing: number; + /** total tasks processed (sum of all outcomes) */ + total: number; + }; + + /** retry statistics */ + retries: { + /** total retry attempts */ + attempted: number; + /** successful retries (task succeeded after retry) */ + succeeded: number; + /** retries exhausted (max attempts reached) */ + exhausted: number; + }; + + /** timing information */ + timing: { + /** timestamp of last task start */ + lastStartAt?: number; + /** timestamp of last task completion */ + lastCompleteAt?: number; + }; + + /** debug information */ + debug: { + /** idempotency keys of currently executing tasks */ + executingTaskKeys: IdempotencyKey[]; + }; +} + +/** + * Configuration for TaskRecovery + */ +export interface RecoveryConfig { + /** whether background recovery is enabled */ + enabled: boolean; + /** interval between background recovery scans in milliseconds */ + backgroundPollIntervalMs: number; + /** threshold for considering a task stale (no heartbeat) in milliseconds */ + staleThresholdMs: number; + /** maximum tasks to recover per scan */ + batchSize: number; + /** timeout for waiting on a running task to complete in milliseconds */ + completionTimeoutMs: number; + /** heartbeat interval for determining if a task is alive in milliseconds */ + heartbeatIntervalMs: number; +} + +/** + * Default recovery configuration + */ +export const DEFAULT_RECOVERY_CONFIG: RecoveryConfig = { + enabled: true, + backgroundPollIntervalMs: 60_000, // 1 minute + staleThresholdMs: 120_000, // 2 minutes + batchSize: 10, + 
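+  // note: combined with the 30 s heartbeat interval below, the 2 min stale
+  // threshold above tolerates roughly four missed heartbeats before a running
+  // task is considered dead and handed to recovery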
completionTimeoutMs: 60_000, // 1 minute + heartbeatIntervalMs: 30_000, // 30 seconds +}; + +/** + * Statistics for TaskRecovery + */ +export interface RecoveryStats { + /** configuration summary */ + config: { + enabled: boolean; + pollIntervalMs: number; + staleThresholdMs: number; + batchSize: number; + }; + + /** background scanner state */ + background: { + /** whether a scan is currently in progress */ + isScanning: boolean; + /** timestamp of last scan */ + lastScanAt?: number; + /** duration of last scan in milliseconds */ + lastScanDurationMs?: number; + /** timestamp of last error during scan */ + lastErrorAt?: number; + }; + + /** recovery outcome counters */ + outcomes: { + /** background tasks recovered */ + background: number; + /** user tasks recovered (via reconnection) */ + user: number; + /** tasks that failed during recovery */ + failed: number; + /** recovery method breakdown */ + byMethod: { + /** tasks recovered using smart recovery handler */ + smartRecovery: number; + /** tasks recovered by re-execution */ + reexecution: number; + }; + }; +} + +/** + * Task system status + */ +export type TaskSystemStatus = + | "starting" + | "running" + | "degraded" + | "shutting_down" + | "stopped"; + +/** + * Options for graceful shutdown + */ +export interface ShutdownOptions { + /** delete event log files after shutdown */ + deleteFiles?: boolean; + /** force immediate shutdown without waiting for tasks */ + force?: boolean; +} + +/** + * Configuration for graceful shutdown + */ +export interface ShutdownConfig { + /** maximum time to wait for running tasks in milliseconds */ + gracePeriodMs: number; + /** interval for polling task completion in milliseconds */ + pollIntervalMs: number; +} + +/** + * default shutdown configuration + */ +export const DEFAULT_SHUTDOWN_CONFIG: ShutdownConfig = { + gracePeriodMs: 30_000, + pollIntervalMs: 100, +}; + +/** + * Comprehensive statistics for the entire TaskSystem + */ +export interface TaskSystemStats { + /** system health and lifecycle */ + system: { + status: TaskSystemStatus; + startedAt?: number; + uptimeMs?: number; + }; + + /** high-level task counts */ + tasks: { + /** tasks in pending queue */ + queued: number; + /** tasks waiting for execution slot */ + waiting: number; + /** tasks currently executing */ + executing: number; + /** tasks in dead letter queue */ + inDLQ: number; + /** total in-flight (queued + waiting + executing) */ + inFlight: number; + /** lifetime completed tasks */ + totalCompleted: number; + /** lifetime failed tasks */ + totalFailed: number; + /** lifetime cancelled tasks */ + totalCancelled: number; + /** success rate (completed / (completed + failed)) */ + successRate?: number; + }; + + /** scheduler state */ + scheduler: { + /** tick interval in milliseconds */ + tickIntervalMs: number; + /** whether a tick is currently active */ + isTickActive: boolean; + }; + + /** registry information */ + registry: { + /** number of registered templates */ + templates: number; + /** number of registered handlers */ + handlers: number; + }; + + /** component-specific statistics */ + components: { + guard: GuardStats; + executor: ExecutorStats; + stream: StreamStats; + eventLog: EventLogStats; + flush: FlushStats; + recovery: RecoveryStats; + }; +} + +/** + * Parameters for running a task + */ +export interface TaskRunParams { + /** user id for user-initiated tasks */ + userId: UserId | null; + /** task input data */ + input: unknown; + /** custom idempotency key (optional, auto-generated if not provided) */ + 
idempotencyKey?: IdempotencyKey;
+  /** custom execution options (optional) */
+  executionOptions?: TaskExecutionOptions;
+}
+
+/**
+ * Parameters for recovering a task
+ */
+export interface TaskRecoveryParams {
+  /** idempotency key of the task to recover */
+  idempotencyKey: IdempotencyKey;
+  /** user id requesting recovery (for authorization) */
+  userId: UserId | null;
+}
+
+/**
+ * Options for streaming task events
+ */
+export interface TaskStreamOptions {
+  /** last received sequence number for reconnection */
+  lastSeq?: number;
+  /** abort signal for cancellation */
+  signal?: AbortSignal;
+}
+
+/**
+ * Task template returned by registerTask
+ */
+export interface TaskTemplate {
+  /** template name */
+  name: string;
+  /** run a new task */
+  run: (params: TaskRunParams) => Promise<Task>;
+  /** recover an existing task */
+  recover: (params: TaskRecoveryParams) => Promise<Task | null>;
+}
+
+/**
+ * Merge helper for executor config
+ */
+export function mergeExecutorConfig(
+  partial?: Partial<ExecutorConfig>,
+): ExecutorConfig {
+  if (!partial) return DEFAULT_EXECUTOR_CONFIG;
+
+  return {
+    heartbeatIntervalMs:
+      partial.heartbeatIntervalMs ??
+      DEFAULT_EXECUTOR_CONFIG.heartbeatIntervalMs,
+    retry: {
+      ...DEFAULT_RETRY_CONFIG,
+      ...partial.retry,
+    },
+  };
+}
+
+/**
+ * Merge helper for recovery config
+ */
+export function mergeRecoveryConfig(
+  partial?: Partial<RecoveryConfig>,
+): RecoveryConfig {
+  if (!partial) return DEFAULT_RECOVERY_CONFIG;
+
+  return {
+    ...DEFAULT_RECOVERY_CONFIG,
+    ...partial,
+  };
+}
+
+/**
+ * Merge helper for shutdown config
+ */
+export function mergeShutdownConfig(
+  partial?: Partial<ShutdownConfig>,
+): ShutdownConfig {
+  if (!partial) return DEFAULT_SHUTDOWN_CONFIG;
+
+  return {
+    ...DEFAULT_SHUTDOWN_CONFIG,
+    ...partial,
+  };
+}
diff --git a/packages/taskflow/src/flush/flush-manager.ts b/packages/taskflow/src/flush/flush-manager.ts
new file mode 100644
index 00000000..cec7b958
--- /dev/null
+++ b/packages/taskflow/src/flush/flush-manager.ts
@@ -0,0 +1,333 @@
+import type { ChildProcess } from "node:child_process";
+import { fork } from "node:child_process";
+import path from "node:path";
+import { fileURLToPath } from "node:url";
+import { noopHooks, type TaskSystemHooks } from "@/observability";
+import type { RepositoryConfig } from "@/persistence";
+import {
+  DEFAULT_FLUSH_CONFIG,
+  type FlushConfig,
+  type FlushStats,
+  type FlushStatus,
+  type FlushWorkerStats,
+  type IPCCommand,
+  type IPCMessage,
+} from "./types";
+
+/**
+ * Full configuration for the Flush manager
+ */
+export interface FlushManagerConfig extends FlushConfig {
+  repository: RepositoryConfig;
+}
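+
+// Hypothetical wiring sketch (not part of this patch; repository settings
+// mirror the TaskSystem defaults):
+//   const flush = new Flush({
+//     repository: { type: "sqlite", database: "./.taskflow/sqlite.db" },
+//   });
+//   await flush.initialize(); // forks the worker and starts health checks
+//   await flush.shutdown();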
+
+/**
+ * Flush manager spawns and monitors a worker process for flushing
+ * events from the EventLog to the repository.
+ */
+export class Flush {
+  private readonly config: Required<Omit<FlushManagerConfig, "repository">> & {
+    repository: RepositoryConfig;
+  };
+  private readonly hooks: TaskSystemHooks;
+
+  private worker: ChildProcess | null = null;
+  private healthCheckTimer: ReturnType<typeof setInterval> | null = null;
+  private restartCount: number = 0;
+  private isShuttingDown: boolean = false;
+  private lastStats: FlushWorkerStats | null = null;
+
+  constructor(
+    config: Partial<FlushManagerConfig> & { repository: RepositoryConfig },
+    hooks: TaskSystemHooks = noopHooks,
+  ) {
+    this.config = {
+      ...DEFAULT_FLUSH_CONFIG,
+      ...config,
+      repository: config.repository,
+    };
+    this.hooks = hooks;
+  }
+
+  /**
+   * Initialize the flush manager
+   * Spawns the worker process and starts health checks
+   */
+  async initialize(): Promise<void> {
+    await this.spawnWorker();
+    this.startHealthCheck();
+
+    this.hooks.log({
+      severity: "info",
+      message: "Flush manager initialized",
+      attributes: {
+        pid: this.worker?.pid,
+      },
+    });
+  }
+
+  /**
+   * Shutdown the flush manager and worker
+   * @param timeoutMs - Maximum time to wait for graceful shutdown
+   */
+  async shutdown(timeoutMs: number = 30_000): Promise<void> {
+    this.isShuttingDown = true;
+    this.stopHealthCheck();
+
+    if (!this.worker || !this.isAlive()) {
+      return;
+    }
+
+    return new Promise<void>((resolve) => {
+      const timeout = setTimeout(() => {
+        this.hooks.log({
+          severity: "warn",
+          message: "Worker did not exit in time, sending SIGKILL",
+        });
+        this.worker?.kill("SIGKILL");
+        resolve();
+      }, timeoutMs);
+
+      this.worker?.once("exit", () => {
+        clearTimeout(timeout);
+        resolve();
+      });
+
+      this.sendCommand({ type: "shutdown", payload: { timeoutMs } });
+    });
+  }
+
+  /**
+   * Check if the worker process is alive
+   */
+  isAlive(): boolean {
+    if (!this.worker) return false;
+
+    try {
+      if (!this.worker.pid) return false;
+
+      // sending signal 0 tests if process exists without killing it
+      process.kill(this.worker.pid, 0);
+      return true;
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * Get worker stats via IPC (async, fetches fresh stats)
+   * Returns cached stats if worker doesn't respond in time
+   */
+  async getWorkerStats(): Promise<FlushWorkerStats | null> {
+    if (!this.isAlive()) return null;
+
+    return new Promise<FlushWorkerStats | null>((resolve) => {
+      const timeout = setTimeout(() => {
+        // detach the listener before falling back to the cached snapshot,
+        // so an unanswered request does not leak a "message" handler
+        this.worker?.off("message", handler);
+        resolve(this.lastStats);
+      }, 1000);
+
+      const handler = (message: IPCMessage) => {
+        if (message.type === "stats") {
+          clearTimeout(timeout);
+          this.worker?.off("message", handler);
+          this.lastStats = message.payload;
+          resolve(message.payload);
+        }
+      };
+
+      this.worker?.on("message", handler);
+      this.sendCommand({ type: "get-stats" });
+    });
+  }
+
+  /**
+   * Get combined process and worker stats (sync, uses cached stats)
+   */
+  getStats(): FlushStats {
+    const workerStats = this.lastStats;
+
+    return {
+      process: {
+        isAlive: this.isAlive(),
+        pid: this.worker?.pid ?? null,
+        restartCount: this.restartCount,
+        isShuttingDown: this.isShuttingDown,
+      },
+      worker: workerStats
+        ? {
+            isRunning: true,
+            flushCount: workerStats.flushCount,
+            errorCount: workerStats.errorCount,
+            consecutiveErrors: workerStats.consecutiveErrors,
+            totalEntriesFlushed: workerStats.totalEntriesFlushed,
+            lastFlushAt: workerStats.lastFlushAt,
+            lastErrorAt: workerStats.lastErrorAt,
+          }
+        : null,
+    };
+  }
+
+  /**
+   * Get current status (legacy method, use getStats instead)
+   */
+  getStatus(): FlushStatus {
+    return {
+      isAlive: this.isAlive(),
+      isShuttingDown: this.isShuttingDown,
+      restartCount: this.restartCount,
+      pid: this.worker?.pid ?? null,
+      lastStats: this.lastStats,
+    };
+  }
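+
+  // Polling example (hypothetical): callers that need fresh numbers await
+  // getWorkerStats() and accept the cached snapshot after the 1 s IPC timeout;
+  // dashboards that must not block read getStats() instead.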
+
+  /**
+   * Spawn the worker process
+   */
+  private async spawnWorker(): Promise<void> {
+    return new Promise<void>((resolve, reject) => {
+      // get the path to the worker entry file
+      const __dirname = path.dirname(fileURLToPath(import.meta.url));
+      const workerPath = path.join(__dirname, "flush-worker-entry.js");
+
+      this.worker = fork(workerPath, [], {
+        env: {
+          ...process.env,
+          FLUSH_CONFIG: JSON.stringify(this.config),
+        },
+        stdio: ["pipe", "pipe", "pipe", "ipc"],
+      });
+
+      // forward worker output for debugging
+      this.worker.stdout?.on("data", (data: Buffer) => {
+        process.stdout.write(`[FlushWorker] ${data.toString()}`);
+      });
+
+      this.worker.stderr?.on("data", (data: Buffer) => {
+        process.stderr.write(`[FlushWorker] ${data.toString()}`);
+      });
+
+      // wait for ready message
+      const onReady = (message: IPCMessage) => {
+        if (message.type === "ready") {
+          this.worker?.off("message", onReady);
+          resolve();
+        } else if (message.type === "error") {
+          this.worker?.off("message", onReady);
+          reject(new Error(message.payload));
+        }
+      };
+      this.worker?.on("message", onReady);
+
+      // handle worker errors
+      this.worker?.on("error", (error) => {
+        this.hooks.log({
+          severity: "error",
+          message: "Worker process error",
+          error,
+        });
+        if (!this.isShuttingDown) {
+          this.handleWorkerExit();
+        }
+      });
+
+      // handle worker exit
+      this.worker?.on("exit", (code, signal) => {
+        this.hooks.log({
+          severity: "warn",
+          message: `Worker exited with code ${code} and signal ${signal}`,
+        });
+        if (!this.isShuttingDown) {
+          this.handleWorkerExit();
+        }
+      });
+
+      // timeout for worker startup
+      setTimeout(() => {
+        if (!this.isAlive()) {
+          reject(new Error("Worker failed to start"));
+        }
+      }, 5000);
+    });
+  }
+
+  /**
+   * Handle unexpected worker exit - attempt restart
+   */
+  private async handleWorkerExit(): Promise<void> {
+    if (this.isShuttingDown) return;
+
+    if (this.restartCount >= this.config.maxRestarts) {
+      this.hooks.log({
+        severity: "error",
+        message: "Max worker restarts reached, giving up",
+        attributes: {
+          restartCount: this.restartCount,
+          maxRestarts: this.config.maxRestarts,
+        },
+      });
+      return;
+    }
+
+    this.restartCount++;
+
+    this.hooks.log({
+      severity: "info",
+      message: `Restarting worker (${this.restartCount}/${this.config.maxRestarts})`,
+    });
+
+    await new Promise((resolve) =>
+      setTimeout(resolve, this.config.restartDelayMs),
+    );
+
+    try {
+      await this.spawnWorker();
+      this.hooks.log({
+        severity: "info",
+        message: "Worker restarted successfully",
+      });
+    } catch (error) {
+      this.hooks.log({
+        severity: "error",
+        message: "Failed to restart worker",
+        error: error instanceof Error ? error : new Error(String(error)),
+      });
+    }
+  }
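+
+  // Restart math with the defaults (maxRestarts: 3, restartDelayMs: 1000): a
+  // crashing worker is respawned after a 1 s pause, at most three times, after
+  // which the manager logs an error and gives up.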
+
+  /**
+   * Start periodic health checks
+   */
+  private startHealthCheck(): void {
+    this.healthCheckTimer = setInterval(() => {
+      if (!this.isAlive() && !this.isShuttingDown) {
+        this.hooks.log({
+          severity: "warn",
+          message: "Health check detected dead worker, restarting",
+        });
+        this.handleWorkerExit();
+      }
+    }, this.config.healthCheckIntervalMs);
+
+    // don't keep process alive just for health checks
+    this.healthCheckTimer.unref();
+  }
+
+  /**
+   * Stop health checks
+   */
+  private stopHealthCheck(): void {
+    if (this.healthCheckTimer) {
+      clearInterval(this.healthCheckTimer);
+      this.healthCheckTimer = null;
+    }
+  }
+
+  /**
+   * Send an IPC command to the worker
+   */
+  private sendCommand(command: IPCCommand): void {
+    if (this.worker?.connected) {
+      this.worker.send(command);
+    }
+  }
+}
diff --git a/packages/taskflow/src/flush/flush-worker-entry.ts b/packages/taskflow/src/flush/flush-worker-entry.ts
new file mode 100644
index 00000000..a8751f0d
--- /dev/null
+++ b/packages/taskflow/src/flush/flush-worker-entry.ts
@@ -0,0 +1,103 @@
+/**
+ * FlushWorker entry point for the forked process
+ *
+ * This file is executed in a child process spawned by the Flush manager.
+ * It handles IPC communication and signal handling.
+ */
+
+import { createRepository, type RepositoryConfig } from "@/persistence";
+import type { FlushConfig, IPCCommand, IPCMessage } from "./types";
+import { noopHooks } from "@/observability";
+import { FlushWorker } from "./flush-worker";
+
+interface FlushWorkerConfig extends FlushConfig {
+  repository: RepositoryConfig;
+}
+
+/**
+ * Send an IPC message to the parent process
+ */
+function send(message: IPCMessage): void {
+  if (process.send) process.send(message);
+}
+
+/**
+ * Main entry point for the flush worker process
+ */
+async function main(): Promise<void> {
+  // parse configuration from environment
+  const configJson = process.env.FLUSH_CONFIG;
+  if (!configJson) {
+    send({
+      type: "error",
+      payload: "FLUSH_CONFIG environment variable not set",
+    });
+    process.exit(1);
+  }
+
+  let config: FlushWorkerConfig;
+  try {
+    config = JSON.parse(configJson) as FlushWorkerConfig;
+  } catch (error) {
+    send({ type: "error", payload: `Failed to parse FLUSH_CONFIG: ${error}` });
+    process.exit(1);
+  }
+
+  // create repository (hooks not available in worker process)
+  const repository = await createRepository(config.repository, noopHooks);
+  const worker = new FlushWorker(config, repository, noopHooks);
+
+  // handle IPC commands from parent
+  process.on("message", async (command: IPCCommand) => {
+    switch (command.type) {
+      case "get-stats":
+        send({ type: "stats", payload: worker.getStats() });
+        break;
+      case "shutdown":
+        await worker.gracefulShutdown(command.payload.timeoutMs);
+        send({ type: "shutdown-complete" });
+        process.exit(0);
+        break;
+    }
+  });
+
+  // handle signals
+  process.on("SIGTERM", async () => {
+    await worker.gracefulShutdown();
+    process.exit(0);
+  });
+
+  process.on("SIGINT", async () => {
+    await worker.gracefulShutdown();
+    process.exit(0);
+  });
+
+  // handle uncaught errors
+  process.on("uncaughtException", (error) => {
+    console.error("[FlushWorker] Uncaught exception:", error);
+    send({ type: "error", payload: String(error) });
+    process.exit(1);
+  });
+
+  process.on("unhandledRejection", (reason) => {
+    console.error("[FlushWorker] Unhandled rejection:", reason);
+    send({ type: "error", payload: String(reason) });
+    process.exit(1);
+  });
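+
+  // IPC at a glance (shapes from ./types): the parent sends
+  //   { type: "get-stats" } or { type: "shutdown", payload: { timeoutMs } },
+  // and this process answers with "stats", "shutdown-complete", "ready", or "error".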
+
+  // start the worker
+  try {
+    await worker.start();
+    send({ type: "ready" });
+  } catch (error) {
+    console.error("[FlushWorker] Failed to start:", error);
+    send({ type: "error", payload: String(error) });
+    process.exit(1);
+  }
+}
+
+// execute main
+main().catch((error) => {
+  console.error("[FlushWorker] Fatal error:", error);
+  process.exit(1);
+});
diff --git a/packages/taskflow/src/flush/flush-worker.ts b/packages/taskflow/src/flush/flush-worker.ts
new file mode 100644
index 00000000..16e86e8f
--- /dev/null
+++ b/packages/taskflow/src/flush/flush-worker.ts
@@ -0,0 +1,364 @@
+import fs from "node:fs/promises";
+import {
+  noopHooks,
+  TaskAttributes,
+  TaskMetrics,
+  TaskSpans,
+  type TaskSystemHooks,
+} from "@/observability";
+import { EventLog, type TaskRepository } from "@/persistence";
+import {
+  DEFAULT_FLUSH_CONFIG,
+  type FlushConfig,
+  type FlushWorkerRuntimeStats,
+  type FlushWorkerStats,
+} from "./types";
+
+/**
+ * FlushWorker - Reads events from EventLog and flushes to repository
+ */
+export class FlushWorker {
+  private readonly config: Required<Omit<FlushConfig, "repository">>;
+  private readonly repository: TaskRepository;
+  private readonly hooks: TaskSystemHooks;
+  private readonly eventLog: EventLog;
+
+  private checkpoint: number = 0;
+  private isShuttingDown: boolean = false;
+  private _isRunning: boolean = false;
+  private flushInterval: ReturnType<typeof setInterval> | null = null;
+  private circuitBreakerOpenUntil: number | null = null;
+
+  private stats: FlushWorkerStats = {
+    flushCount: 0,
+    errorCount: 0,
+    consecutiveErrors: 0,
+    totalEntriesFlushed: 0,
+    lastFlushAt: null,
+    lastErrorAt: null,
+  };
+
+  constructor(
+    config: Partial<FlushConfig>,
+    repository: TaskRepository,
+    hooks: TaskSystemHooks = noopHooks,
+  ) {
+    this.config = { ...DEFAULT_FLUSH_CONFIG, ...config };
+    this.repository = repository;
+    this.hooks = hooks;
+    this.eventLog = new EventLog(
+      {
+        eventLogPath: this.config.eventLogPath,
+      },
+      hooks,
+    );
+  }
+
+  /**
+   * Start the flush worker
+   * - Initialize repository
+   * - Load checkpoint from file
+   * - Start periodic flush interval
+   */
+  async start(): Promise<void> {
+    await this.repository.initialize();
+    this.checkpoint = await this.loadCheckpoint();
+
+    this.flushInterval = setInterval(async () => {
+      await this.flush();
+    }, this.config.flushIntervalMs);
+
+    // don't keep process alive just for flush interval
+    this.flushInterval.unref();
+
+    this._isRunning = true;
+
+    this.hooks.log({
+      severity: "info",
+      message: "FlushWorker started",
+      attributes: {
+        checkpoint: this.checkpoint,
+        flushInterval: this.config.flushIntervalMs,
+      },
+    });
+  }
+
+  /**
+   * Stop the flush worker loop
+   */
+  stop(): void {
+    if (this.flushInterval) {
+      clearInterval(this.flushInterval);
+      this.flushInterval = null;
+    }
+    this._isRunning = false;
+
+    this.hooks.log({
+      severity: "info",
+      message: "FlushWorker stopped",
+      attributes: {
+        checkpoint: this.checkpoint,
+      },
+    });
+  }
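+
+  // Checkpoint model (see loadCheckpoint/saveCheckpoint below): the checkpoint
+  // counts log entries already flushed. After a crash the worker resumes from
+  // readEntriesFromCheckpoint(checkpoint), so nothing is re-read from zero, but
+  // the last unacknowledged batch may be flushed twice (at-least-once delivery).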
+
+  /**
+   * Graceful shutdown - drain remaining events before stopping
+   * @param timeoutMs - Maximum time to wait for draining
+   */
+  async gracefulShutdown(timeoutMs: number = 30_000): Promise<void> {
+    this.isShuttingDown = true;
+    this.stop();
+
+    const startTime = Date.now();
+
+    // drain remaining events
+    while (Date.now() - startTime < timeoutMs) {
+      const entries = await this.eventLog.readEntriesFromCheckpoint(
+        this.checkpoint,
+      );
+
+      if (entries.length === 0) break;
+
+      try {
+        await this.flush();
+      } catch (error) {
+        this.hooks.log({
+          severity: "error",
+          message: "Error during graceful shutdown flush",
+          error: error instanceof Error ? error : new Error(String(error)),
+        });
+      }
+
+      // small delay between flush attempts
+      await new Promise((resolve) => setTimeout(resolve, 100));
+    }
+
+    await this.repository.close();
+
+    this.hooks.log({
+      severity: "info",
+      message: "FlushWorker shutdown complete",
+      attributes: {
+        totalEntriesFlushed: this.stats.totalEntriesFlushed,
+        durationMs: Date.now() - startTime,
+      },
+    });
+  }
+
+  /**
+   * Flush entries from EventLog to repository
+   */
+  async flush(): Promise<void> {
+    // skip if not running (unless shutting down - need to drain)
+    if (!this.isShuttingDown && !this._isRunning) return;
+
+    // skip if circuit breaker is open
+    if (this.isCircuitOpen()) return;
+
+    const startTime = Date.now();
+
+    return this.hooks.withSpan(
+      TaskSpans.FLUSH_BATCH,
+      {
+        [TaskAttributes.REPOSITORY_TYPE]: this.repository.type,
+      },
+      async (span) => {
+        let batch = await this.eventLog.readEntriesFromCheckpoint(
+          this.checkpoint,
+        );
+
+        if (batch.length === 0) return;
+
+        // limit batch size
+        if (batch.length > this.config.maxBatchSize) {
+          batch = batch.slice(0, this.config.maxBatchSize);
+        }
+
+        span.setAttribute(TaskAttributes.FLUSH_BATCH_SIZE, batch.length);
+
+        // retry loop with exponential backoff
+        for (
+          let attempt = 1;
+          attempt <= this.config.maxFlushRetries;
+          attempt++
+        ) {
+          try {
+            await this.repository.executeBatch(batch);
+            await this.saveCheckpoint(this.checkpoint + batch.length);
+
+            // update stats on success
+            this.stats.lastFlushAt = Date.now();
+            this.stats.flushCount++;
+            this.stats.totalEntriesFlushed += batch.length;
+            this.stats.consecutiveErrors = 0;
+
+            // record metrics
+            this.hooks.incrementCounter(TaskMetrics.FLUSH_BATCHES, 1, {
+              [TaskAttributes.REPOSITORY_TYPE]: this.repository.type,
+            });
+
+            this.hooks.incrementCounter(
+              TaskMetrics.FLUSH_ENTRIES,
+              batch.length,
+              {
+                [TaskAttributes.REPOSITORY_TYPE]: this.repository.type,
+              },
+            );
+
+            this.hooks.recordHistogram(
+              TaskMetrics.FLUSH_DURATION_MS,
+              Date.now() - startTime,
+              {
+                [TaskAttributes.REPOSITORY_TYPE]: this.repository.type,
+              },
+            );
+
+            this.hooks.recordHistogram(
+              TaskMetrics.FLUSH_BATCH_SIZE,
+              batch.length,
+              {
+                [TaskAttributes.REPOSITORY_TYPE]: this.repository.type,
+              },
+            );
+
+            span.setStatus("ok");
+            return;
+          } catch (error) {
+            this.hooks.log({
+              severity: "error",
+              message: `Flush attempt ${attempt}/${this.config.maxFlushRetries} failed`,
+              error: error instanceof Error ? error : new Error(String(error)),
+              attributes: {
+                attempt,
+                maxAttempts: this.config.maxFlushRetries,
+              },
+            });
+
+            this.stats.errorCount++;
+            this.stats.lastErrorAt = Date.now();
+            this.stats.consecutiveErrors++;
+
+            this.hooks.incrementCounter(TaskMetrics.FLUSH_ERRORS, 1, {
+              [TaskAttributes.REPOSITORY_TYPE]: this.repository.type,
+            });
+
+            if (attempt < this.config.maxFlushRetries) {
+              // exponential backoff
+              const delay = this.config.retryBaseDelayMs * 2 ** (attempt - 1);
+              await new Promise((resolve) => setTimeout(resolve, delay));
+            }
+          }
+        }
+
+        // all retries exhausted - check circuit breaker
+        if (this.stats.consecutiveErrors >= this.config.circuitBreakerThreshold)
+          this.openCircuitBreaker();
+
+        span.setStatus("error", "All flush retries exhausted");
+      },
+    );
+  }
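+
+  // Backoff example with the defaults (retryBaseDelayMs: 100, maxFlushRetries: 3):
+  // attempt 1 fails -> wait 100 ms, attempt 2 fails -> wait 200 ms, attempt 3
+  // fails -> no further wait; the consecutive-error count feeds the breaker.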
+
+  /**
+   * Whether the worker is currently running
+   */
+  get isRunning(): boolean {
+    return this._isRunning;
+  }
+
+  /**
+   * Get current worker statistics
+   */
+  getStats(): FlushWorkerRuntimeStats {
+    return {
+      ...this.stats,
+      isRunning: this._isRunning,
+      isShuttingDown: this.isShuttingDown,
+      isCircuitOpen: this.isCircuitOpen(),
+    };
+  }
+
+  /**
+   * Save checkpoint atomically using write-then-rename
+   */
+  private async saveCheckpoint(newCheckpoint: number): Promise<void> {
+    const checkpointPath = this.getCheckpointPath();
+    const tempPath = `${checkpointPath}.temp`;
+
+    // write new checkpoint
+    await fs.writeFile(tempPath, newCheckpoint.toString(), "utf-8");
+    await fs.rename(tempPath, checkpointPath);
+
+    this.checkpoint = newCheckpoint;
+  }
+
+  /**
+   * Load checkpoint from file, returns 0 if not found or invalid
+   */
+  private async loadCheckpoint(): Promise<number> {
+    const checkpointPath = this.getCheckpointPath();
+
+    try {
+      const content = await fs.readFile(checkpointPath, "utf-8");
+      const parsed = parseInt(content.trim(), 10);
+
+      if (Number.isNaN(parsed) || parsed < 0) {
+        this.hooks.log({
+          severity: "warn",
+          message: `Invalid checkpoint value: ${content.trim()}, resetting to 0`,
+        });
+        return 0;
+      }
+
+      return parsed;
+    } catch (error) {
+      // file doesn't exist or can't be read
+      if (error instanceof Error && "code" in error && error.code === "ENOENT")
+        return 0;
+      throw error;
+    }
+  }
+
+  /**
+   * Open the circuit breaker to block flushes temporarily
+   */
+  private openCircuitBreaker(): void {
+    this.circuitBreakerOpenUntil =
+      Date.now() + this.config.circuitBreakerDurationMs;
+
+    this.hooks.log({
+      severity: "warn",
+      message: `Circuit breaker opened, blocking flushes for ${this.config.circuitBreakerDurationMs}ms`,
+      attributes: {
+        consecutiveErrors: this.stats.consecutiveErrors,
+        threshold: this.config.circuitBreakerThreshold,
+      },
+    });
+  }
+
+  /**
+   * Check if circuit breaker is currently open
+   */
+  private isCircuitOpen(): boolean {
+    if (this.circuitBreakerOpenUntil === null) return false;
+
+    if (Date.now() >= this.circuitBreakerOpenUntil) {
+      this.circuitBreakerOpenUntil = null;
+      this.hooks.log({
+        severity: "info",
+        message: "Circuit breaker reset, resuming flushes",
+      });
+      return false;
+    }
+
+    return true;
+  }
+
+  /**
+   * Get the checkpoint file path
+   */
+  private getCheckpointPath(): string {
+    return `${this.config.eventLogPath}.flush-checkpoint`;
+  }
+}
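+
+// Worked defaults (from DEFAULT_FLUSH_CONFIG): circuitBreakerThreshold 5 and
+// circuitBreakerDurationMs 30_000 mean five consecutive failed batches pause
+// all flushing for 30 s before the next attempt is allowed.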
diff --git a/packages/taskflow/src/flush/index.ts b/packages/taskflow/src/flush/index.ts
new file mode 100644
index 00000000..2c684627
--- /dev/null
+++ b/packages/taskflow/src/flush/index.ts
@@ -0,0 +1,12 @@
+export { Flush, type FlushManagerConfig } from "./flush-manager";
+export { FlushWorker } from "./flush-worker";
+
+export {
+  DEFAULT_FLUSH_CONFIG,
+  type FlushConfig,
+  type FlushStats,
+  type FlushStatus,
+  type FlushWorkerStats,
+  type IPCCommand,
+  type IPCMessage,
+} from "./types";
diff --git a/packages/taskflow/src/flush/types.ts b/packages/taskflow/src/flush/types.ts
new file mode 100644
index 00000000..c4687b00
--- /dev/null
+++ b/packages/taskflow/src/flush/types.ts
@@ -0,0 +1,131 @@
+import type { RepositoryConfig } from "@/persistence/repository";
+
+/**
+ * Flush configuration options
+ */
+export interface FlushConfig {
+  /** interval between flush attempts in milliseconds */
+  flushIntervalMs: number;
+  /** path to the event log file */
+  eventLogPath: string;
+  /** maximum number of entries to flush per batch */
+  maxBatchSize: number;
+  /** maximum number of retry attempts per flush */
+  maxFlushRetries: number;
+  /** base delay for exponential backoff in milliseconds */
+  retryBaseDelayMs: number;
+  /** duration to keep circuit breaker open in milliseconds */
+  circuitBreakerDurationMs: number;
+  /** number of consecutive errors before opening circuit breaker */
+  circuitBreakerThreshold: number;
+  /** interval between health checks in milliseconds */
+  healthCheckIntervalMs: number;
+  /** maximum number of worker restarts */
+  maxRestarts: number;
+  /** delay between restart attempts in milliseconds */
+  restartDelayMs: number;
+  /** repository configuration */
+  repository: RepositoryConfig;
+}
+
+/**
+ * Default flush configuration values
+ */
+export const DEFAULT_FLUSH_CONFIG: Required<Omit<FlushConfig, "repository">> = {
+  flushIntervalMs: 1000,
+  eventLogPath: "./.taskflow/event.log",
+  maxBatchSize: 1000,
+  maxFlushRetries: 3,
+  retryBaseDelayMs: 100,
+  circuitBreakerDurationMs: 30_000,
+  circuitBreakerThreshold: 5,
+  healthCheckIntervalMs: 5000,
+  maxRestarts: 3,
+  restartDelayMs: 1000,
+};
+
+/**
+ * Statistics tracked by the flush worker
+ */
+export interface FlushWorkerStats {
+  /** number of successful flush operations */
+  flushCount: number;
+  /** total number of errors encountered */
+  errorCount: number;
+  /** number of consecutive errors (resets on success) */
+  consecutiveErrors: number;
+  /** total number of entries flushed to repository */
+  totalEntriesFlushed: number;
+  /** timestamp of last successful flush */
+  lastFlushAt: number | null;
+  /** timestamp of last error */
+  lastErrorAt: number | null;
+}
+
+/**
+ * Extended worker stats including runtime state
+ */
+export interface FlushWorkerRuntimeStats extends FlushWorkerStats {
+  /** whether the worker is running */
+  isRunning: boolean;
+  /** whether the worker is shutting down */
+  isShuttingDown: boolean;
+  /** whether the circuit breaker is open */
+  isCircuitOpen: boolean;
+}
+
+/**
+ * Messages sent from worker to manager
+ */
+export type IPCMessage =
+  | { type: "ready" }
+  | { type: "stats"; payload: FlushWorkerRuntimeStats }
+  | { type: "shutdown-complete" }
+  | { type: "error"; payload: string };
+
+/**
+ * Commands sent from manager to worker
+ */
+export type IPCCommand =
+  | { type: "shutdown"; payload: { timeoutMs: number } }
+  | { type: "get-stats" };
+
+/**
+ * Manager's view of worker status
+ */
+export interface FlushStatus {
+  /** whether the worker process is alive */
+  isAlive: boolean;
+  /** whether the manager is shutting down */
+  isShuttingDown: boolean;
+  /** number of times the worker has been restarted */
+  restartCount: number;
+  /** PID of the worker process */
+  pid: number | null;
+  /** last known worker stats */
+  lastStats: FlushWorkerStats | null;
+}
+
+/**
+ * Combined process and worker statistics
+ */
+export interface FlushStats { + /** process-level statistics */ + process: { + isAlive: boolean; + pid: number | null; + restartCount: number; + isShuttingDown: boolean; + }; + /** worker-level statistics (null if worker not running) */ + worker: { + isRunning: boolean; + flushCount: number; + errorCount: number; + consecutiveErrors: number; + totalEntriesFlushed: number; + lastFlushAt: number | null; + lastErrorAt: number | null; + } | null; +} diff --git a/packages/taskflow/src/guard/types.ts b/packages/taskflow/src/guard/types.ts index 351d6525..65d2e832 100644 --- a/packages/taskflow/src/guard/types.ts +++ b/packages/taskflow/src/guard/types.ts @@ -1,4 +1,4 @@ -import { IdempotencyKey, TaskName, UserId } from "@/core/branded"; +import type { IdempotencyKey, TaskName, UserId } from "@/core/branded"; import type { Task } from "@/domain"; /** diff --git a/packages/taskflow/src/index.test.ts b/packages/taskflow/src/index.test.ts deleted file mode 100644 index dd3acf8f..00000000 --- a/packages/taskflow/src/index.test.ts +++ /dev/null @@ -1,8 +0,0 @@ -import { describe, expect, it } from "vitest"; -import { hello } from "./index"; - -describe("hello", () => { - it("should return a greeting", () => { - expect(hello("World")).toBe("Hello, World!"); - }); -}); diff --git a/packages/taskflow/src/index.ts b/packages/taskflow/src/index.ts index bbc74285..4bd4641d 100644 --- a/packages/taskflow/src/index.ts +++ b/packages/taskflow/src/index.ts @@ -1,3 +1,193 @@ -export function hello(name: string): string { - return `Hello, ${name}!`; -} +/** + * @databricks/taskflow - Durable task execution system + * + * A standalone, zero-dependency task execution library with: + * - Write-ahead log for durability + * - Event streaming with reconnection support + * - Retry with exponential backoff + * - Dead letter queue for failed tasks + * - Background task recovery + */ + +export { + type EventId, + eventId, + type IdempotencyKey, + idempotencyKey, + type TaskId, + type TaskName, + taskId, + taskName, + type UserId, + userId, +} from "./core/branded"; + +export { + BackpressureError, + ConfigValidationError, + ConflictError, + type ErrorCode, + ErrorCodes, + type ErrorContext, + EventLogError, + type HTTP429Response, + InitializationError, + InvalidPathError, + isRetryableError, + isTaskSystemError, + NotFoundError, + RepositoryError, + RetryExhaustedError, + SlotTimeoutError, + StreamOverflowError, + TaskStateError, + TaskSystemError, + ValidationError, +} from "./core/errors"; +export { + isTerminalStatus, + isValidTransition, + type TaskStatus, + type TaskType, + VALID_TRANSITIONS, +} from "./core/types"; +export { RingBuffer } from "./delivery/ring-buffer"; +// Delivery +export { StreamManager } from "./delivery/stream"; +export type { + StreamConfig, + StreamStats, + StreamTaskEvent, + TaskStream, + TaskStreamOptions, +} from "./delivery/types"; +// Domain +export { + createTaskEvent, + type EventLogEntry, + type EventLogEntryType, + isRecoveryRelevant, + shouldStoreInTaskEvents, + type TaskEvent, + type TaskEventContext, + type TaskEventInput, + // Events + type TaskEventType, + toEventLogEntry, + toEventLogEntryType, + toTaskEventType, +} from "./domain/events"; +export { + type GeneratorTaskHandler, + isAsyncGenerator, + type PromiseTaskHandler, + type RecoveryContext, + type RecoveryHandler, + type TaskDefinition, + type TaskHandler, + type TaskHandlerContext, + type TaskHandlerResult, +} from "./domain/handler"; +export { Task } from "./domain/task"; +export type { + StoredEventType, + 
TaskCreationParams, + TaskEventRecord, + TaskExecutionOptions, + TaskJSON, + TaskRecord, +} from "./domain/types"; +export { TaskExecutor, type TaskExecutorDeps } from "./execution/executor"; +export { TaskRecovery, type TaskRecoveryDeps } from "./execution/recovery"; +export { + TaskSystem, + TaskSystem as createTaskSystem, + type TaskSystemConfig, +} from "./execution/system"; +export { + DEFAULT_EXECUTOR_CONFIG, + DEFAULT_RECOVERY_CONFIG, + DEFAULT_RETRY_CONFIG, + DEFAULT_SHUTDOWN_CONFIG, + type ExecutorConfig, + type ExecutorStats, + type RecoveryConfig, + type RecoveryStats, + type RetryConfig, + type ShutdownConfig, + type ShutdownOptions, + type TaskEventSubscriber, + type TaskRecoveryParams, + type TaskRunParams, + type TaskSystemStats, + type TaskSystemStatus, + type TaskTemplate, +} from "./execution/types"; +export { Flush } from "./flush/flush-manager"; +export { FlushWorker } from "./flush/flush-worker"; +export type { + FlushConfig, + FlushStats, + FlushStatus, + FlushWorkerRuntimeStats, + FlushWorkerStats, + IPCCommand, + IPCMessage, +} from "./flush/types"; +export { Backpressure } from "./guard/backpressure"; +export { DeadLetterQueue } from "./guard/dlq"; +export { Guard } from "./guard/guard"; +export { SlotManager } from "./guard/slot-manager"; +export type { + AdmissionStats, + BackpressureConfig, + DLQConfig, + DLQEntry, + DLQEvent, + DLQEventListener, + DLQEventType, + DLQStats, + GuardConfig, + GuardStats, + RecoverySlotConfig, + RecoverySlotStats, + SlotManagerConfig, + SlotStats, +} from "./guard/types"; +export { + defaultValidator, + TaskValidator, + validateInputSchema, + validateTaskInput, +} from "./guard/validator"; +export type { + Attributes, + LogRecord, + LogSeverity, + Span, + SpanCallback, + SpanContext, + SpanStatus, + TaskSystemHooks, +} from "./observability"; +export { + createHooks, + NoopSpan, + noopHooks, + TaskAttributes, + TaskMetrics, + TaskSpans, +} from "./observability"; +export { EventLog } from "./persistence/event-log"; +export { createRepository } from "./persistence/repository"; +export type { + BaseRepositoryConfig, + RepositoryType, + StoredEvent, + TaskRepository, +} from "./persistence/repository/types"; +export type { + EventLogConfig, + EventLogEvent, + EventLogStats, +} from "./persistence/types"; diff --git a/packages/taskflow/src/persistence/event-log.ts b/packages/taskflow/src/persistence/event-log.ts index cec86dec..8c0c3ba6 100644 --- a/packages/taskflow/src/persistence/event-log.ts +++ b/packages/taskflow/src/persistence/event-log.ts @@ -4,7 +4,7 @@ import path from "node:path"; import { canonicalize } from "json-canonicalize"; import { EventLogError } from "@/core/errors"; import type { TaskStatus } from "@/core/types"; -import type { EventLogEntry, TaskEvent } from "@/domain"; +import { type EventLogEntry, type TaskEvent, toEventLogEntry } from "@/domain"; import { noopHooks, TaskAttributes, @@ -47,6 +47,8 @@ export class EventLog { private entriesWritten = 0; /** count of malformed entries skipped during reads */ private malformedEntriesSkipped = 0; + /** current file size in bytes (tracked in memory for reliable rotation checks) */ + private currentFileSize = 0; constructor( config: Partial, @@ -69,6 +71,14 @@ export class EventLog { await fs.mkdir(dir, { recursive: true }); this.fileHandle = await fs.open(this.config.eventLogPath, "a"); + // initialize file size from existing file + try { + const stats = await fs.stat(this.config.eventLogPath); + this.currentFileSize = stats.size; + } catch { + this.currentFileSize = 
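+      // (stat failed: no log file exists yet, so start the size counter at zero)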
0; + } + // load or create checkpoint const previousSeq = await this.loadCheckpoint(); this.currentSeq = previousSeq; @@ -121,7 +131,9 @@ export class EventLog { // write to file const line = `${JSON.stringify(eventPayload)}\n`; + const lineBytes = Buffer.byteLength(line, "utf8"); await this.fileHandle.write(line); + this.currentFileSize += lineBytes; if (fsync) await this.fileHandle.sync(); @@ -171,80 +183,22 @@ export class EventLog { * Converts TaskEvent to EventLogEntry format */ async appendEvent(event: TaskEvent): Promise { - const base = { - taskId: event.taskId, - idempotencyKey: event.idempotencyKey, - name: event.name, - userId: event.userId ?? null, - taskType: event.taskType, - timestamp: event.timestamp ?? Date.now(), - }; - - switch (event.type) { - case "created": - await this.appendEntry( - { - ...base, - type: "TASK_CREATED", - input: event.input, - executionOptions: event.executionOptions, - }, - true, - ); - break; - case "start": - await this.appendEntry( - { - ...base, - type: "TASK_START", - }, - true, - ); - break; - case "progress": - await this.appendEntry({ - ...base, - type: "TASK_PROGRESS", - payload: event.payload, - }); - break; - case "complete": - await this.appendEntry( - { - ...base, - type: "TASK_COMPLETE", - result: event.result, - }, - true, - ); - break; - case "heartbeat": - await this.appendEntry({ - ...base, - type: "TASK_HEARTBEAT", - }); - break; - case "error": - await this.appendEntry( - { - ...base, - type: "TASK_CANCELLED", - error: event.error ?? "Unknown reason", - }, - true, - ); - break; - case "custom": - await this.appendEntry({ - ...base, - type: "TASK_CUSTOM", - payload: { - eventName: event.eventName, - ...event.payload, - }, - }); - break; + const entry = toEventLogEntry(event); + if (!entry) { + return; } + + // critical events to be written to disk immediately + const criticalEvents = [ + "TASK_CREATED", + "TASK_START", + "TASK_COMPLETE", + "TASK_ERROR", + "TASK_CANCELLED", + ]; + const fsync = criticalEvents.includes(entry.type); + + await this.appendEntry(entry, fsync); } /** @@ -278,13 +232,16 @@ export class EventLog { * Check if log file should be rotated */ async shouldRotateEventLog(): Promise { + // use tracked file size for reliable size-based rotation + if (this.currentFileSize >= this.config.maxSizeBytesPerFile) { + return true; + } + + // check age-based rotation using fs.stat try { const stats = await fs.stat(this.config.eventLogPath); const age = Date.now() - stats.mtime.getTime(); - return ( - stats.size >= this.config.maxSizeBytesPerFile || - age >= this.config.maxAgePerFile - ); + return age >= this.config.maxAgePerFile; } catch { return false; } @@ -407,6 +364,7 @@ export class EventLog { } this.currentSeq = 0; + this.currentFileSize = 0; if (deleteFiles) { try { @@ -523,8 +481,9 @@ export class EventLog { ); } - // open new file handle + // open new file handle and reset tracked size this.fileHandle = await fs.open(this.config.eventLogPath, "a+"); + this.currentFileSize = 0; } catch (error) { throw new EventLogError( "Failed to rotate event log", diff --git a/packages/taskflow/src/persistence/repository/lakebase/repository.ts b/packages/taskflow/src/persistence/repository/lakebase/repository.ts index 5753ce5e..e704089d 100644 --- a/packages/taskflow/src/persistence/repository/lakebase/repository.ts +++ b/packages/taskflow/src/persistence/repository/lakebase/repository.ts @@ -2,8 +2,15 @@ import fs from "node:fs"; import path from "node:path"; import { fileURLToPath } from "node:url"; import type { IdempotencyKey, 
TaskId } from "@/core/branded"; +import { RepositoryError } from "@/core/errors"; import type { TaskStatus } from "@/core/types"; import { type EventLogEntry, type EventLogEntryType, Task } from "@/domain"; +import { + noopHooks, + TaskAttributes, + TaskMetrics, + type TaskSystemHooks, +} from "@/observability"; import type { StoredEvent, TaskRepository } from "../types"; import type { LakebaseConnector, @@ -12,13 +19,6 @@ import type { LakebaseTaskRecord, LakebaseTransactionClient, } from "./types"; -import { - noopHooks, - TaskAttributes, - TaskMetrics, - TaskSystemHooks, -} from "@/observability"; -import { RepositoryError } from "@/core/errors"; /** * Lakebase Task Repository diff --git a/packages/taskflow/src/tests/execution/executor.test.ts b/packages/taskflow/src/tests/execution/executor.test.ts new file mode 100644 index 00000000..00e08f05 --- /dev/null +++ b/packages/taskflow/src/tests/execution/executor.test.ts @@ -0,0 +1,592 @@ +import { + afterEach, + beforeEach, + describe, + expect, + it, + type Mock, + vi, +} from "vitest"; +import { idempotencyKey, taskName, userId } from "@/core/branded"; +import { ValidationError } from "@/core/errors"; +import type { TaskEvent, TaskEventInput } from "@/domain/events"; +import type { + TaskDefinition, + TaskHandler, + TaskHandlerContext, +} from "@/domain/handler"; +import { Task } from "@/domain/task"; +import { TaskExecutor } from "@/execution/executor"; +import type { ExecutorConfig } from "@/execution/types"; +import type { EventLog } from "@/persistence/event-log"; + +function createMockEventLog(): EventLog { + return { + appendEvent: vi.fn(), + initialize: vi.fn().mockResolvedValue(undefined), + close: vi.fn().mockResolvedValue(undefined), + readEntriesFromCheckpoint: vi.fn().mockReturnValue([]), + getCheckpoint: vi.fn().mockReturnValue(0), + setCheckpoint: vi.fn(), + getStats: vi.fn().mockReturnValue({}), + } as unknown as EventLog; +} + +interface MockTaskOptions { + name?: string; + input?: unknown; + userId?: string | null; + idempotencyKey?: string; + type?: "background" | "user"; +} + +function createMockTask(options?: MockTaskOptions): Task { + return new Task({ + name: taskName(options?.name ?? "test-task"), + input: options?.input ?? { data: "test" }, + userId: + options?.userId !== undefined + ? userId(options.userId) + : userId("user-123"), + idempotencyKey: idempotencyKey(options?.idempotencyKey ?? "test-key-123"), + type: options?.type ?? 
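+    // (tests default to a user-scoped task; pass { type: "background" } to
+    // exercise background-task code paths)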
"user", + }); +} + +function createMockDefinition( + executeFn: ( + input: unknown, + context: TaskHandlerContext, + ) => Promise | AsyncGenerator, +): TaskDefinition { + return { + name: "test-task", + handler: executeFn as TaskHandler, + defaultOptions: {}, + }; +} + +describe("TaskExecutor", () => { + let executor: TaskExecutor; + let mockEventLog: EventLog; + let onEvent: Mock; + let onComplete: Mock; + let events: TaskEvent[]; + + const fastConfig: Partial = { + heartbeatIntervalMs: 1000, + retry: { + maxAttempts: 3, + initialDelayMs: 100, + maxDelayMs: 1000, + backoffMultiplier: 2, + }, + }; + + beforeEach(() => { + events = []; + mockEventLog = createMockEventLog(); + onEvent = vi.fn((_key: string, event: TaskEvent) => { + events.push(event); + }); + onComplete = vi.fn(); + executor = new TaskExecutor(fastConfig, { + eventLog: mockEventLog, + subscribers: { onEvent, onComplete }, + }); + }); + + afterEach(() => { + executor.abortAll(); + vi.clearAllMocks(); + }); + + describe("execute", () => { + it("should emit error when handler is not provided", async () => { + const task = createMockTask(); + + await executor.execute(task, undefined); + + expect(task.status).toBe("failed"); + expect(events).toHaveLength(1); + expect(events[0].type).toBe("error"); + expect(events[0].message).toContain("Handler for task"); + expect(onComplete).toHaveBeenCalledWith(task); + }); + + it("should emit start event and complete event on success", async () => { + const task = createMockTask(); + const definition = createMockDefinition(async () => { + return { result: "Done" }; + }); + + await executor.execute(task, definition); + + expect(task.status).toBe("completed"); + expect(task.startedAt).toBeDefined(); + expect(task.completedAt).toBeDefined(); + expect(task.durationMs).toBeGreaterThanOrEqual(0); + + const eventTypes = events.map((e) => e.type); + expect(eventTypes).toContain("start"); + expect(eventTypes).toContain("complete"); + expect(onComplete).toHaveBeenCalledWith(task); + }); + + it("should handle async generator handlers", async () => { + const task = createMockTask(); + const definition = createMockDefinition(async function* () { + yield { type: "progress", message: "Step 1" }; + yield { type: "progress", message: "Step 2" }; + return { result: "Done" }; + }); + + await executor.execute(task, definition); + + expect(task.status).toBe("completed"); + const progressEvents = events.filter((e) => e.type === "progress"); + expect(progressEvents).toHaveLength(2); + }); + }); + + describe("retry behavior", () => { + it("should retry on retryable error with exponential backoff", async () => { + const task = createMockTask(); + let attempts = 0; + + const definition = createMockDefinition(async () => { + attempts++; + if (attempts < 3) { + throw new Error("ECONNRESET"); + } + return { result: "Success" }; + }); + + await executor.execute(task, definition); + + expect(attempts).toBe(3); + expect(task.status).toBe("completed"); + + const retryEvents = events.filter((e) => e.type === "retry"); + expect(retryEvents).toHaveLength(2); + expect(retryEvents[0].nextRetryDelayMs).toBe(100); + expect(retryEvents[1].nextRetryDelayMs).toBe(200); + }); + + it("should not retry on permanent error", async () => { + const task = createMockTask(); + let attempts = 0; + + const definition = createMockDefinition(async () => { + attempts++; + throw new ValidationError("Invalid input", "field"); + }); + + await executor.execute(task, definition); + + expect(attempts).toBe(1); + expect(task.status).toBe("failed"); + 
expect(task.error).toContain("Invalid input"); + + const errorEvents = events.filter((e) => e.type === "error"); + expect(errorEvents).toHaveLength(1); + expect(errorEvents[0].retryable).toBe(false); + }); + + it("should fail after max attempts exhausted", async () => { + const task = createMockTask(); + let attempts = 0; + + const definition = createMockDefinition(async () => { + attempts++; + throw new Error("timeout"); + }); + + await executor.execute(task, definition); + + expect(attempts).toBe(3); + expect(task.status).toBe("failed"); + + const errorEvents = events.filter((e) => e.type === "error"); + expect(errorEvents).toHaveLength(1); + expect(errorEvents[0].attempt).toBe(3); + expect(errorEvents[0].maxAttempts).toBe(3); + }); + + it("should cap delay at maxDelayMs", async () => { + const cappedExecutor = new TaskExecutor( + { + heartbeatIntervalMs: 1000, + retry: { + maxAttempts: 5, + initialDelayMs: 500, + maxDelayMs: 1000, + backoffMultiplier: 2, + }, + }, + { + eventLog: mockEventLog, + subscribers: { onEvent, onComplete }, + }, + ); + + const task = createMockTask(); + let attempts = 0; + + const definition = createMockDefinition(async () => { + attempts++; + if (attempts < 4) { + throw new Error("timeout"); + } + return { result: "Success" }; + }); + + await cappedExecutor.execute(task, definition); + + const retryEvents = events.filter((e) => e.type === "retry"); + expect(retryEvents[0].nextRetryDelayMs).toBe(500); + expect(retryEvents[1].nextRetryDelayMs).toBe(1000); + expect(retryEvents[2].nextRetryDelayMs).toBe(1000); // capped + }); + }); + + describe("abort", () => { + it("should abort a running task", async () => { + const task = createMockTask(); + const definition = createMockDefinition(async (_input, context) => { + await new Promise((resolve, reject) => { + const timeout = setTimeout(resolve, 5000); + context?.signal?.addEventListener("abort", () => { + clearTimeout(timeout); + reject(new Error("Task aborted")); + }); + }); + return { result: "Done" }; + }); + + const executePromise = executor.execute(task, definition); + + await new Promise((resolve) => setTimeout(resolve, 20)); + expect(executor.isExecuting(task.idempotencyKey)).toBe(true); + + executor.abort(task.idempotencyKey); + + await executePromise; + + expect(["cancelled", "failed"]).toContain(task.status); + expect(onComplete).toHaveBeenCalled(); + }); + + it("should do nothing when aborting non-existent task", () => { + expect(() => + executor.abort(idempotencyKey("non-existent")), + ).not.toThrow(); + }); + }); + + describe("abortAll", () => { + it("should abort all running tasks", async () => { + const task1 = createMockTask({ idempotencyKey: "key-1" }); + const task2 = createMockTask({ idempotencyKey: "key-2" }); + + const slowDefinition = createMockDefinition(async (_input, context) => { + await new Promise((resolve, reject) => { + const timeout = setTimeout(resolve, 5000); + context?.signal?.addEventListener("abort", () => { + clearTimeout(timeout); + reject(new Error("Task aborted")); + }); + }); + return { result: "Done" }; + }); + + const promise1 = executor.execute(task1, slowDefinition); + const promise2 = executor.execute(task2, slowDefinition); + + await new Promise((resolve) => setTimeout(resolve, 20)); + expect(executor.isExecuting(idempotencyKey("key-1"))).toBe(true); + expect(executor.isExecuting(idempotencyKey("key-2"))).toBe(true); + + executor.abortAll(); + + await Promise.all([promise1, promise2]); + + expect(executor.isExecuting(idempotencyKey("key-1"))).toBe(false); + 
expect(executor.isExecuting(idempotencyKey("key-2"))).toBe(false); + }); + }); + + describe("isExecuting", () => { + it("should return true while task is executing", async () => { + const task = createMockTask(); + const definition = createMockDefinition(async () => { + await new Promise((resolve) => setTimeout(resolve, 100)); + return { result: "Done" }; + }); + + const executePromise = executor.execute(task, definition); + + await new Promise((resolve) => setTimeout(resolve, 10)); + expect(executor.isExecuting(task.idempotencyKey)).toBe(true); + + await executePromise; + expect(executor.isExecuting(task.idempotencyKey)).toBe(false); + }); + + it("should return false for non-existent task", () => { + expect(executor.isExecuting(idempotencyKey("non-existent"))).toBe(false); + }); + }); + + describe("heartbeat", () => { + it("should emit heartbeat events periodically", async () => { + const task = createMockTask(); + const definition = createMockDefinition(async () => { + await new Promise((resolve) => setTimeout(resolve, 2500)); + return { result: "Done" }; + }); + + await executor.execute(task, definition); + + const heartbeatEvents = events.filter((e) => e.type === "heartbeat"); + expect(heartbeatEvents.length).toBeGreaterThanOrEqual(2); + expect(heartbeatEvents[0].timestamp).toBeDefined(); + expect(task.lastHeartbeatAt).toBeDefined(); + }, 10000); + + it("should stop heartbeat after task completes", async () => { + const task = createMockTask(); + const definition = createMockDefinition(async () => { + return { result: "Done" }; + }); + + await executor.execute(task, definition); + + const heartbeatCountBefore = events.filter( + (e) => e.type === "heartbeat", + ).length; + + await new Promise((resolve) => setTimeout(resolve, 100)); + + const heartbeatCountAfter = events.filter( + (e) => e.type === "heartbeat", + ).length; + + expect(heartbeatCountAfter).toBe(heartbeatCountBefore); + }); + }); + + describe("error handling", () => { + it("should track error message from Error instance", async () => { + const task = createMockTask(); + const definition = createMockDefinition(async () => { + throw new Error("Something went wrong"); + }); + + await executor.execute(task, definition); + + expect(task.error).toBe("Something went wrong"); + }); + + it("should convert non-Error to string", async () => { + const task = createMockTask(); + const definition = createMockDefinition(async () => { + throw "String error"; + }); + + await executor.execute(task, definition); + + expect(task.error).toBe("String error"); + }); + + it("should include attempt info in error event", async () => { + const task = createMockTask(); + const definition = createMockDefinition(async () => { + throw new ValidationError("Bad input"); + }); + + await executor.execute(task, definition); + + const errorEvent = events.find((e) => e.type === "error"); + expect(errorEvent?.attempt).toBe(1); + expect(errorEvent?.maxAttempts).toBe(3); + }); + }); + + describe("duration tracking", () => { + it("should calculate duration on success", async () => { + const task = createMockTask(); + const definition = createMockDefinition(async () => { + await new Promise((resolve) => setTimeout(resolve, 50)); + return { result: "Done" }; + }); + + await executor.execute(task, definition); + + expect(task.durationMs).toBeGreaterThanOrEqual(50); + const completeEvent = events.find((e) => e.type === "complete"); + expect(completeEvent?.durationMs).toBeGreaterThanOrEqual(50); + }); + + it("should calculate duration on failure", async () => { + const 
task = createMockTask(); + const definition = createMockDefinition(async () => { + await new Promise((resolve) => setTimeout(resolve, 30)); + throw new ValidationError("Bad"); + }); + + await executor.execute(task, definition); + + expect(task.durationMs).toBeGreaterThanOrEqual(30); + }); + }); + + describe("EventLog persistence", () => { + it("should persist all events to EventLog", async () => { + const task = createMockTask(); + const definition = createMockDefinition(async () => { + return { result: "Done" }; + }); + + await executor.execute(task, definition); + + expect(mockEventLog.appendEvent).toHaveBeenCalled(); + const calls = vi.mocked(mockEventLog.appendEvent).mock.calls; + const eventTypes = calls.map((call) => call[0].type); + + expect(eventTypes).toContain("start"); + expect(eventTypes).toContain("complete"); + }); + + it("should persist events before broadcasting to subscribers (WAL-first)", async () => { + const callOrder: string[] = []; + + const orderedEventLog = { + appendEvent: vi.fn(() => { + callOrder.push("eventLog"); + }), + } as unknown as EventLog; + + const orderedOnEvent = vi.fn(() => { + callOrder.push("subscriber"); + }); + + const orderedExecutor = new TaskExecutor(fastConfig, { + eventLog: orderedEventLog, + subscribers: { onEvent: orderedOnEvent }, + }); + + const task = createMockTask(); + const definition = createMockDefinition(async () => { + return { result: "Done" }; + }); + + await orderedExecutor.execute(task, definition); + + // verify WAL-first pattern + for (let i = 0; i < callOrder.length - 1; i += 2) { + expect(callOrder[i]).toBe("eventLog"); + expect(callOrder[i + 1]).toBe("subscriber"); + } + }); + + it("should include correct task metadata in persisted events", async () => { + const task = createMockTask({ + name: "my-task", + idempotencyKey: "idem-456", + userId: "user-789", + type: "background", + }); + const definition = createMockDefinition(async () => { + return { result: "Done" }; + }); + definition.name = "my-task"; + + await executor.execute(task, definition); + + const calls = vi.mocked(mockEventLog.appendEvent).mock.calls; + const startEvent = calls.find((call) => call[0].type === "start")?.[0]; + + expect(startEvent).toBeDefined(); + expect(startEvent?.taskId).toBe(task.id); + expect(startEvent?.name).toBe("my-task"); + expect(startEvent?.idempotencyKey).toBe("idem-456"); + expect(startEvent?.userId).toBe("user-789"); + expect(startEvent?.taskType).toBe("background"); + }); + + it("should persist error events on failure", async () => { + const task = createMockTask(); + const definition = createMockDefinition(async () => { + throw new ValidationError("Something went wrong"); + }); + + await executor.execute(task, definition); + + const calls = vi.mocked(mockEventLog.appendEvent).mock.calls; + const eventTypes = calls.map((call) => call[0].type); + + expect(eventTypes).toContain("start"); + expect(eventTypes).toContain("error"); + expect(eventTypes).toContain("complete"); + + const errorEvent = calls.find((call) => call[0].type === "error")?.[0]; + expect(errorEvent?.message).toContain("Something went wrong"); + }); + + it("should persist retry events", async () => { + const task = createMockTask(); + let attempts = 0; + + const definition = createMockDefinition(async () => { + attempts++; + if (attempts < 2) { + throw new Error("timeout"); + } + return { result: "Success" }; + }); + + await executor.execute(task, definition); + + const calls = vi.mocked(mockEventLog.appendEvent).mock.calls; + const eventTypes = calls.map((call) => 
call[0].type); + + expect(eventTypes).toContain("retry"); + }); + + it("should persist heartbeat events", async () => { + const task = createMockTask(); + const definition = createMockDefinition(async () => { + await new Promise((resolve) => setTimeout(resolve, 1500)); + return { result: "Done" }; + }); + + await executor.execute(task, definition); + + const calls = vi.mocked(mockEventLog.appendEvent).mock.calls; + const heartbeatEvents = calls.filter( + (call) => call[0].type === "heartbeat", + ); + + expect(heartbeatEvents.length).toBeGreaterThanOrEqual(1); + }, 10000); + }); + + describe("getStats", () => { + it("should return correct statistics", async () => { + const task = createMockTask(); + const definition = createMockDefinition(async () => { + return { result: "Done" }; + }); + + await executor.execute(task, definition); + + const stats = executor.getStats(); + + expect(stats.outcomes.completed).toBe(1); + expect(stats.outcomes.total).toBe(1); + expect(stats.timing.lastStartAt).toBeDefined(); + expect(stats.timing.lastCompleteAt).toBeDefined(); + }); + }); +}); diff --git a/packages/taskflow/src/tests/execution/recovery.test.ts b/packages/taskflow/src/tests/execution/recovery.test.ts new file mode 100644 index 00000000..fb6e43ef --- /dev/null +++ b/packages/taskflow/src/tests/execution/recovery.test.ts @@ -0,0 +1,525 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { idempotencyKey, taskName, userId } from "@/core/branded"; +import type { StreamManager } from "@/delivery/stream"; +import type { TaskEvent } from "@/domain/events"; +import type { TaskDefinition } from "@/domain/handler"; +import { Task } from "@/domain/task"; +import type { TaskExecutor } from "@/execution/executor"; +import { TaskRecovery } from "@/execution/recovery"; +import type { RecoveryConfig } from "@/execution/types"; +import type { Guard } from "@/guard/guard"; +import type { + StoredEvent, + TaskRepository, +} from "@/persistence/repository/types"; + +function createMockTask(options?: { + id?: string; + name?: string; + userId?: string; + status?: "created" | "running" | "completed" | "failed" | "cancelled"; + type?: "user" | "background"; + lastHeartbeatAt?: Date; +}): Task { + const task = new Task({ + name: taskName(options?.name ?? "test-task"), + input: { data: "test" }, + userId: userId(options?.userId ?? "user-123"), + type: options?.type ?? 
"user", + }); + + // override status and other fields if provided + if (options?.status) { + (task as any)._status = options.status; + } + if (options?.lastHeartbeatAt) { + (task as any)._lastHeartbeatAt = options.lastHeartbeatAt; + } + + return task; +} + +function createMockRepository(): TaskRepository { + return { + type: "sqlite", + isInitialized: true, + initialize: vi.fn().mockResolvedValue(undefined), + executeBatch: vi.fn().mockResolvedValue(undefined), + findById: vi.fn().mockResolvedValue(null), + findByIdempotencyKey: vi.fn().mockResolvedValue(null), + findStaleTasks: vi.fn().mockResolvedValue([]), + getEvents: vi.fn().mockResolvedValue([]), + healthCheck: vi.fn().mockResolvedValue(true), + close: vi.fn().mockResolvedValue(undefined), + }; +} + +function createMockGuard(): Guard { + return { + acquireRecoverySlot: vi.fn(), + releaseRecoverySlot: vi.fn(), + } as unknown as Guard; +} + +function createMockStreamManager(): StreamManager { + return { + push: vi.fn(), + getOrCreate: vi.fn(), + close: vi.fn(), + } as unknown as StreamManager; +} + +function createMockExecutor(): TaskExecutor { + return { + execute: vi.fn().mockResolvedValue(undefined), + } as unknown as TaskExecutor; +} + +describe("TaskRecovery", () => { + let recovery: TaskRecovery; + let mockRepository: TaskRepository; + let mockGuard: Guard; + let mockStreamManager: StreamManager; + let mockExecutor: TaskExecutor; + let definitions: Map; + + const fastConfig: Partial = { + enabled: true, + backgroundPollIntervalMs: 100, + staleThresholdMs: 1000, + batchSize: 5, + completionTimeoutMs: 5000, + heartbeatIntervalMs: 500, + }; + + beforeEach(() => { + definitions = new Map(); + mockRepository = createMockRepository(); + mockGuard = createMockGuard(); + mockStreamManager = createMockStreamManager(); + mockExecutor = createMockExecutor(); + + recovery = new TaskRecovery(fastConfig, { + guard: mockGuard, + repository: mockRepository, + streamManager: mockStreamManager, + executor: mockExecutor, + getDefinition: (name) => definitions.get(name), + }); + }); + + afterEach(() => { + recovery.stopBackgroundRecovery(); + vi.clearAllMocks(); + }); + + describe("configuration", () => { + it("should use default config when not provided", () => { + const defaultRecovery = new TaskRecovery(undefined, { + guard: mockGuard, + repository: mockRepository, + streamManager: mockStreamManager, + executor: mockExecutor, + getDefinition: () => undefined, + }); + + const stats = defaultRecovery.getStats(); + expect(stats.config.enabled).toBe(true); + expect(stats.config.pollIntervalMs).toBe(60000); + }); + + it("should merge custom config with defaults", () => { + const stats = recovery.getStats(); + expect(stats.config.enabled).toBe(true); + expect(stats.config.pollIntervalMs).toBe(100); + expect(stats.config.staleThresholdMs).toBe(1000); + }); + }); + + describe("startBackgroundRecovery", () => { + it("should start background recovery interval", () => { + recovery.startBackgroundRecovery(); + expect(() => recovery.startBackgroundRecovery()).not.toThrow(); + }); + + it("should not start if already running", () => { + recovery.startBackgroundRecovery(); + recovery.startBackgroundRecovery(); + }); + + it("should not start if disabled", () => { + const disabledRecovery = new TaskRecovery( + { ...fastConfig, enabled: false }, + { + guard: mockGuard, + repository: mockRepository, + streamManager: mockStreamManager, + executor: mockExecutor, + getDefinition: () => undefined, + }, + ); + + disabledRecovery.startBackgroundRecovery(); + const stats = 
disabledRecovery.getStats(); + expect(stats.config.enabled).toBe(false); + }); + }); + + describe("stopBackgroundRecovery", () => { + it("should stop and clear interval", () => { + recovery.startBackgroundRecovery(); + recovery.stopBackgroundRecovery(); + }); + + it("should do nothing if not running", () => { + recovery.stopBackgroundRecovery(); + }); + }); + + describe("recoverBackgroundTasks", () => { + it("should find and recover stale background tasks", async () => { + const staleTask = createMockTask({ + type: "background", + status: "running", + lastHeartbeatAt: new Date(Date.now() - 10000), + }); + + vi.mocked(mockRepository.findStaleTasks).mockResolvedValue([staleTask]); + + definitions.set("test-task", { + name: "test-task", + handler: async function* () { + yield { type: "complete", message: "Recovered" }; + }, + defaultOptions: {}, + }); + + await recovery.recoverBackgroundTasks(); + + expect(mockGuard.acquireRecoverySlot).toHaveBeenCalled(); + expect(mockGuard.releaseRecoverySlot).toHaveBeenCalled(); + + const stats = recovery.getStats(); + expect(stats.outcomes.background).toBe(1); + }); + + it("should only recover background tasks, not user tasks", async () => { + const userTask = createMockTask({ + type: "user", + status: "running", + lastHeartbeatAt: new Date(Date.now() - 10000), + }); + + vi.mocked(mockRepository.findStaleTasks).mockResolvedValue([userTask]); + + await recovery.recoverBackgroundTasks(); + + expect(mockGuard.acquireRecoverySlot).not.toHaveBeenCalled(); + }); + + it("should respect batch size limit", async () => { + const tasks = Array.from({ length: 10 }, (_, i) => + createMockTask({ + type: "background", + status: "running", + name: `task-${i}`, + }), + ); + + vi.mocked(mockRepository.findStaleTasks).mockResolvedValue(tasks); + + // register definitions for all tasks + for (let i = 0; i < 10; i++) { + definitions.set(`task-${i}`, { + name: `task-${i}`, + handler: async function* () { + yield { type: "complete", message: "Done" }; + }, + defaultOptions: {}, + }); + } + + await recovery.recoverBackgroundTasks(); + + // should only call acquireRecoverySlot 5 times + expect(mockGuard.acquireRecoverySlot).toHaveBeenCalledTimes(5); + }); + + it("should skip if disabled", async () => { + const disabledRecovery = new TaskRecovery( + { ...fastConfig, enabled: false }, + { + guard: mockGuard, + repository: mockRepository, + streamManager: mockStreamManager, + executor: mockExecutor, + getDefinition: () => undefined, + }, + ); + + await disabledRecovery.recoverBackgroundTasks(); + + expect(mockRepository.findStaleTasks).not.toHaveBeenCalled(); + }); + + it("should increment tasksFailed on error", async () => { + const staleTask = createMockTask({ + type: "background", + status: "running", + }); + + vi.mocked(mockRepository.findStaleTasks).mockResolvedValue([staleTask]); + + await recovery.recoverBackgroundTasks(); + + const stats = recovery.getStats(); + expect(stats.outcomes.failed).toBe(1); + }); + + it("should update lastBackgroundScanAt", async () => { + vi.mocked(mockRepository.findStaleTasks).mockResolvedValue([]); + + await recovery.recoverBackgroundTasks(); + + const stats = recovery.getStats(); + expect(stats.background.lastScanAt).toBeDefined(); + }); + }); + + describe("recoverStaleTask", () => { + it("should use recover handler when available (smart recovery)", async () => { + const staleTask = createMockTask({ status: "running" }); + let recoveryHandlerCalled = false; + + definitions.set("test-task", { + name: "test-task", + handler: async function* () { 
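+          // plain execute path; the recover() handler below should take precedence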
+ yield { type: "complete", message: "Execute" }; + }, + recover: async function* (_input, ctx) { + recoveryHandlerCalled = true; + expect(ctx.previousEvents).toBeDefined(); + expect(ctx.recoveryReason).toBe("stale"); + yield { type: "complete", message: "Recovered" }; + }, + defaultOptions: {}, + }); + + vi.mocked(mockRepository.getEvents).mockResolvedValue([]); + + const events: TaskEvent[] = []; + for await (const event of recovery.recoverStaleTask(staleTask)) { + events.push(event); + } + + expect(recoveryHandlerCalled).toBe(true); + const stats = recovery.getStats(); + expect(stats.outcomes.byMethod.smartRecovery).toBe(1); + }); + + it("should use execute handler when no recover handler (re-execute)", async () => { + const staleTask = createMockTask({ status: "running" }); + let executeHandlerCalled = false; + + definitions.set("test-task", { + name: "test-task", + handler: async function* () { + executeHandlerCalled = true; + yield { type: "complete", message: "Re-executed" }; + }, + defaultOptions: {}, + }); + + vi.mocked(mockRepository.getEvents).mockResolvedValue([]); + + const events: TaskEvent[] = []; + for await (const event of recovery.recoverStaleTask(staleTask)) { + events.push(event); + } + + expect(executeHandlerCalled).toBe(true); + const stats = recovery.getStats(); + expect(stats.outcomes.byMethod.reexecution).toBe(1); + }); + + it("should yield previous events from DB before recovery", async () => { + const staleTask = createMockTask({ status: "running" }); + + const storedEvents: StoredEvent[] = [ + { + id: "evt-1", + taskId: staleTask.id, + seq: 1, + type: "TASK_PROGRESS", + timestamp: new Date(), + payload: { message: "Previous progress" }, + }, + ]; + + definitions.set("test-task", { + name: "test-task", + handler: async function* () { + yield { type: "complete", message: "Done" }; + }, + defaultOptions: {}, + }); + + vi.mocked(mockRepository.getEvents).mockResolvedValue(storedEvents); + + const events: TaskEvent[] = []; + for await (const event of recovery.recoverStaleTask(staleTask)) { + events.push(event); + } + + expect(events.length).toBe(2); + expect(events[0].type).toBe("progress"); + }); + + it("should throw if handler not found", async () => { + const staleTask = createMockTask({ + status: "running", + name: "unknown-task", + }); + + await expect(async () => { + for await (const _ of recovery.recoverStaleTask(staleTask)) { + // consume + } + }).rejects.toThrow("Handler for task unknown-task not found"); + }); + }); + + describe("handleDatabaseCheck", () => { + it("should return null if repository not initialized", async () => { + (mockRepository as any).isInitialized = false; + + const generator = recovery.handleDatabaseCheck( + idempotencyKey("test-key"), + "user-123", + ); + + const result = await generator.next(); + expect(result.done).toBe(true); + expect(result.value).toBeNull(); + }); + + it("should return null if task not found", async () => { + vi.mocked(mockRepository.findByIdempotencyKey).mockResolvedValue(null); + + const generator = recovery.handleDatabaseCheck( + idempotencyKey("test-key"), + "user-123", + ); + + const result = await generator.next(); + expect(result.done).toBe(true); + expect(result.value).toBeNull(); + }); + + it("should return null if userId does not match (security)", async () => { + const task = createMockTask({ userId: "other-user" }); + vi.mocked(mockRepository.findByIdempotencyKey).mockResolvedValue(task); + + const generator = recovery.handleDatabaseCheck( + idempotencyKey("test-key"), + "user-123", + ); + + const 
result = await generator.next(); + expect(result.done).toBe(true); + expect(result.value).toBeNull(); + }); + + it("should stream from DB if task completed", async () => { + const task = createMockTask({ userId: "user-123", status: "completed" }); + vi.mocked(mockRepository.findByIdempotencyKey).mockResolvedValue(task); + vi.mocked(mockRepository.getEvents).mockResolvedValue([ + { + id: "evt-1", + taskId: task.id, + seq: 1, + type: "TASK_COMPLETE", + timestamp: new Date(), + payload: { message: "Done" }, + }, + ]); + + const generator = recovery.handleDatabaseCheck( + task.idempotencyKey, + "user-123", + ); + + const events: TaskEvent[] = []; + let result = await generator.next(); + + while (!result.done) { + events.push(result.value); + result = await generator.next(); + } + + expect(events.length).toBe(1); + expect(result.value).toBe(task); + }); + + it("should stream from DB if task failed", async () => { + const task = createMockTask({ userId: "user-123", status: "failed" }); + vi.mocked(mockRepository.findByIdempotencyKey).mockResolvedValue(task); + vi.mocked(mockRepository.getEvents).mockResolvedValue([]); + + const generator = recovery.handleDatabaseCheck( + task.idempotencyKey, + "user-123", + ); + + let result = await generator.next(); + while (!result.done) { + result = await generator.next(); + } + + expect(result.value).toBe(task); + }); + }); + + describe("isTaskAlive", () => { + it("should return true if heartbeat is recent", () => { + const task = createMockTask({ + lastHeartbeatAt: new Date(Date.now() - 100), // 100ms ago + }); + + const isAlive = recovery["isTaskAlive"](task); + expect(isAlive).toBe(true); + }); + + it("should return false if no heartbeat", () => { + const task = createMockTask(); + (task as any)._lastHeartbeatAt = undefined; + + const isAlive = recovery["isTaskAlive"](task); + expect(isAlive).toBe(false); + }); + + it("should return false if heartbeat is old", () => { + const task = createMockTask({ + lastHeartbeatAt: new Date(Date.now() - 10000), // 10s ago, threshold is 500ms + }); + + const isAlive = recovery["isTaskAlive"](task); + expect(isAlive).toBe(false); + }); + }); + + describe("getStats", () => { + it("should return recovery statistics", () => { + const stats = recovery.getStats(); + + expect(stats.config.enabled).toBe(true); + expect(stats.config.pollIntervalMs).toBe(100); + expect(stats.background.isScanning).toBe(false); + expect(stats.outcomes.background).toBe(0); + expect(stats.outcomes.user).toBe(0); + expect(stats.outcomes.failed).toBe(0); + expect(stats.outcomes.byMethod.smartRecovery).toBe(0); + expect(stats.outcomes.byMethod.reexecution).toBe(0); + }); + }); +}); diff --git a/packages/taskflow/src/tests/execution/system.test.ts b/packages/taskflow/src/tests/execution/system.test.ts new file mode 100644 index 00000000..d2547460 --- /dev/null +++ b/packages/taskflow/src/tests/execution/system.test.ts @@ -0,0 +1,448 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { idempotencyKey, userId } from "@/core/branded"; +import type { TaskEvent } from "@/domain/events"; +import type { TaskDefinition, TaskHandlerContext } from "@/domain/handler"; +import type { Task } from "@/domain/task"; +import { TaskSystem, type TaskSystemConfig } from "@/execution/system"; +import type { TaskRunParams } from "@/execution/types"; + +// mock the flush class to avoid forking real processes in tests +vi.mock("@/flush/flush-manager", () => ({ + Flush: vi.fn().mockImplementation(() => ({ + initialize: 
vi.fn().mockResolvedValue(undefined),
+    shutdown: vi.fn().mockResolvedValue(undefined),
+    isAlive: vi.fn().mockReturnValue(false),
+    getStats: vi.fn().mockReturnValue({
+      process: {
+        isAlive: false,
+        pid: null,
+        restartCount: 0,
+        isShuttingDown: false,
+      },
+      worker: null,
+    }),
+    getStatus: vi.fn().mockReturnValue({
+      isAlive: false,
+      isShuttingDown: false,
+      restartCount: 0,
+      pid: null,
+      lastStats: null,
+    }),
+  })),
+}));
+
+// mock the repository
+vi.mock("@/persistence/repository", () => ({
+  createRepository: vi.fn().mockImplementation(() => ({
+    type: "sqlite",
+    isInitialized: false,
+    initialize: vi.fn().mockResolvedValue(undefined),
+    executeBatch: vi.fn().mockResolvedValue(undefined),
+    findById: vi.fn().mockResolvedValue(null),
+    findByIdempotencyKey: vi.fn().mockResolvedValue(null),
+    findStaleTasks: vi.fn().mockResolvedValue([]),
+    getEvents: vi.fn().mockResolvedValue([]),
+    healthCheck: vi.fn().mockResolvedValue(true),
+    close: vi.fn().mockResolvedValue(undefined),
+  })),
+}));
+
+// drain a task's event stream, failing the test if it never settles
+async function collectStreamEvents(
+  task: Task & {
+    stream?: (options?: {
+      lastSeq?: number;
+      signal?: AbortSignal;
+    }) => AsyncGenerator<TaskEvent>;
+  },
+  timeoutMs: number = 5_000,
+): Promise<TaskEvent[]> {
+  if (!task.stream) {
+    throw new Error("Task stream is undefined");
+  }
+
+  const events: TaskEvent[] = [];
+  const collectPromise = (async () => {
+    for await (const event of task.stream!()) {
+      events.push(event);
+    }
+    return events;
+  })();
+
+  // typed never so Promise.race narrows to the collector's result
+  const timeoutPromise = new Promise<never>((_, reject) => {
+    setTimeout(() => {
+      reject(
+        new Error(`Timed out waiting for task stream after ${timeoutMs}ms`),
+      );
+    }, timeoutMs);
+  });
+
+  return Promise.race([collectPromise, timeoutPromise]);
+}
+
+const basicTaskDefinition: TaskDefinition = {
+  name: "basic-task",
+  handler: async function* (_input: unknown, _context: TaskHandlerContext) {
+    await new Promise((resolve) => setTimeout(resolve, 100));
+    yield { type: "progress", message: "Working..."
}; + yield { type: "complete", result: "Done" }; + }, + defaultOptions: {}, +}; + +const streamingTaskDefinition: TaskDefinition = { + name: "streaming-task", + handler: async function* (_input: unknown, _context: TaskHandlerContext) { + yield { type: "progress", message: "Step 1" }; + await new Promise((resolve) => setTimeout(resolve, 50)); + yield { type: "progress", message: "Step 2" }; + await new Promise((resolve) => setTimeout(resolve, 50)); + yield { type: "progress", message: "Step 3" }; + yield { type: "complete", result: "Success" }; + }, + defaultOptions: {}, +}; + +describe("TaskSystem", () => { + let taskSystem: TaskSystem; + + const testConfig: TaskSystemConfig = { + eventLog: { + eventLogPath: "./test-event-log", + }, + executor: { + heartbeatIntervalMs: 1000, + }, + shutdown: { + gracePeriodMs: 5000, + pollIntervalMs: 50, + }, + }; + + beforeEach(async () => { + taskSystem = new TaskSystem(testConfig); + await taskSystem.initialize(); + }); + + afterEach(async () => { + await taskSystem.shutdown({ deleteFiles: true, force: true }); + }); + + describe("registerTask", () => { + it("should register a task and return template", () => { + const template = taskSystem.registerTask(basicTaskDefinition); + + expect(template.name).toBe("basic-task"); + expect(typeof template.run).toBe("function"); + expect(typeof template.recover).toBe("function"); + }); + + it("should throw when registering duplicate task name", () => { + taskSystem.registerTask(basicTaskDefinition); + + expect(() => taskSystem.registerTask(basicTaskDefinition)).toThrow( + "Task basic-task already registered", + ); + }); + }); + + describe("getTemplate", () => { + it("should return registered template", () => { + taskSystem.registerTask(basicTaskDefinition); + + const template = taskSystem.getTemplate("basic-task"); + expect(template).not.toBeNull(); + expect(template?.name).toBe("basic-task"); + }); + + it("should return null for unregistered template", () => { + const template = taskSystem.getTemplate("unknown"); + expect(template).toBeNull(); + }); + }); + + describe("run", () => { + it("should run a task", async () => { + const template = taskSystem.registerTask(basicTaskDefinition); + + const params: TaskRunParams = { + input: { data: "test" }, + userId: userId("user-123"), + }; + + const task = await template.run(params); + + expect(task).toBeDefined(); + expect(task.name).toBe("basic-task"); + expect(task.userId).toBe("user-123"); + }); + + it("should stream task events from execution", async () => { + const template = taskSystem.registerTask(streamingTaskDefinition); + + const task = await template.run({ + input: {}, + userId: userId("user-123"), + }); + + const events = await collectStreamEvents(task as any); + + expect(events.length).toBeGreaterThanOrEqual(4); // created + progress*3 + complete + expect(events[0].type).toBe("created"); + expect(events.some((e) => e.type === "progress")).toBe(true); + expect(events[events.length - 1].type).toBe("complete"); + }); + }); + + describe("recover", () => { + it("should return null when task not found", async () => { + const template = taskSystem.registerTask(basicTaskDefinition); + + const result = await template.recover({ + idempotencyKey: idempotencyKey("non-existent"), + userId: userId("user-123"), + }); + + expect(result).toBeNull(); + }); + }); + + describe("deduplication", () => { + it("should return existing task when running duplicate", async () => { + const template = taskSystem.registerTask(streamingTaskDefinition); + + const params: TaskRunParams = { + 
input: { data: "test" }, + userId: userId("user-123"), + }; + + const task1 = await template.run(params); + + // wait a bit for task to start + await new Promise((resolve) => setTimeout(resolve, 20)); + + const task2 = await template.run(params); + + expect(task2.id).toBe(task1.id); + }); + + it("should connect to existing stream when duplicate", async () => { + const template = taskSystem.registerTask(streamingTaskDefinition); + + const params: TaskRunParams = { + input: {}, + userId: userId("user-123"), + }; + + const task1 = await template.run(params); + await new Promise((resolve) => setTimeout(resolve, 20)); + + const task2 = await template.run(params); + expect(task2.id).toBe(task1.id); + + const events = await collectStreamEvents(task2 as any); + expect(events.length).toBeGreaterThanOrEqual(4); + }); + }); + + describe("custom events", () => { + it("should handle custom events from handler", async () => { + const customEventTask: TaskDefinition = { + name: "custom-event-task", + handler: async function* () { + yield { + type: "custom", + eventName: "query-submitted", + payload: { statementId: "stmt-123" }, + }; + await new Promise((resolve) => setTimeout(resolve, 50)); + yield { + type: "custom", + eventName: "rows-processed", + payload: { count: 5000 }, + }; + yield { type: "complete", result: { totalRows: 10000 } }; + }, + defaultOptions: {}, + }; + + const template = taskSystem.registerTask(customEventTask); + const task = await template.run({ + input: {}, + userId: userId("user-123"), + }); + + const events = await collectStreamEvents(task as any); + + const customEvents = events.filter((e) => e.type === "custom"); + expect(customEvents.length).toBe(2); + expect(customEvents[0].eventName).toBe("query-submitted"); + expect(customEvents[1].eventName).toBe("rows-processed"); + }); + }); + + describe("shutdown", () => { + it("should wait for running tasks during graceful shutdown", async () => { + let taskCompleted = false; + + const slowTask: TaskDefinition = { + name: "slow-task", + handler: async function* () { + await new Promise((resolve) => setTimeout(resolve, 200)); + taskCompleted = true; + yield { type: "complete", result: "done" }; + }, + defaultOptions: {}, + }; + + const template = taskSystem.registerTask(slowTask); + await template.run({ input: {}, userId: userId("user-123") }); + + // wait for task to start (executor tick interval is 100ms, so wait longer) + await new Promise((resolve) => setTimeout(resolve, 150)); + + await taskSystem.shutdown({ deleteFiles: true }); + + expect(taskCompleted).toBe(true); + }); + + it("should force abort after grace period expires", async () => { + const shortGraceSystem = new TaskSystem({ + ...testConfig, + shutdown: { gracePeriodMs: 100, pollIntervalMs: 20 }, + }); + await shortGraceSystem.initialize(); + + let taskCompleted = false; + + const verySlowTask: TaskDefinition = { + name: "very-slow-task", + handler: async function* (_input, context) { + for (let i = 0; i < 50; i++) { + if (context?.signal?.aborted) { + yield { type: "cancelled", message: "Aborted" }; + return; + } + await new Promise((resolve) => setTimeout(resolve, 100)); + } + taskCompleted = true; + yield { type: "complete", result: "done" }; + }, + defaultOptions: {}, + }; + + const template = shortGraceSystem.registerTask(verySlowTask); + await template.run({ input: {}, userId: userId("user-123") }); + + await new Promise((resolve) => setTimeout(resolve, 50)); + + const startTime = Date.now(); + await shortGraceSystem.shutdown({ deleteFiles: true }); + const elapsed 
= Date.now() - startTime; + + expect(elapsed).toBeLessThan(1000); + expect(taskCompleted).toBe(false); + }); + + it("should immediately abort with force: true", async () => { + let taskCompleted = false; + + const longTask: TaskDefinition = { + name: "long-task", + handler: async function* () { + await new Promise((resolve) => setTimeout(resolve, 5000)); + taskCompleted = true; + yield { type: "complete", result: "done" }; + }, + defaultOptions: {}, + }; + + const template = taskSystem.registerTask(longTask); + await template.run({ input: {}, userId: userId("user-123") }); + + await new Promise((resolve) => setTimeout(resolve, 50)); + + const startTime = Date.now(); + await taskSystem.shutdown({ force: true, deleteFiles: true }); + const elapsed = Date.now() - startTime; + + expect(elapsed).toBeLessThan(500); + expect(taskCompleted).toBe(false); + }); + + it("should reject new tasks during shutdown", async () => { + const slowTask: TaskDefinition = { + name: "blocking-task", + handler: async function* () { + await new Promise((resolve) => setTimeout(resolve, 1000)); + yield { type: "complete", result: "done" }; + }, + defaultOptions: {}, + }; + + const template = taskSystem.registerTask(slowTask); + await template.run({ input: {}, userId: userId("user-123") }); + + await new Promise((resolve) => setTimeout(resolve, 50)); + + const shutdownPromise = taskSystem.shutdown({ deleteFiles: true }); + + await new Promise((resolve) => setTimeout(resolve, 20)); + + await expect( + template.run({ input: { new: true }, userId: userId("user-2") }), + ).rejects.toThrow("shutting down"); + + await shutdownPromise; + }); + + it("should be idempotent - multiple shutdown calls are safe", async () => { + await taskSystem.shutdown({ deleteFiles: true }); + await expect( + taskSystem.shutdown({ deleteFiles: true }), + ).resolves.toBeUndefined(); + }); + }); + + describe("shuttingDown", () => { + it("should expose shuttingDown state", async () => { + expect(taskSystem.shuttingDown).toBe(false); + + const slowTask: TaskDefinition = { + name: "state-task", + handler: async function* () { + await new Promise((resolve) => setTimeout(resolve, 300)); + yield { type: "complete", result: "done" }; + }, + defaultOptions: {}, + }; + + const template = taskSystem.registerTask(slowTask); + await template.run({ input: {}, userId: userId("user-123") }); + + await new Promise((resolve) => setTimeout(resolve, 50)); + + const shutdownPromise = taskSystem.shutdown({ deleteFiles: true }); + await new Promise((resolve) => setTimeout(resolve, 20)); + + expect(taskSystem.shuttingDown).toBe(true); + + await shutdownPromise; + }); + }); + + describe("getStats", () => { + it("should return comprehensive statistics", async () => { + const template = taskSystem.registerTask(basicTaskDefinition); + await template.run({ input: {}, userId: userId("user-123") }); + + const stats = taskSystem.getStats(); + + expect(stats.system.status).toBe("running"); + expect(stats.system.startedAt).toBeDefined(); + expect(stats.tasks).toBeDefined(); + expect(stats.scheduler).toBeDefined(); + expect(stats.registry.templates).toBe(1); + expect(stats.components).toBeDefined(); + }); + }); +}); diff --git a/packages/taskflow/src/tests/flush/flush.test.ts b/packages/taskflow/src/tests/flush/flush.test.ts new file mode 100644 index 00000000..b3468f7f --- /dev/null +++ b/packages/taskflow/src/tests/flush/flush.test.ts @@ -0,0 +1,960 @@ +import { EventEmitter } from "node:events"; +import fs from "node:fs"; +import { afterEach, beforeEach, describe, expect, it, vi 
} from "vitest"; +import type { EventLogEntry } from "@/domain"; +import { Flush } from "@/flush/flush-manager"; +import { FlushWorker } from "@/flush/flush-worker"; +import type { IPCMessage } from "@/flush/types"; +import { EventLog } from "@/persistence/event-log"; +import { createRepository } from "@/persistence/repository"; +import type { TaskRepository } from "@/persistence/repository/types"; + +// mock child_process +vi.mock("node:child_process", () => ({ + fork: vi.fn(), +})); + +import { fork } from "node:child_process"; + +// helper to create a mock child process +function createMockChildProcess(pid: number = 12345) { + const emitter = new EventEmitter(); + return { + pid, + connected: true, + send: vi.fn(), + kill: vi.fn(), + on: emitter.on.bind(emitter), + once: emitter.once.bind(emitter), + off: emitter.off.bind(emitter), + emit: emitter.emit.bind(emitter), + stdout: new EventEmitter(), + stderr: new EventEmitter(), + }; +} + +describe("Flush", () => { + let flush: Flush; + let mockChild: ReturnType; + + const createFlush = (overrides = {}) => + new Flush({ + repository: { type: "sqlite", database: ":memory:" }, + flushIntervalMs: 1000, + eventLogPath: "./event-log-test", + maxBatchSize: 1000, + maxFlushRetries: 3, + retryBaseDelayMs: 100, + circuitBreakerDurationMs: 1000, + circuitBreakerThreshold: 3, + healthCheckIntervalMs: 1000, + maxRestarts: 3, + restartDelayMs: 100, + ...overrides, + }); + + beforeEach(() => { + vi.clearAllMocks(); + mockChild = createMockChildProcess(); + vi.mocked(fork).mockReturnValue(mockChild as any); + }); + + afterEach(async () => { + if (flush) { + // @ts-expect-error - accessing private property + flush.isShuttingDown = true; + // @ts-expect-error - accessing private property + flush.stopHealthCheck(); + } + }); + + describe("initialize", () => { + it("should spawn a fork process", async () => { + flush = createFlush(); + + const initPromise = flush.initialize(); + + setTimeout(() => { + mockChild.emit("message", { type: "ready" } as IPCMessage); + }, 10); + + await initPromise; + + expect(fork).toHaveBeenCalledTimes(1); + expect(fork).toHaveBeenCalledWith( + expect.stringContaining("flush-worker-entry.js"), + [], + expect.objectContaining({ + env: expect.objectContaining({ + FLUSH_CONFIG: expect.any(String), + }), + }), + ); + }); + + it("should start health check after spawn", async () => { + flush = createFlush(); + + const initPromise = flush.initialize(); + + setTimeout(() => { + mockChild.emit("message", { type: "ready" } as IPCMessage); + }, 10); + + await initPromise; + + // @ts-expect-error - accessing private property + expect(flush.healthCheckTimer).not.toBeNull(); + }); + + it("should reject if worker fails to start within timeout", async () => { + flush = createFlush(); + + mockChild.pid = undefined as any; + + const initPromise = flush.initialize(); + + await expect(initPromise).rejects.toThrow("Worker failed to start"); + }, 10000); + }); + + describe("shutdown", () => { + it("should send shutdown command to worker", async () => { + flush = createFlush(); + + const initPromise = flush.initialize(); + setTimeout(() => { + mockChild.emit("message", { type: "ready" } as IPCMessage); + }, 10); + await initPromise; + + const originalKill = process.kill; + process.kill = vi.fn(); + + const shutdownPromise = flush.shutdown(1000); + + setTimeout(() => { + mockChild.emit("exit", 0, null); + }, 10); + + await shutdownPromise; + + expect(mockChild.send).toHaveBeenCalledWith({ + type: "shutdown", + payload: { timeoutMs: 1000 }, + }); + + 
process.kill = originalKill; + }); + + it("should stop health check on shutdown", async () => { + flush = createFlush(); + + const initPromise = flush.initialize(); + setTimeout(() => { + mockChild.emit("message", { type: "ready" } as IPCMessage); + }, 10); + await initPromise; + + const originalKill = process.kill; + process.kill = vi.fn(); + + const shutdownPromise = flush.shutdown(1000); + setTimeout(() => { + mockChild.emit("exit", 0, null); + }, 10); + await shutdownPromise; + + // @ts-expect-error - accessing private property + expect(flush.healthCheckTimer).toBeNull(); + + process.kill = originalKill; + }); + + it("should force kill if worker does not exit within timeout", async () => { + flush = createFlush(); + + const initPromise = flush.initialize(); + setTimeout(() => { + mockChild.emit("message", { type: "ready" } as IPCMessage); + }, 10); + await initPromise; + + const originalKill = process.kill; + process.kill = vi.fn(); + + await flush.shutdown(50); + + expect(mockChild.kill).toHaveBeenCalledWith("SIGKILL"); + + process.kill = originalKill; + }); + + it("should return immediately if worker is not alive", async () => { + flush = createFlush(); + + const result = await flush.shutdown(1000); + + expect(result).toBeUndefined(); + expect(mockChild.send).not.toHaveBeenCalled(); + }); + }); + + describe("isAlive", () => { + it("should return false if worker is null", () => { + flush = createFlush(); + expect(flush.isAlive()).toBe(false); + }); + + it("should return false if worker has no pid", async () => { + flush = createFlush(); + mockChild.pid = undefined as any; + + const initPromise = flush.initialize(); + setTimeout(() => { + mockChild.emit("message", { type: "ready" } as IPCMessage); + }, 10); + await initPromise; + + expect(flush.isAlive()).toBe(false); + }); + + it("should return true if process exists", async () => { + flush = createFlush(); + + const initPromise = flush.initialize(); + setTimeout(() => { + mockChild.emit("message", { type: "ready" } as IPCMessage); + }, 10); + await initPromise; + + const originalKill = process.kill; + process.kill = vi.fn(); + + expect(flush.isAlive()).toBe(true); + + process.kill = originalKill; + }); + + it("should return false if process does not exist", async () => { + flush = createFlush(); + + const initPromise = flush.initialize(); + setTimeout(() => { + mockChild.emit("message", { type: "ready" } as IPCMessage); + }, 10); + await initPromise; + + const originalKill = process.kill; + process.kill = vi.fn().mockImplementation(() => { + throw new Error("ESRCH"); + }); + + expect(flush.isAlive()).toBe(false); + + process.kill = originalKill; + }); + }); + + describe("respawn", () => { + it("should respawn worker on unexpected exit", async () => { + flush = createFlush({ restartDelayMs: 100 }); + + const initPromise = flush.initialize(); + setTimeout(() => { + mockChild.emit("message", { type: "ready" } as IPCMessage); + }, 10); + await initPromise; + + vi.mocked(fork).mockClear(); + + const newMockChild = createMockChildProcess(99999); + vi.mocked(fork).mockReturnValue(newMockChild as any); + + mockChild.emit("exit", 1, "SIGKILL"); + + await new Promise((resolve) => setTimeout(resolve, 200)); + + newMockChild.emit("message", { type: "ready" } as IPCMessage); + + await new Promise((resolve) => setTimeout(resolve, 50)); + + expect(fork).toHaveBeenCalledTimes(1); + expect(flush.getStatus().restartCount).toBe(1); + }); + + it("should not respawn if shutting down", async () => { + flush = createFlush(); + + const initPromise = 
flush.initialize(); + setTimeout(() => { + mockChild.emit("message", { type: "ready" } as IPCMessage); + }, 10); + await initPromise; + + vi.mocked(fork).mockClear(); + + // @ts-expect-error - accessing private property + flush.isShuttingDown = true; + + mockChild.emit("exit", 0, null); + + await new Promise((resolve) => setTimeout(resolve, 200)); + + expect(fork).not.toHaveBeenCalled(); + }); + + it("should stop respawning after max restarts", async () => { + flush = createFlush({ maxRestarts: 2, restartDelayMs: 100 }); + + const initPromise = flush.initialize(); + setTimeout(() => { + mockChild.emit("message", { type: "ready" } as IPCMessage); + }, 10); + await initPromise; + + for (let i = 0; i < 3; i++) { + vi.mocked(fork).mockClear(); + const newMock = createMockChildProcess(10000 + i); + vi.mocked(fork).mockReturnValue(newMock as any); + + mockChild.emit("exit", 1, null); + await new Promise((resolve) => setTimeout(resolve, 200)); + + if (i < 2) { + newMock.emit("message", { type: "ready" } as IPCMessage); + mockChild = newMock; + } + } + + expect(flush.getStatus().restartCount).toBeGreaterThanOrEqual(2); + }, 10000); + }); + + describe("getWorkerStats", () => { + it("should return null if worker is not alive", async () => { + flush = createFlush(); + const stats = await flush.getWorkerStats(); + expect(stats).toBeNull(); + }); + + it("should request stats via IPC", async () => { + flush = createFlush(); + + const initPromise = flush.initialize(); + setTimeout(() => { + mockChild.emit("message", { type: "ready" } as IPCMessage); + }, 10); + await initPromise; + + const originalKill = process.kill; + process.kill = vi.fn(); + + const statsPromise = flush.getWorkerStats(); + + setTimeout(() => { + mockChild.emit("message", { + type: "stats", + payload: { + flushCount: 10, + errorCount: 2, + consecutiveErrors: 0, + totalEntriesFlushed: 100, + lastFlushAt: Date.now(), + lastErrorAt: null, + isRunning: true, + isShuttingDown: false, + isCircuitOpen: false, + }, + } as IPCMessage); + }, 10); + + const stats = await statsPromise; + + expect(mockChild.send).toHaveBeenCalledWith({ type: "get-stats" }); + expect(stats?.flushCount).toBe(10); + expect(stats?.totalEntriesFlushed).toBe(100); + + process.kill = originalKill; + }); + + it("should return cached stats on timeout", async () => { + flush = createFlush(); + + const initPromise = flush.initialize(); + setTimeout(() => { + mockChild.emit("message", { type: "ready" } as IPCMessage); + }, 10); + await initPromise; + + const originalKill = process.kill; + process.kill = vi.fn(); + + // @ts-expect-error - accessing private property + flush.lastStats = { + flushCount: 5, + errorCount: 1, + consecutiveErrors: 0, + totalEntriesFlushed: 50, + lastFlushAt: Date.now(), + lastErrorAt: null, + }; + + const stats = await flush.getWorkerStats(); + + expect(stats?.flushCount).toBe(5); + + process.kill = originalKill; + }); + }); + + describe("getStatus", () => { + it("should return current status", async () => { + flush = createFlush(); + + const initPromise = flush.initialize(); + setTimeout(() => { + mockChild.emit("message", { type: "ready" } as IPCMessage); + }, 10); + await initPromise; + + const originalKill = process.kill; + process.kill = vi.fn(); + + const status = flush.getStatus(); + + expect(status).toEqual({ + isAlive: true, + isShuttingDown: false, + restartCount: 0, + pid: 12345, + lastStats: null, + }); + + process.kill = originalKill; + }); + }); + + describe("health check", () => { + it("should trigger respawn when worker dies", async 
() => {
+      flush = createFlush({ healthCheckIntervalMs: 1000, restartDelayMs: 100 });
+
+      const initPromise = flush.initialize();
+      setTimeout(() => {
+        mockChild.emit("message", { type: "ready" } as IPCMessage);
+      }, 10);
+      await initPromise;
+
+      const originalKill = process.kill;
+      process.kill = vi.fn().mockImplementation(() => {
+        throw new Error("ESRCH");
+      });
+
+      vi.mocked(fork).mockClear();
+      const newMock = createMockChildProcess(99999);
+      vi.mocked(fork).mockReturnValue(newMock as any);
+
+      await new Promise((resolve) => setTimeout(resolve, 1200));
+
+      expect(fork).toHaveBeenCalled();
+
+      process.kill = originalKill;
+    }, 10000);
+  });
+});
+
+describe("FlushWorker", () => {
+  const eventLogPath = "./event-log-flush-test";
+  const checkpointPath = `${eventLogPath}.flush-checkpoint`;
+  const dbPath = "./tasks-flush-test.db";
+
+  let worker: FlushWorker;
+  let eventLog: EventLog;
+  let repository: TaskRepository;
+
+  const createEntries = (): EventLogEntry[] => [
+    {
+      timestamp: Date.now(),
+      taskId: "task-001",
+      type: "TASK_CREATED",
+      name: "test-task",
+      idempotencyKey: "idem-001",
+      userId: "user-123",
+      input: { test: "test" },
+      executionOptions: { maxConcurrentExecutions: 1 },
+      taskType: "user",
+    },
+    {
+      timestamp: Date.now(),
+      taskId: "task-002",
+      type: "TASK_CREATED",
+      name: "test-task",
+      idempotencyKey: "idem-002",
+      userId: "user-123",
+      input: { test: "test" },
+      executionOptions: { maxConcurrentExecutions: 1 },
+      taskType: "user",
+    },
+    {
+      timestamp: Date.now(),
+      taskId: "task-001",
+      type: "TASK_START",
+      name: "test-task",
+      idempotencyKey: "idem-001",
+      userId: "user-123",
+      taskType: "user",
+    },
+  ];
+
+  const createWorker = async (overrides = {}) => {
+    repository = await createRepository({ type: "sqlite", database: dbPath });
+    return new FlushWorker(
+      {
+        flushIntervalMs: 1000,
+        eventLogPath,
+        maxBatchSize: 1000,
+        maxFlushRetries: 3,
+        retryBaseDelayMs: 100,
+        circuitBreakerDurationMs: 1000,
+        circuitBreakerThreshold: 3,
+        healthCheckIntervalMs: 5000,
+        maxRestarts: 3,
+        restartDelayMs: 1000,
+        ...overrides,
+      },
+      repository,
+    );
+  };
+
+  const cleanup = async () => {
+    const filesToDelete = [
+      dbPath,
+      checkpointPath,
+      `${checkpointPath}.temp`,
+      eventLogPath,
+      `${eventLogPath}.checkpoint`,
+    ];
+
+    for (const file of filesToDelete) {
+      try {
+        fs.unlinkSync(file);
+      } catch {
+        // ignore
+      }
+    }
+  };
+
+  beforeEach(async () => {
+    await cleanup();
+
+    eventLog = new EventLog({ eventLogPath });
+    await eventLog.initialize();
+
+    const entries = createEntries();
+    for (const entry of entries) {
+      await eventLog.appendEntry(entry);
+    }
+
+    worker = await createWorker();
+  });
+
+  afterEach(async () => {
+    worker?.stop();
+    await repository?.close();
+    await eventLog?.close(true);
+    await cleanup();
+  });
+
+  describe("lifecycle", () => {
+    it("should start and set isRunning to true", async () => {
+      expect(worker.isRunning).toBe(false);
+      await worker.start();
+      expect(worker.isRunning).toBe(true);
+    });
+
+    it("should load checkpoint from file on start", async () => {
+      // checkpoint "5" is past all 3 appended entries, so nothing should flush
+      await fs.promises.writeFile(checkpointPath, "5", "utf-8");
+
+      await worker.start();
+      await worker.flush();
+
+      const stats = worker.getStats();
+      expect(stats.totalEntriesFlushed).toBe(0);
+    });
+
+    it("should stop and clear the interval on stop", async () => {
+      await worker.start();
+      // @ts-expect-error - accessing private property
+      expect(worker.flushInterval).not.toBeNull();
+
+      worker.stop();
+      // @ts-expect-error - accessing private property
+
expect(worker.flushInterval).toBeNull(); + }); + + it("should set isRunning to false on stop", async () => { + await worker.start(); + expect(worker.isRunning).toBe(true); + + worker.stop(); + expect(worker.isRunning).toBe(false); + }); + }); + + describe("flush", () => { + it("should flush batch to database", async () => { + await worker.start(); + await worker.flush(); + + const stats = worker.getStats(); + expect(stats.flushCount).toBe(1); + expect(stats.totalEntriesFlushed).toBe(3); + }); + + it("should limit batch to max batch size", async () => { + for (let i = 0; i < 10; i++) { + await eventLog.appendEntry({ + timestamp: Date.now(), + taskId: `task-extra-${i}`, + type: "TASK_CREATED", + name: "test-task", + idempotencyKey: `idem-extra-${i}`, + userId: "user-123", + input: { test: "test" }, + executionOptions: { maxConcurrentExecutions: 1 }, + taskType: "user", + }); + } + + const smallBatchWorker = await createWorker({ maxBatchSize: 5 }); + await smallBatchWorker.start(); + await smallBatchWorker.flush(); + + const stats = smallBatchWorker.getStats(); + expect(stats.totalEntriesFlushed).toBe(5); + + smallBatchWorker.stop(); + }); + + it("should update checkpoint after successful flush", async () => { + await worker.start(); + await worker.flush(); + + const checkpointContent = await fs.promises.readFile( + checkpointPath, + "utf-8", + ); + expect(parseInt(checkpointContent, 10)).toBe(3); + }); + + it("should skip flush if no entries", async () => { + await eventLog.close(true); + eventLog = new EventLog({ eventLogPath }); + await eventLog.initialize(); + + worker = await createWorker(); + await worker.start(); + await worker.flush(); + + const stats = worker.getStats(); + expect(stats.flushCount).toBe(0); + }); + + it("should skip flush if not running and not shutting down", async () => { + await worker.flush(); + + const stats = worker.getStats(); + expect(stats.flushCount).toBe(0); + }); + }); + + describe("retry", () => { + it("should retry flush on failure with exponential backoff", async () => { + await worker.start(); + + const mockExecuteBatch = vi + .fn() + .mockRejectedValueOnce(new Error("DB error 1")) + .mockRejectedValueOnce(new Error("DB error 2")) + .mockResolvedValueOnce(undefined); + + // @ts-expect-error - accessing private property + worker.repository.executeBatch = mockExecuteBatch; + + await worker.flush(); + + expect(mockExecuteBatch).toHaveBeenCalledTimes(3); + const stats = worker.getStats(); + expect(stats.flushCount).toBe(1); + expect(stats.errorCount).toBe(2); + }); + + it("should stop retrying after maxFlushRetries", async () => { + await worker.start(); + + const mockExecuteBatch = vi + .fn() + .mockRejectedValue(new Error("Persistent error")); + + // @ts-expect-error - accessing private property + worker.repository.executeBatch = mockExecuteBatch; + + await worker.flush(); + + expect(mockExecuteBatch).toHaveBeenCalledTimes(3); + const stats = worker.getStats(); + expect(stats.flushCount).toBe(0); + expect(stats.errorCount).toBe(3); + }); + + it("should track error stats on failure", async () => { + await worker.start(); + + const mockExecuteBatch = vi.fn().mockRejectedValue(new Error("DB error")); + + // @ts-expect-error - accessing private property + worker.repository.executeBatch = mockExecuteBatch; + + await worker.flush(); + + const stats = worker.getStats(); + expect(stats.errorCount).toBe(3); + expect(stats.consecutiveErrors).toBe(3); + expect(stats.lastErrorAt).not.toBeNull(); + }); + + it("should reset consecutive errors on success", async () => 
{ + await worker.start(); + + const mockExecuteBatch = vi + .fn() + .mockRejectedValueOnce(new Error("DB error")) + .mockResolvedValue(undefined); + + // @ts-expect-error - accessing private property + worker.repository.executeBatch = mockExecuteBatch; + + await worker.flush(); + + const stats = worker.getStats(); + expect(stats.consecutiveErrors).toBe(0); + expect(stats.errorCount).toBe(1); + }); + }); + + describe("circuit breaker", () => { + it("should open circuit breaker after threshold errors", async () => { + const cbWorker = await createWorker({ + circuitBreakerThreshold: 3, + maxFlushRetries: 1, + }); + await cbWorker.start(); + + const mockExecuteBatch = vi.fn().mockRejectedValue(new Error("DB error")); + + // @ts-expect-error - accessing private property + cbWorker.repository.executeBatch = mockExecuteBatch; + + await cbWorker.flush(); + await cbWorker.flush(); + await cbWorker.flush(); + + const stats = cbWorker.getStats(); + expect(stats.isCircuitOpen).toBe(true); + + cbWorker.stop(); + }); + + it("should block flushes while circuit breaker is open", async () => { + const cbWorker = await createWorker({ + circuitBreakerThreshold: 1, + maxFlushRetries: 1, + circuitBreakerDurationMs: 10000, + }); + await cbWorker.start(); + + const mockExecuteBatch = vi + .fn() + .mockRejectedValueOnce(new Error("DB error")) + .mockResolvedValue(undefined); + + // @ts-expect-error - accessing private property + cbWorker.repository.executeBatch = mockExecuteBatch; + + await cbWorker.flush(); + await cbWorker.flush(); + + expect(mockExecuteBatch).toHaveBeenCalledTimes(1); + + cbWorker.stop(); + }); + + it("should reset circuit breaker after duration", async () => { + const cbWorker = await createWorker({ + circuitBreakerThreshold: 1, + maxFlushRetries: 1, + circuitBreakerDurationMs: 1000, + }); + await cbWorker.start(); + + const mockExecuteBatch = vi + .fn() + .mockRejectedValueOnce(new Error("DB error")) + .mockResolvedValue(undefined); + + // @ts-expect-error - accessing private property + cbWorker.repository.executeBatch = mockExecuteBatch; + + await cbWorker.flush(); + expect(cbWorker.getStats().isCircuitOpen).toBe(true); + + await new Promise((resolve) => setTimeout(resolve, 1100)); + + await cbWorker.flush(); + expect(cbWorker.getStats().isCircuitOpen).toBe(false); + expect(mockExecuteBatch).toHaveBeenCalledTimes(2); + + cbWorker.stop(); + }, 10000); + }); + + describe("graceful shutdown", () => { + it("should drain remaining events on shutdown", async () => { + await worker.start(); + + await worker.gracefulShutdown(5000); + + const stats = worker.getStats(); + expect(stats.totalEntriesFlushed).toBe(3); + }); + + it("should respect timeout on graceful shutdown", async () => { + await worker.start(); + + const mockExecuteBatch = vi + .fn() + .mockRejectedValue(new Error("Persistent error")); + + // @ts-expect-error - accessing private property + worker.repository.executeBatch = mockExecuteBatch; + + const startTime = Date.now(); + await worker.gracefulShutdown(200); + const duration = Date.now() - startTime; + + expect(duration).toBeLessThan(500); + }); + + it("should close repository on graceful shutdown", async () => { + await worker.start(); + + // @ts-expect-error - accessing private property + const closeSpy = vi.spyOn(worker.repository, "close"); + + await worker.gracefulShutdown(1000); + + expect(closeSpy).toHaveBeenCalled(); + }); + + it("should allow flush during shutdown even if not running", async () => { + await worker.start(); + worker.stop(); + + // @ts-expect-error - 
accessing private property + worker.isShuttingDown = true; + + await worker.flush(); + + const stats = worker.getStats(); + expect(stats.totalEntriesFlushed).toBe(3); + }); + }); + + describe("checkpoint", () => { + it("should create checkpoint file if not exists", async () => { + try { + fs.unlinkSync(checkpointPath); + } catch { + // ignore + } + + await worker.start(); + await worker.flush(); + + expect(fs.existsSync(checkpointPath)).toBe(true); + }); + + it("should handle invalid checkpoint values", async () => { + await fs.promises.writeFile(checkpointPath, "invalid", "utf-8"); + + await worker.start(); + await worker.flush(); + + const stats = worker.getStats(); + expect(stats.totalEntriesFlushed).toBe(3); + }); + + it("should write checkpoint file atomically", async () => { + await worker.start(); + await worker.flush(); + + expect(fs.existsSync(`${checkpointPath}.temp`)).toBe(false); + expect(fs.existsSync(checkpointPath)).toBe(true); + }); + }); + + describe("stats", () => { + it("should track flushCount", async () => { + await worker.start(); + + expect(worker.getStats().flushCount).toBe(0); + + await worker.flush(); + expect(worker.getStats().flushCount).toBe(1); + }); + + it("should track totalEntriesFlushed", async () => { + await worker.start(); + + expect(worker.getStats().totalEntriesFlushed).toBe(0); + + await worker.flush(); + expect(worker.getStats().totalEntriesFlushed).toBe(3); + }); + + it("should track errorCount", async () => { + await worker.start(); + + const mockExecuteBatch = vi + .fn() + .mockRejectedValueOnce(new Error("DB error")) + .mockResolvedValue(undefined); + + // @ts-expect-error - accessing private property + worker.repository.executeBatch = mockExecuteBatch; + + await worker.flush(); + + expect(worker.getStats().errorCount).toBe(1); + }); + + it("should expose stats via getStats method", async () => { + await worker.start(); + await worker.flush(); + + const stats = worker.getStats(); + + expect(stats).toMatchObject({ + flushCount: 1, + errorCount: 0, + consecutiveErrors: 0, + totalEntriesFlushed: 3, + isRunning: true, + isShuttingDown: false, + isCircuitOpen: false, + }); + expect(stats.lastFlushAt).not.toBeNull(); + expect(stats.lastErrorAt).toBeNull(); + }); + }); +}); From f46cf1b88d44bc6cd1cc2a5db73e0622ae02d20d Mon Sep 17 00:00:00 2001 From: Ditadi Date: Tue, 27 Jan 2026 16:55:00 +0000 Subject: [PATCH 10/13] docs(taskflow): ai generated readme --- packages/taskflow/README.md | 437 ++++++++++++++++++++++++++++++++++++ 1 file changed, 437 insertions(+) create mode 100644 packages/taskflow/README.md diff --git a/packages/taskflow/README.md b/packages/taskflow/README.md new file mode 100644 index 00000000..67b44890 --- /dev/null +++ b/packages/taskflow/README.md @@ -0,0 +1,437 @@ +# @databricks/taskflow + +A production-grade, durable task execution system for Node.js applications. Built with reliability, observability, and developer experience in mind. 
+ +## Features + +- **Durable Execution** - Write-ahead log ensures tasks survive process crashes +- **Event Streaming** - Real-time SSE streaming with automatic reconnection +- **Rate Limiting** - Sliding window backpressure with per-user quotas +- **Retry & Recovery** - Exponential backoff, dead letter queue, stale task recovery +- **Type Safety** - Branded types prevent ID mix-ups at compile time +- **Observability** - OpenTelemetry-compatible hooks for traces, metrics, and logs +- **Zero Lock-in** - Pluggable storage backends (SQLite, Lakebase, or custom) + +## Installation + +```bash +pnpm add @databricks/taskflow +``` + +## Quick Start + +```typescript +import { TaskSystem } from '@databricks/taskflow'; + +// Create the task system +const taskSystem = new TaskSystem({ + repository: { + type: 'sqlite', + database: './.taskflow/tasks.db' + } +}); + +// Define a task handler +taskSystem.defineTask('send-email', { + handler: async (input, ctx) => { + const { to, subject, body } = input; + + ctx.progress({ status: 'sending' }); + await sendEmail(to, subject, body); + + return { sent: true, timestamp: Date.now() }; + }, + schema: z.object({ + to: z.string().email(), + subject: z.string(), + body: z.string() + }) +}); + +// Initialize and start +await taskSystem.initialize(); + +// Submit a task +const task = await taskSystem.submit('send-email', { + input: { + to: 'user@example.com', + subject: 'Hello', + body: 'World' + }, + userId: 'user-123' +}); + +// Subscribe to events +for await (const event of taskSystem.subscribe(task.idempotencyKey)) { + console.log(event.type, event.payload); +} +``` + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────┐ +│ TaskSystem │ +│ submit() · subscribe() · getStatus() │ +└─────────────────────────────────────────────────────────────┘ + │ + ┌────────────────────┼────────────────────┐ + ▼ ▼ ▼ +┌────────────────┐ ┌────────────────┐ ┌────────────────┐ +│ Guard │ │ Delivery │ │ Persistence │ +│ │ │ │ │ │ +│ • Backpressure │ │ • RingBuffer │ │ • EventLog │ +│ • SlotManager │ │ • SSE Streams │ │ • Repository │ +│ • DLQ │ │ • Reconnect │ │ • Checkpoints │ +└────────────────┘ └────────────────┘ └────────────────┘ + │ + ┌─────────┴─────────┐ + ▼ ▼ + ┌────────────────┐ ┌────────────────┐ + │ Flush │ │ Execution │ + │ │ │ │ + │ • BatchWriter │ │ • Executor │ + │ • CircuitBreak │ │ • Recovery │ + │ • IPC Worker │ │ • Heartbeat │ + └────────────────┘ └────────────────┘ +``` + +## Components + +### Guard Layer + +Controls task admission and execution concurrency. + +```typescript +// Rate limiting configuration +const taskSystem = new TaskSystem({ + guard: { + backpressure: { + maxTasksPerWindow: 1000, // Global rate limit + maxTasksPerUserWindow: 100, // Per-user rate limit + windowSizeMs: 60_000, // 1 minute window + maxQueuedSize: 500 // Max queued tasks + }, + slots: { + maxExecutionGlobal: 50, // Max concurrent tasks + maxExecutionPerUser: 10, // Per-user concurrency + slotTimeoutMs: 30_000 // Slot acquisition timeout + }, + dlq: { + maxSize: 1000, // DLQ capacity + ttlMs: 86_400_000, // 24 hour TTL + maxRetries: 3 // Max retry attempts + } + } +}); +``` + +### Delivery Layer + +Manages real-time event streaming with reconnection support. 
+ +```typescript +// Stream configuration +const taskSystem = new TaskSystem({ + stream: { + streamBufferSize: 100, // Events per stream buffer + streamRetentionMs: 60_000 // Keep closed streams for 1 min + } +}); + +// Subscribe with reconnection +const stream = taskSystem.subscribe(idempotencyKey, { + lastSeq: 42, // Resume from sequence 42 + signal: abortController.signal // Cancellation support +}); + +for await (const event of stream) { + console.log(`[${event.seq}] ${event.type}:`, event.payload); +} +``` + +### Persistence Layer + +Provides durable storage with write-ahead logging. + +```typescript +// Event log configuration +const taskSystem = new TaskSystem({ + eventLog: { + eventLogPath: './.taskflow/events.log', + maxSizeBytesPerFile: 10_485_760, // 10MB before rotation + maxAgePerFile: 3_600_000, // 1 hour max age + retentionCount: 5 // Keep 5 rotated files + }, + repository: { + type: 'sqlite', + database: './.taskflow/tasks.db' + } +}); +``` + +### Flush Layer + +Background worker that batches WAL entries to the repository. + +```typescript +// Flush configuration +const taskSystem = new TaskSystem({ + flush: { + flushIntervalMs: 1000, // Flush every second + maxBatchSize: 1000, // Max entries per batch + circuitBreakerThreshold: 5, // Open after 5 failures + circuitBreakerDurationMs: 30_000 // Stay open for 30s + } +}); +``` + +### Execution Layer + +Runs task handlers with retry, timeout, and heartbeat. + +```typescript +// Executor configuration +const taskSystem = new TaskSystem({ + executor: { + heartbeatIntervalMs: 5000, // Heartbeat every 5s + defaultTimeoutMs: 300_000, // 5 minute default timeout + tickIntervalMs: 100 // Check queue every 100ms + }, + recovery: { + staleThresholdMs: 30_000, // Task stale after 30s + scanIntervalMs: 10_000, // Scan every 10s + maxConcurrentRecoveries: 5 // Max parallel recoveries + } +}); +``` + +## Task Handlers + +### Promise Handler + +Simple async function that returns a result. + +```typescript +taskSystem.defineTask('process-data', { + handler: async (input, ctx) => { + const result = await processData(input); + return result; + } +}); +``` + +### Generator Handler + +Yields progress events during execution. + +```typescript +taskSystem.defineTask('batch-import', { + handler: async function* (input, ctx) { + const items = input.items; + + for (let i = 0; i < items.length; i++) { + await processItem(items[i]); + + yield { + type: 'progress', + payload: { + processed: i + 1, + total: items.length + } + }; + } + + return { imported: items.length }; + } +}); +``` + +### Recovery Handler + +Custom logic for recovering stale tasks. + +```typescript +taskSystem.defineTask('long-running-job', { + handler: async (input, ctx) => { + // Normal execution + }, + recovery: async (task, ctx) => { + // Check external state + const status = await checkJobStatus(task.id); + + if (status === 'completed') { + return { recovered: true, result: status.result }; + } + + // Re-execute from checkpoint + return { recovered: false }; + } +}); +``` + +## Observability + +TaskFlow uses a hooks-based observability interface compatible with OpenTelemetry. 
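+If you are not running an OpenTelemetry SDK, a plain logging hook is often enough in development. A minimal sketch: the `log`/`incrementCounter` fields and the `noopHooks` base follow the source excerpts later in this series, but treat the exact exported shape of `TaskSystemHooks` as an assumption:
+
+```typescript
+import { noopHooks, TaskSystem, type TaskSystemHooks } from "@databricks/taskflow";
+
+const counters = new Map<string, number>();
+
+const devHooks: TaskSystemHooks = {
+  ...noopHooks, // fills in every callback we do not override
+  log: ({ severity, message, attributes, error }) => {
+    if (severity === "error") {
+      console.error(`[taskflow] ${message}`, attributes ?? {}, error ?? "");
+    } else {
+      console.log(`[taskflow:${severity}] ${message}`, attributes ?? {});
+    }
+  },
+  incrementCounter: (name, value = 1) => {
+    counters.set(name, (counters.get(name) ?? 0) + value);
+  },
+};
+
+const taskSystem = new TaskSystem({}, devHooks);
+```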
+ +```typescript +import { createHooks } from '@databricks/taskflow'; +import { trace, metrics } from '@opentelemetry/api'; + +const hooks = createHooks({ + tracer: trace.getTracer('taskflow'), + meter: metrics.getMeter('taskflow'), + logger: console +}); + +const taskSystem = new TaskSystem(config, hooks); +``` + +### Available Metrics + +| Metric | Type | Description | +|--------|------|-------------| +| `taskflow.tasks.submitted` | Counter | Tasks submitted | +| `taskflow.tasks.completed` | Counter | Tasks completed | +| `taskflow.tasks.failed` | Counter | Tasks failed | +| `taskflow.tasks.running` | Gauge | Currently running tasks | +| `taskflow.tasks.queued` | Gauge | Tasks waiting in queue | +| `taskflow.guard.rejections` | Counter | Rejected by rate limit | +| `taskflow.dlq.size` | Gauge | Dead letter queue size | +| `taskflow.flush.entries` | Counter | Entries flushed | +| `taskflow.streams.active` | Gauge | Active SSE streams | + +### Available Spans + +| Span | Description | +|------|-------------| +| `taskflow.task.execute` | Full task execution | +| `taskflow.task.handler` | Handler function only | +| `taskflow.flush.batch` | Batch flush operation | +| `taskflow.recovery.scan` | Recovery scan cycle | + +## Error Handling + +TaskFlow provides typed errors with retry information. + +```typescript +import { + BackpressureError, + SlotTimeoutError, + ValidationError +} from '@databricks/taskflow'; + +try { + await taskSystem.submit('my-task', { input: data }); +} catch (error) { + if (BackpressureError.is(error)) { + // Rate limited - retry after delay + console.log(`Retry after ${error.retryAfterMs}ms`); + } + + if (SlotTimeoutError.is(error)) { + // No execution slots available + console.log(`Slot timeout after ${error.timeoutMs}ms`); + } + + if (ValidationError.is(error)) { + // Invalid input + console.log(`Invalid field: ${error.field}`); + } +} +``` + +## Graceful Shutdown + +TaskFlow handles shutdown gracefully, completing in-flight tasks. + +```typescript +// Configure shutdown behavior +const taskSystem = new TaskSystem({ + shutdown: { + timeoutMs: 30_000, // Max shutdown time + forceKillTimeoutMs: 5_000 // Force kill after this + } +}); + +// Shutdown on SIGTERM +process.on('SIGTERM', async () => { + await taskSystem.shutdown(); + process.exit(0); +}); +``` + +## Storage Backends + +### SQLite (Default) + +Best for single-node deployments and development. + +```typescript +const taskSystem = new TaskSystem({ + repository: { + type: 'sqlite', + database: './.taskflow/tasks.db' + } +}); +``` + +### Lakebase + +For distributed deployments with Databricks Lakebase. + +```typescript +const taskSystem = new TaskSystem({ + repository: { + type: 'lakebase', + connector: myLakebaseConnector // You provide the connector + } +}); +``` + +### Custom Repository + +Implement the `TaskRepository` interface for custom backends. 
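+An in-memory repository is handy for tests. Note that the interface listing below lost its generic type arguments to formatting (e.g. `Promise` should read `Promise<void>`, `Promise<Task | null>`, and so on); the sketch assumes those obvious return types plus `Task`/`TaskEvent` element types, which may differ from the real exports:
+
+```typescript
+import type {
+  EventLogEntry,
+  IdempotencyKey,
+  Task,
+  TaskEvent,
+  TaskId,
+} from "@databricks/taskflow";
+
+// Toy repository: applies WAL entries to in-memory maps.
+// Not durable and not concurrency-safe; for tests and illustration only.
+class InMemoryRepository {
+  private tasks = new Map<TaskId, Task>();
+  private events = new Map<TaskId, EventLogEntry[]>();
+
+  async initialize(): Promise<void> {}
+
+  async executeBatch(entries: EventLogEntry[]): Promise<void> {
+    for (const entry of entries) {
+      const list = this.events.get(entry.taskId) ?? [];
+      list.push(entry);
+      this.events.set(entry.taskId, list);
+    }
+  }
+
+  async findById(taskId: TaskId): Promise<Task | null> {
+    return this.tasks.get(taskId) ?? null;
+  }
+
+  async findByIdempotencyKey(key: IdempotencyKey): Promise<Task | null> {
+    for (const task of this.tasks.values()) {
+      if (task.idempotencyKey === key) return task;
+    }
+    return null;
+  }
+
+  async findStaleTasks(_threshold: number): Promise<Task[]> {
+    // a real backend compares last_heartbeat_at against the threshold
+    return [];
+  }
+
+  async getEvents(taskId: TaskId): Promise<TaskEvent[]> {
+    // assumes stored entries are event-shaped; a real backend maps rows to TaskEvent
+    return (this.events.get(taskId) ?? []) as unknown as TaskEvent[];
+  }
+
+  async healthCheck(): Promise<boolean> {
+    return true;
+  }
+
+  async close(): Promise<void> {}
+}
+```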
+ +```typescript +interface TaskRepository { + initialize(): Promise; + executeBatch(entries: EventLogEntry[]): Promise; + findById(taskId: TaskId): Promise; + findByIdempotencyKey(key: IdempotencyKey): Promise; + findStaleTasks(threshold: number): Promise; + getEvents(taskId: TaskId): Promise; + healthCheck(): Promise; + close(): Promise; +} +``` + +## API Reference + +### TaskSystem + +| Method | Description | +|--------|-------------| +| `initialize()` | Initialize all components | +| `defineTask(name, definition)` | Register a task handler | +| `submit(name, params)` | Submit a task for execution | +| `getStatus(idempotencyKey)` | Get task status | +| `subscribe(idempotencyKey, options?)` | Subscribe to task events | +| `getStats()` | Get system statistics | +| `shutdown(options?)` | Graceful shutdown | + +### Task Events + +| Event Type | Description | +|------------|-------------| +| `created` | Task created and queued | +| `start` | Task execution started | +| `progress` | Progress update from handler | +| `heartbeat` | Periodic heartbeat | +| `complete` | Task completed successfully | +| `error` | Task failed with error | +| `custom` | Custom event from handler | + +## License + +Apache-2.0 From 4139b587e99205a55ae825ce699ea823ca1c5dd3 Mon Sep 17 00:00:00 2001 From: ditadi Date: Fri, 30 Jan 2026 17:55:32 +0000 Subject: [PATCH 11/13] feat(taskflow): perf optimizations with bulk ops, byte-offset checkpoint, and dynamic batching --- packages/taskflow/src/execution/system.ts | 104 ++-- packages/taskflow/src/execution/types.ts | 25 + packages/taskflow/src/flush/flush-manager.ts | 28 +- .../taskflow/src/flush/flush-worker-entry.ts | 7 +- packages/taskflow/src/flush/flush-worker.ts | 171 ++++-- packages/taskflow/src/flush/types.ts | 18 +- packages/taskflow/src/guard/backpressure.ts | 46 ++ packages/taskflow/src/guard/guard.ts | 23 +- packages/taskflow/src/guard/types.ts | 10 +- packages/taskflow/src/index.ts | 2 + .../taskflow/src/persistence/event-log.ts | 247 ++++++++- .../repository/sqlite/connector.ts | 506 ++++++++++++------ 12 files changed, 934 insertions(+), 253 deletions(-) diff --git a/packages/taskflow/src/execution/system.ts b/packages/taskflow/src/execution/system.ts index 09f7a7ff..15874451 100644 --- a/packages/taskflow/src/execution/system.ts +++ b/packages/taskflow/src/execution/system.ts @@ -32,6 +32,7 @@ import { TaskExecutor } from "./executor"; import { TaskRecovery } from "./recovery"; import { type ExecutorConfig, + mergeExecutorConfig, mergeShutdownConfig, type RecoveryConfig, type ShutdownConfig, @@ -78,6 +79,7 @@ export interface TaskSystemConfig { export class TaskSystem { private readonly config: TaskSystemConfig; private readonly shutdownConfig: ShutdownConfig; + private readonly executorConfig: ExecutorConfig; private readonly hooks: TaskSystemHooks; // state @@ -108,6 +110,7 @@ export class TaskSystem { constructor(config?: TaskSystemConfig, hooks: TaskSystemHooks = noopHooks) { this.config = config ?? 
{}; this.shutdownConfig = mergeShutdownConfig(config?.shutdown); + this.executorConfig = mergeExecutorConfig(config?.executor); this.hooks = hooks; // initialize components @@ -270,7 +273,8 @@ export class TaskSystem { this.runningTasks.clear(); this.streamManager.clearAll(); - // shutdown persistence layer + // sync event log to disk before shutting down flush (drain remaining events) + await this.eventLog.sync(); await this.flush.shutdown(); await this.eventLog.close(deleteFiles); @@ -342,7 +346,7 @@ export class TaskSystem { successRate, }, scheduler: { - tickIntervalMs: 100, + tickIntervalMs: this.executorConfig.scheduler.tickIntervalMs, isTickActive: this.isExecutorTickRunning, }, registry: { @@ -434,8 +438,8 @@ export class TaskSystem { const task = new Task(taskParams); - // validate through guard (rate limiting, etc.) - this.guard.acceptTask(task); + // accept task and wait for capacity if needed (with timeout) + await this.guard.acceptTaskWithWait(task); // create stream and emit created event this.streamManager.getOrCreate(taskIdempotencyKey); @@ -512,43 +516,83 @@ export class TaskSystem { * Start executor tick interval */ private startExecutorTick(): void { - this.executorInterval = setInterval(async () => { - if (this.isExecutorTickRunning) return; + const { tickIntervalMs, maxTasksPerTick } = this.executorConfig.scheduler; + this.executorInterval = setInterval(() => { + if (this.isExecutorTickRunning) return; this.isExecutorTickRunning = true; try { - // get first task from queue - const task = this.pendingQueue.values().next().value as - | Task - | undefined; - if (!task) return; - - // remove from pending queue - this.pendingQueue.delete(task.idempotencyKey); - - // skip if already running (race condition) - if (this.runningTasks.has(task.idempotencyKey)) return; - - // acquire execution slot - try { - await this.guard.acquireExecutionSlot(task); - } catch (error) { - this.guard.addToDLQ(task, "Slot acquisition failed", String(error)); - return; + // check if there's work to do + if (this.pendingQueue.size === 0) return; + + // get available slots from guard + const guardStats = this.guard.getStats(); + const availableSlots = guardStats.slots.current.available; + if (availableSlots === 0) return; + + // determine how many tasks to process this tick + const tasksToStart = Math.min( + availableSlots, + maxTasksPerTick, + this.pendingQueue.size, + ); + + // get tasks to process + const tasks: Task[] = []; + for (const task of this.pendingQueue.values()) { + if (tasks.length >= tasksToStart) break; + // skip if already running + if (this.runningTasks.has(task.idempotencyKey)) continue; + tasks.push(task); } + // remove tasks from pending queue + for (const task of tasks) { + this.pendingQueue.delete(task.idempotencyKey); + } + + // start tasks concurrently + for (const task of tasks) { + this.startTaskExecution(task); + } + } finally { + this.isExecutorTickRunning = false; + } + }, tickIntervalMs); + } + + /** + * Start task execution + * + * Acquires slot and starts execution. 
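+   * Deliberately fire-and-forget: the scheduler tick must not await slot
+   * acquisition, or one slow acquisition would stall every other task
+   * selected in the same tick. Handler failures surface through hooks.log,
+   * and slot-acquisition failures route the task to the DLQ (see the
+   * catch branches below).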
+ */ + private startTaskExecution(task: Task): void { + // acquire slot and execute + this.guard + .acquireExecutionSlot(task) + .then(() => { // add to running tasks this.runningTasks.set(task.idempotencyKey, task); this.streamManager.getOrCreate(task.idempotencyKey); - // execute task + // get definition and execute const definition = this.definitions.get(task.name); - await this.executor.execute(task, definition); - } finally { - this.isExecutorTickRunning = false; - } - }, 100); + this.executor.execute(task, definition).catch((error) => { + this.hooks.log({ + severity: "error", + message: `Task execution error: ${String(error)}`, + attributes: { + taskId: task.id, + idempotencyKey: task.idempotencyKey, + }, + }); + }); + }) + .catch((error) => { + // failed to acquire slot - add to DLQ + this.guard.addToDLQ(task, "Slot acquisition failed", String(error)); + }); } /** diff --git a/packages/taskflow/src/execution/types.ts b/packages/taskflow/src/execution/types.ts index b212111b..5f12db49 100644 --- a/packages/taskflow/src/execution/types.ts +++ b/packages/taskflow/src/execution/types.ts @@ -30,6 +30,24 @@ export const DEFAULT_RETRY_CONFIG: RetryConfig = { backoffMultiplier: 2, }; +/** + * Configuration for the scheduler that processes pending tasks + */ +export interface SchedulerConfig { + /** interval between scheduler ticks in milliseconds */ + tickIntervalMs: number; + /** maximum number of tasks to start per tick */ + maxTasksPerTick: number; +} + +/** + * Default scheduler configuration + */ +export const DEFAULT_SCHEDULER_CONFIG: SchedulerConfig = { + tickIntervalMs: 50, + maxTasksPerTick: 50, +}; + /** * Configuration for TaskExecutor */ @@ -38,6 +56,8 @@ export interface ExecutorConfig { heartbeatIntervalMs: number; /** retry configuration */ retry: RetryConfig; + /** scheduler configuration */ + scheduler: SchedulerConfig; } /** @@ -46,6 +66,7 @@ export interface ExecutorConfig { export const DEFAULT_EXECUTOR_CONFIG: ExecutorConfig = { heartbeatIntervalMs: 30_000, // 30 seconds retry: DEFAULT_RETRY_CONFIG, + scheduler: DEFAULT_SCHEDULER_CONFIG, }; /** @@ -342,6 +363,10 @@ export function mergeExecutorConfig( ...DEFAULT_RETRY_CONFIG, ...partial.retry, }, + scheduler: { + ...DEFAULT_SCHEDULER_CONFIG, + ...partial.scheduler, + }, }; } diff --git a/packages/taskflow/src/flush/flush-manager.ts b/packages/taskflow/src/flush/flush-manager.ts index cec7b958..726f2856 100644 --- a/packages/taskflow/src/flush/flush-manager.ts +++ b/packages/taskflow/src/flush/flush-manager.ts @@ -9,7 +9,7 @@ import { type FlushConfig, type FlushStats, type FlushStatus, - type FlushWorkerStats, + type FlushWorkerRuntimeStats, type IPCCommand, type IPCMessage, } from "./types"; @@ -35,7 +35,7 @@ export class Flush { private healthCheckTimer: ReturnType | null = null; private restartCount: number = 0; private isShuttingDown: boolean = false; - private lastStats: FlushWorkerStats | null = null; + private lastStats: FlushWorkerRuntimeStats | null = null; constructor( config: Partial & { repository: RepositoryConfig }, @@ -88,6 +88,15 @@ export class Flush { resolve(); }, timeoutMs); + // listen for shutdown-complete message + const onMessage = (message: IPCMessage) => { + if (message.type === "shutdown-complete") { + this.lastStats = message.payload; + this.worker?.off("message", onMessage); + } + }; + this.worker?.on("message", onMessage); + this.worker?.once("exit", () => { clearTimeout(timeout); resolve(); @@ -118,7 +127,7 @@ export class Flush { * Get worker stats via IPC (async, fetches fresh stats) * 
Returns cached stats if worker doesn't respond in time */ - async getWorkerStats(): Promise { + async getWorkerStats(): Promise { if (!this.isAlive()) return null; return new Promise((resolve) => { @@ -155,13 +164,14 @@ export class Flush { }, worker: workerStats ? { - isRunning: true, + isRunning: workerStats.isRunning, flushCount: workerStats.flushCount, errorCount: workerStats.errorCount, consecutiveErrors: workerStats.consecutiveErrors, totalEntriesFlushed: workerStats.totalEntriesFlushed, lastFlushAt: workerStats.lastFlushAt, lastErrorAt: workerStats.lastErrorAt, + lastError: workerStats.lastError, } : null, }; @@ -206,6 +216,13 @@ export class Flush { process.stderr.write(`[FlushWorker] ${data.toString()}`); }); + // get stats from worker + this.worker?.on("message", (message: IPCMessage) => { + if (message.type === "stats") { + this.lastStats = message.payload; + } + }); + // wait for ready message const onReady = (message: IPCMessage) => { if (message.type === "ready") { @@ -305,6 +322,9 @@ export class Flush { message: "Health check detected dead worker, restarting", }); this.handleWorkerExit(); + } else if (this.isAlive()) { + // requests stats from worker periodically + this.sendCommand({ type: "get-stats" }); } }, this.config.healthCheckIntervalMs); diff --git a/packages/taskflow/src/flush/flush-worker-entry.ts b/packages/taskflow/src/flush/flush-worker-entry.ts index a8751f0d..d351c140 100644 --- a/packages/taskflow/src/flush/flush-worker-entry.ts +++ b/packages/taskflow/src/flush/flush-worker-entry.ts @@ -54,8 +54,11 @@ async function main(): Promise { send({ type: "stats", payload: worker.getStats() }); break; case "shutdown": - await worker.gracefulShutdown(command.payload.timeoutMs); - send({ type: "shutdown-complete" }); + await worker.gracefulShutdown(command.payload.timeoutMs, (stats) => { + // send stats during graceful shutdown + send({ type: "stats", payload: stats }); + }); + send({ type: "shutdown-complete", payload: worker.getStats() }); process.exit(0); break; } diff --git a/packages/taskflow/src/flush/flush-worker.ts b/packages/taskflow/src/flush/flush-worker.ts index 16e86e8f..f4526131 100644 --- a/packages/taskflow/src/flush/flush-worker.ts +++ b/packages/taskflow/src/flush/flush-worker.ts @@ -1,4 +1,5 @@ import fs from "node:fs/promises"; +import path from "node:path"; import { noopHooks, TaskAttributes, @@ -23,12 +24,14 @@ export class FlushWorker { private readonly hooks: TaskSystemHooks; private readonly eventLog: EventLog; - private checkpoint: number = 0; + private byteOffset: number = 0; private isShuttingDown: boolean = false; private _isRunning: boolean = false; private flushInterval: ReturnType | null = null; private circuitBreakerOpenUntil: number | null = null; + private currentBatchSize: number; + private stats: FlushWorkerStats = { flushCount: 0, errorCount: 0, @@ -36,6 +39,7 @@ export class FlushWorker { totalEntriesFlushed: 0, lastFlushAt: null, lastErrorAt: null, + lastError: null, }; constructor( @@ -52,17 +56,25 @@ export class FlushWorker { }, hooks, ); + this.currentBatchSize = this.config.minBatchSize; } /** * Start the flush worker * - Initialize repository + * - Ensure checkpoint directory exists * - Load checkpoint from file * - Start periodic flush interval */ async start(): Promise { await this.repository.initialize(); - this.checkpoint = await this.loadCheckpoint(); + + // ensure checkpoint directory exists + const checkpointDir = path.dirname(this.getCheckpointPath()); + await fs.mkdir(checkpointDir, { recursive: true }); + + // 
load byte offset from checkpoint file + this.byteOffset = await this.loadByteOffset(); this.flushInterval = setInterval(async () => { await this.flush(); @@ -77,7 +89,7 @@ export class FlushWorker { severity: "info", message: "FlushWorker started", attributes: { - checkpoint: this.checkpoint, + byteOffset: this.byteOffset, flushInterval: this.config.flushIntervalMs, }, }); @@ -97,7 +109,7 @@ export class FlushWorker { severity: "info", message: "FlushWorker stopped", attributes: { - checkpoint: this.checkpoint, + byteOffset: this.byteOffset, }, }); } @@ -105,8 +117,12 @@ export class FlushWorker { /** * Graceful shutdown - drain remaining events before stopping * @param timeoutMs - Maximum time to wait for draining + * @param onStats - Optional callback to send stats to parent process */ - async gracefulShutdown(timeoutMs: number = 30_000): Promise { + async gracefulShutdown( + timeoutMs: number = 30_000, + onStats?: (stats: FlushWorkerRuntimeStats) => void, + ): Promise { this.isShuttingDown = true; this.stop(); @@ -114,14 +130,16 @@ export class FlushWorker { // drain remaining events while (Date.now() - startTime < timeoutMs) { - const entries = await this.eventLog.readEntriesFromCheckpoint( - this.checkpoint, - ); + // check if there are any remaining entries + const { entries: remaining } = + await this.eventLog.readEntriesFromByteOffset(this.byteOffset, 1); - if (entries.length === 0) break; + if (remaining.length === 0) break; try { await this.flush(); + // send stats to parent process + onStats?.(this.getStats()); } catch (error) { this.hooks.log({ severity: "error", @@ -131,7 +149,7 @@ export class FlushWorker { } // small delay between flush attempts - await new Promise((resolve) => setTimeout(resolve, 100)); + await new Promise((resolve) => setTimeout(resolve, 50)); } await this.repository.close(); @@ -164,15 +182,39 @@ export class FlushWorker { [TaskAttributes.REPOSITORY_TYPE]: this.repository.type, }, async (span) => { - let batch = await this.eventLog.readEntriesFromCheckpoint( - this.checkpoint, - ); - - if (batch.length === 0) return; + // read entries from byte offset + let batch: Awaited< + ReturnType + >["entries"]; + let newByteOffset: number; + + try { + const result = await this.eventLog.readEntriesFromByteOffset( + this.byteOffset, + this.currentBatchSize, + ); + batch = result.entries; + newByteOffset = result.newByteOffset; + } catch (error) { + // handle file not found error + if ( + error instanceof Error && + (error as NodeJS.ErrnoException).code === "ENOENT" + ) { + this.hooks.log({ + severity: "warn", + message: + "Event log file not found (rotation in progress?), will retry", + }); + return; + } + throw error; + } - // limit batch size - if (batch.length > this.config.maxBatchSize) { - batch = batch.slice(0, this.config.maxBatchSize); + if (batch.length === 0) { + // no work to do, shrink batch size + this.adjustBatchSize(0); + return; } span.setAttribute(TaskAttributes.FLUSH_BATCH_SIZE, batch.length); @@ -185,7 +227,7 @@ export class FlushWorker { ) { try { await this.repository.executeBatch(batch); - await this.saveCheckpoint(this.checkpoint + batch.length); + await this.saveByteOffset(newByteOffset); // update stats on success this.stats.lastFlushAt = Date.now(); @@ -222,9 +264,21 @@ export class FlushWorker { }, ); + // adjust batch size based on how much work we got + this.adjustBatchSize(batch.length); + span.setStatus("ok"); return; } catch (error) { + // extract root cause from error + const err = error as Error & { cause?: Error }; + const rootCause 
= err.cause?.message ?? null; + const errorMessage = rootCause + ? `${err.message} - Cause: ${rootCause}` + : error instanceof Error + ? error.message + : String(error); + this.hooks.log({ severity: "error", message: `Flush attempt ${attempt}/${this.config.maxFlushRetries} failed`, @@ -238,6 +292,7 @@ export class FlushWorker { this.stats.errorCount++; this.stats.lastErrorAt = Date.now(); this.stats.consecutiveErrors++; + this.stats.lastError = errorMessage; this.hooks.incrementCounter(TaskMetrics.FLUSH_ERRORS, 1, { [TaskAttributes.REPOSITORY_TYPE]: this.repository.type, @@ -279,34 +334,57 @@ export class FlushWorker { }; } - /** - * Save checkpoint atomically using write-then-rename - */ - private async saveCheckpoint(newCheckpoint: number): Promise { + private async saveByteOffset(newByteOffset: number): Promise { + // update in-memory offset + this.byteOffset = newByteOffset; + const checkpointPath = this.getCheckpointPath(); const tempPath = `${checkpointPath}.temp`; - // write new checkpoint - await fs.writeFile(tempPath, newCheckpoint.toString(), "utf-8"); - await fs.rename(tempPath, checkpointPath); - - this.checkpoint = newCheckpoint; + try { + await fs.writeFile(tempPath, newByteOffset.toString(), "utf-8"); + await fs.rename(tempPath, checkpointPath); + } catch (error) { + // handle file not found error + if ((error as NodeJS.ErrnoException).code === "ENOENT") { + try { + await fs.mkdir(path.dirname(checkpointPath), { recursive: true }); + await fs.writeFile(tempPath, newByteOffset.toString(), "utf-8"); + await fs.rename(tempPath, checkpointPath); + } catch (retryError) { + // log error that checkpoint directory was deleted + this.hooks.log({ + severity: "warn", + message: "Failed to persist checkpoint to disk", + error: + retryError instanceof Error + ? retryError + : new Error(String(retryError)), + }); + } + } else { + // log error that checkpoint file could not be saved + this.hooks.log({ + severity: "warn", + message: "Failed to save checkpoint", + error: error instanceof Error ? 
error : new Error(String(error)), + }); + } + } } - /** - * Load checkpoint from file, returns 0 if not found or invalid - */ - private async loadCheckpoint(): Promise { + private async loadByteOffset(): Promise { const checkpointPath = this.getCheckpointPath(); try { const content = await fs.readFile(checkpointPath, "utf-8"); const parsed = parseInt(content.trim(), 10); + // check if byte offset is valid if (Number.isNaN(parsed) || parsed < 0) { this.hooks.log({ severity: "warn", - message: `Invalid checkpoint value: ${content.trim()}, resetting to 0`, + message: `Invalid byte offset value: ${content.trim()}, resetting to 0`, }); return 0; } @@ -361,4 +439,31 @@ export class FlushWorker { private getCheckpointPath(): string { return `${this.config.eventLogPath}.flush-checkpoint`; } + + /** + * Adjust batch size based on WAL lag heuristic + */ + private adjustBatchSize(entriesRead: number): void { + const { minBatchSize, maxBatchSize } = this.config; + + if (entriesRead === 0) { + // no work to do, shrink batch size + this.currentBatchSize = Math.max( + minBatchSize, + Math.floor(this.currentBatchSize * 0.75), + ); + } else if (entriesRead >= this.currentBatchSize) { + // full batch, grow aggressively + this.currentBatchSize = Math.min( + maxBatchSize, + Math.floor(this.currentBatchSize * 1.5), + ); + } else if (entriesRead < this.currentBatchSize * 0.5) { + // less than half full, shrink gradually + this.currentBatchSize = Math.max( + minBatchSize, + Math.floor(this.currentBatchSize * 0.9), + ); + } + } } diff --git a/packages/taskflow/src/flush/types.ts b/packages/taskflow/src/flush/types.ts index c4687b00..21c56aac 100644 --- a/packages/taskflow/src/flush/types.ts +++ b/packages/taskflow/src/flush/types.ts @@ -8,8 +8,14 @@ export interface FlushConfig { flushIntervalMs: number; /** path to the event log file */ eventLogPath: string; + /** minimum batch size (dynamic batching lower bound) */ + minBatchSize: number; /** maximum number of entries to flush per batch */ maxBatchSize: number; + /** WAL lag threshold to trigger batch size increase */ + lagThresholdForIncrease: number; + /** WAL lag threshold to trigger batch size decrease */ + lagThresholdForDecrease: number; /** maximum number of retry attempts per flush */ maxFlushRetries: number; /** base delay for exponential backoff in milliseconds */ @@ -33,9 +39,12 @@ export interface FlushConfig { */ export const DEFAULT_FLUSH_CONFIG: Required> = { - flushIntervalMs: 1000, + flushIntervalMs: 100, eventLogPath: "./.taskflow/event.log", - maxBatchSize: 1000, + minBatchSize: 500, + maxBatchSize: 5000, + lagThresholdForIncrease: 10000, + lagThresholdForDecrease: 1000, maxFlushRetries: 3, retryBaseDelayMs: 100, circuitBreakerDurationMs: 30_000, @@ -61,6 +70,8 @@ export interface FlushWorkerStats { lastFlushAt: number | null; /** timestamp of last error */ lastErrorAt: number | null; + /** last error message for debugging */ + lastError: string | null; } /** @@ -81,7 +92,7 @@ export interface FlushWorkerRuntimeStats extends FlushWorkerStats { export type IPCMessage = | { type: "ready" } | { type: "stats"; payload: FlushWorkerRuntimeStats } - | { type: "shutdown-complete" } + | { type: "shutdown-complete"; payload: FlushWorkerRuntimeStats } | { type: "error"; payload: string }; /** @@ -127,5 +138,6 @@ export interface FlushStats { totalEntriesFlushed: number; lastFlushAt: number | null; lastErrorAt: number | null; + lastError: string | null; } | null; } diff --git a/packages/taskflow/src/guard/backpressure.ts 
b/packages/taskflow/src/guard/backpressure.ts index 2d973120..50f623bc 100644 --- a/packages/taskflow/src/guard/backpressure.ts +++ b/packages/taskflow/src/guard/backpressure.ts @@ -150,6 +150,52 @@ export class Backpressure { } } + /** + * Accept a task with waiting for queue capacity + * Instead of rejecting when queue is full, waits until capacity is available + * Throw BackpressureError if timeout is reached + * @param task The task to accept + * @param isInDLQ Whether the task is in the DLQ + * @param timeoutMs Maximum time to wait for capacity (default: 30s) + */ + async acceptWithWait( + task: Task, + isInDLQ: boolean, + timeoutMs: number = 30_000, + ): Promise { + const startTime = Date.now(); + const pollIntervalMs = 50; + + while (Date.now() - startTime < timeoutMs) { + try { + this.accept(task, isInDLQ); + return; // task accepted successfully + } catch (error) { + if (error instanceof BackpressureError) { + // queue full or rate limited - wait and retry + await new Promise((resolve) => setTimeout(resolve, pollIntervalMs)); + continue; + } + // other errors (e.g., validation error for DLQ) - rethrow + throw error; + } + } + + // timeout - throw the backpressure error + this.trackRejection("queue_full", task); + throw new BackpressureError( + "Timeout waiting for queue capacity", + this.config.maxQueuedSize, + 0, + 1000, + { + taskId: task.id, + taskName: task.name, + waitedMs: Date.now() - startTime, + }, + ); + } + /** * Decrement queue size when a task acquires an execution slot */ diff --git a/packages/taskflow/src/guard/guard.ts b/packages/taskflow/src/guard/guard.ts index a5f583bf..589ea9fe 100644 --- a/packages/taskflow/src/guard/guard.ts +++ b/packages/taskflow/src/guard/guard.ts @@ -79,7 +79,7 @@ export class Guard { /** * Accept a task for processing - * Validates raate limits and queue capacity + * Validates rate limits and queue capacity * @throws {ValidationError} if task is in DLQ * @throws {BackpressureError} if limits exceeded */ @@ -87,6 +87,22 @@ export class Guard { this.backpressure.accept(task, this.dlq.has(task.idempotencyKey)); } + /** + * Accept a task for processing with waiting for capacity + * If queue is full or rate limited, waits until capacity is available + * @param task The task to accept + * @param timeoutMs Maximum time to wait for capacity (default: 30s) + * @throws {ValidationError} if task is in DLQ + * @throws {BackpressureError} if timeout reached while waiting + */ + async acceptTaskWithWait(task: Task, timeoutMs?: number): Promise { + await this.backpressure.acceptWithWait( + task, + this.dlq.has(task.idempotencyKey), + timeoutMs, + ); + } + /** * Acquire an execution slot for a task * Decrements queue size on success @@ -258,11 +274,12 @@ export class Guard { } /** - * Shutdown the guard (clears all state, stop timers) + * Shutdown the guard */ shutdown(): void { this.dlq.shutdown(); - this.clear(); + this.slotManager.clear(); + this.recoverySlotsInUse = 0; } /** diff --git a/packages/taskflow/src/guard/types.ts b/packages/taskflow/src/guard/types.ts index 65d2e832..4b204863 100644 --- a/packages/taskflow/src/guard/types.ts +++ b/packages/taskflow/src/guard/types.ts @@ -234,14 +234,14 @@ export interface GuardConfig { export const DEFAULT_GUARD_CONFIG: GuardConfig = { backpressure: { windowSizeMs: 60_000, // 1 minute - maxTasksPerWindow: 1000, - maxTasksPerUserWindow: 100, - maxQueuedSize: 500, + maxTasksPerWindow: 5000, + maxTasksPerUserWindow: 200, + maxQueuedSize: 1000, }, slots: { - maxExecutionGlobal: 50, + maxExecutionGlobal: 100, 
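+    // perf defaults: global concurrency raised from 50 to 100; the per-user cap is unchanged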
maxExecutionPerUser: 10, - slotTimeoutMs: 30_000, // 30 seconds + slotTimeoutMs: 60_000, // 60 seconds }, dlq: { maxSize: 1000, diff --git a/packages/taskflow/src/index.ts b/packages/taskflow/src/index.ts index 4bd4641d..b28ee50b 100644 --- a/packages/taskflow/src/index.ts +++ b/packages/taskflow/src/index.ts @@ -108,12 +108,14 @@ export { DEFAULT_EXECUTOR_CONFIG, DEFAULT_RECOVERY_CONFIG, DEFAULT_RETRY_CONFIG, + DEFAULT_SCHEDULER_CONFIG, DEFAULT_SHUTDOWN_CONFIG, type ExecutorConfig, type ExecutorStats, type RecoveryConfig, type RecoveryStats, type RetryConfig, + type SchedulerConfig, type ShutdownConfig, type ShutdownOptions, type TaskEventSubscriber, diff --git a/packages/taskflow/src/persistence/event-log.ts b/packages/taskflow/src/persistence/event-log.ts index 8c0c3ba6..a548a3d6 100644 --- a/packages/taskflow/src/persistence/event-log.ts +++ b/packages/taskflow/src/persistence/event-log.ts @@ -1,6 +1,8 @@ import { createHash } from "node:crypto"; +import { createReadStream } from "node:fs"; import fs from "node:fs/promises"; import path from "node:path"; +import { createInterface, type Interface } from "node:readline"; import { canonicalize } from "json-canonicalize"; import { EventLogError } from "@/core/errors"; import type { TaskStatus } from "@/core/types"; @@ -207,12 +209,222 @@ export class EventLog { */ async readEntriesFromCheckpoint( checkpoint: number, + limit: number = 1000, ): Promise { - const entries = await this.readEntries(this.config.eventLogPath); - return entries.filter((entry) => { - const eventLogPath = entry as EventLogEvent; - return eventLogPath.seq > checkpoint; - }); + const entries: EventLogEntry[] = []; + + const stream = this.createStreamReader(); + if (!stream) return entries; + + try { + // read entries from checkpoint position + for await (const line of stream.readline) { + if (!line.trim()) continue; + + // parse entry + try { + const entry = JSON.parse(line) as EventLogEvent; + + // skip entries already processed + if (entry.seq <= checkpoint) continue; + + entries.push(entry); + + // stop early once we have enough entries + if (entries.length >= limit) break; + } catch { + this.malformedEntriesSkipped++; + this.hooks?.incrementCounter( + TaskMetrics.EVENTLOG_MALFORMED_SKIPPED, + 1, + ); + } + } + } finally { + stream.close(); + } + + return entries; + } + + /** + * Read entries starting from a byte offset + */ + async readEntriesFromByteOffset( + byteOffset: number, + limit: number = 1000, + ): Promise<{ entries: EventLogEntry[]; newByteOffset: number }> { + const entries: EventLogEntry[] = []; + let currentOffset = byteOffset; + + // check if rotation happened + const rotationResult = await this.handleRotationIfNeeded(byteOffset, limit); + if (rotationResult) { + // rotation detected, read from rotated file first + entries.push(...rotationResult.entries); + if (entries.length >= limit) { + return { + entries: entries.slice(0, limit), + newByteOffset: rotationResult.newByteOffset, + }; + } + // continue reading from current file + currentOffset = 0; + limit -= entries.length; + } + + const stream = this.createStreamReaderFromOffset(currentOffset); + if (!stream) return { entries, newByteOffset: currentOffset }; + + try { + for await (const line of stream.readline) { + // compute line bytes + const lineBytes = Buffer.byteLength(line, "utf-8") + 1; + + if (!line.trim()) { + currentOffset += lineBytes; + continue; + } + + try { + const entry = JSON.parse(line) as EventLogEvent; + entries.push(entry); + currentOffset += lineBytes; + + // stop early once 
we have enough entries + if (entries.length >= limit) break; + } catch { + // malformed line, skip but still advance offset + currentOffset += lineBytes; + this.malformedEntriesSkipped++; + this.hooks?.incrementCounter( + TaskMetrics.EVENTLOG_MALFORMED_SKIPPED, + 1, + ); + } + } + } finally { + stream.close(); + } + + return { entries, newByteOffset: currentOffset }; + } + + /** + * Check if rotation happened and read remaining entries from rotated file + */ + private async handleRotationIfNeeded( + byteOffset: number, + limit: number, + ): Promise<{ entries: EventLogEntry[]; newByteOffset: number } | null> { + if (byteOffset === 0) return null; + + try { + const stats = await fs.stat(this.config.eventLogPath); + + // no rotation, return null + if (stats.size >= byteOffset) return null; + + // get rotated file path + const rotatedPath = `${this.config.eventLogPath}.1`; + + try { + await fs.access(rotatedPath); + } catch { + // log rotation was detected but rotated file not found + this.hooks?.log({ + severity: "warn", + message: + "Rotation detected but rotated file not found, resetting offset", + attributes: { byteOffset, currentFileSize: stats.size }, + }); + return { entries: [], newByteOffset: 0 }; + } + + // read entries from rotated file + const entries: EventLogEntry[] = []; + + const fileStream = createReadStream(rotatedPath, { + encoding: "utf-8", + start: byteOffset, + }); + + const rl = createInterface({ + input: fileStream, + crlfDelay: Infinity, + }); + + try { + for await (const line of rl) { + if (!line.trim()) continue; + + try { + const entry = JSON.parse(line) as EventLogEvent; + entries.push(entry); + if (entries.length >= limit) break; + } catch { + this.malformedEntriesSkipped++; + } + } + } finally { + rl.close(); + fileStream.destroy(); + } + + this.hooks?.log({ + severity: "info", + message: "Read entries from rotated file after rotation", + attributes: { entriesRead: entries.length, fromOffset: byteOffset }, + }); + + // return entries, signal to continue from offset 0 in current file + return { entries, newByteOffset: 0 }; + } catch { + return { entries: [], newByteOffset: 0 }; + } + } + + private createStreamReader(): { + readline: Interface; + close: () => void; + } | null { + return this.createStreamReaderFromOffset(0); + } + + private createStreamReaderFromOffset(byteOffset: number): { + readline: Interface; + close: () => void; + } | null { + try { + // check if file exists + const fileStream = createReadStream(this.config.eventLogPath, { + encoding: "utf-8", + start: byteOffset, + }); + + // handle file not found error + fileStream.on("error", (err) => { + if ((err as NodeJS.ErrnoException).code === "ENOENT") { + // file not found error, destroy stream + fileStream.destroy(); + } + }); + + const rl = createInterface({ + input: fileStream, + crlfDelay: Infinity, + }); + + return { + readline: rl, + close: () => { + rl.close(); + fileStream.destroy(); + }, + }; + } catch { + return null; + } } /** @@ -346,6 +558,31 @@ export class EventLog { }; } + /** + * Sync all buffered writes to disk + */ + async sync(): Promise { + // wait for ongoing rotation + await this.rotationLock; + + if (this.fileHandle) { + try { + await this.saveCheckpoint(); + await this.fileHandle.sync(); + } catch (error) { + // handle file closed error + if (error instanceof Error && error.message.includes("file closed")) { + this.hooks?.log({ + severity: "warn", + message: "File handle closed during sync, likely due to rotation", + }); + return; + } + throw error; + } + } + } + /** * Close 
the event log * @param deleteFiles - Whether to delete the log files diff --git a/packages/taskflow/src/persistence/repository/sqlite/connector.ts b/packages/taskflow/src/persistence/repository/sqlite/connector.ts index a6618aa3..654573c9 100644 --- a/packages/taskflow/src/persistence/repository/sqlite/connector.ts +++ b/packages/taskflow/src/persistence/repository/sqlite/connector.ts @@ -1,7 +1,8 @@ +import { createHash } from "node:crypto"; import fs from "node:fs"; import path from "node:path"; import { fileURLToPath } from "node:url"; -import Database from "better-sqlite3"; +import Database, { type Statement } from "better-sqlite3"; import type { IdempotencyKey, TaskId } from "@/core/branded"; import { RepositoryError } from "@/core/errors"; import type { TaskStatus } from "@/core/types"; @@ -28,6 +29,19 @@ const DEFAULT_RETRY_CONFIG = { maxDelayMs: 1000, }; +/** + * Cached prepared statements + */ +interface PreparedStatements { + insertTask: Statement; + updateTaskStart: Statement; + updateTaskComplete: Statement; + updateTaskError: Statement; + updateTaskCancelled: Statement; + updateTaskHeartbeat: Statement; + insertTaskEvent: Statement; +} + /** * SQLite Connector * @@ -38,6 +52,7 @@ export class SQLiteConnector { private db: Database.Database; private _isInitialized = false; private hooks: TaskSystemHooks; + private statements: PreparedStatements | null = null; constructor(config: SQLiteConfig, hooks: TaskSystemHooks = noopHooks) { this.db = new Database(config.database ?? "./.taskflow/sqlite.db"); @@ -50,14 +65,22 @@ export class SQLiteConnector { /** * Initialize the database - * Enables WAL mode and run migrations + * Enables WAL mode, runs migrations, and prepares statements */ async initialize(): Promise { // enable WAL mode for better performance this.db.pragma("journal_mode = WAL"); + // disable foreign key enforcement (consistency via event ordering) + this.db.pragma("foreign_keys = OFF"); + // optimize for concurrent reads + this.db.pragma("synchronous = NORMAL"); // run migrations await this.runMigrations(); + + // prepare all statements once for reuse + this.prepareStatements(); + this._isInitialized = true; this.hooks.log({ @@ -70,7 +93,38 @@ export class SQLiteConnector { } /** - * Execute a batch of events in a transaction + * Prepare all SQL statements for reuse + */ + private prepareStatements(): void { + this.statements = { + insertTask: this.db.prepare(` + INSERT OR IGNORE INTO tasks (task_id, name, status, type, idempotency_key, user_id, input_data, execution_options, created_at, last_heartbeat_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + `), + updateTaskStart: this.db.prepare(` + UPDATE tasks SET status = ?, started_at = ?, last_heartbeat_at = ? WHERE task_id = ? + `), + updateTaskComplete: this.db.prepare(` + UPDATE tasks SET status = ?, completed_at = ?, result = ? WHERE task_id = ? + `), + updateTaskError: this.db.prepare(` + UPDATE tasks SET status = ?, completed_at = ?, error = ?, attempt = attempt + 1 WHERE task_id = ? + `), + updateTaskCancelled: this.db.prepare(` + UPDATE tasks SET status = ?, completed_at = ?, error = ? WHERE task_id = ? + `), + updateTaskHeartbeat: this.db.prepare(` + UPDATE tasks SET last_heartbeat_at = ? WHERE task_id = ? + `), + insertTaskEvent: this.db.prepare(` + INSERT OR IGNORE INTO task_events (entry_id, task_id, seq, type, timestamp, payload) + VALUES (?, ?, ?, ?, ?, ?) 
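+        -- OR IGNORE tolerates re-inserts when a flush batch is retried (entry_id is expected to be unique)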
+ `), + }; + } + + /** + * Execute a batch of events in a transaction using bulk operations */ async executeBatch(batch: EventLogEntry[]): Promise { if (batch.length === 0) return; @@ -79,9 +133,7 @@ export class SQLiteConnector { await this.withRetry(async () => { const transaction = this.db.transaction((entries: EventLogEntry[]) => { - for (const entry of entries) { - this.executeEntry(entry); - } + this.executeBulkOperations(entries); }); transaction(batch); }, "executeBatch"); @@ -98,6 +150,273 @@ export class SQLiteConnector { ); } + /** + * Execute bulk operations for a batch of entries + */ + private executeBulkOperations(entries: EventLogEntry[]): void { + // collect entries by type + const taskCreated: Array<{ entry: EventLogEntry; seq: number }> = []; + const taskStart: Array<{ entry: EventLogEntry; seq: number }> = []; + const taskComplete: Array<{ entry: EventLogEntry; seq: number }> = []; + const taskError: Array<{ entry: EventLogEntry; seq: number }> = []; + const taskCancelled: Array<{ entry: EventLogEntry; seq: number }> = []; + const taskProgress: Array<{ entry: EventLogEntry; seq: number }> = []; + const taskHeartbeat: EventLogEntry[] = []; + const taskCustom: Array<{ entry: EventLogEntry; seq: number }> = []; + + for (const entry of entries) { + const seq = (entry as EventLogEntry & { seq?: number }).seq ?? 0; + switch (entry.type) { + case "TASK_CREATED": + taskCreated.push({ entry, seq }); + break; + case "TASK_START": + taskStart.push({ entry, seq }); + break; + case "TASK_COMPLETE": + taskComplete.push({ entry, seq }); + break; + case "TASK_ERROR": + taskError.push({ entry, seq }); + break; + case "TASK_CANCELLED": + taskCancelled.push({ entry, seq }); + break; + case "TASK_PROGRESS": + taskProgress.push({ entry, seq }); + break; + case "TASK_HEARTBEAT": + taskHeartbeat.push(entry); + break; + case "TASK_CUSTOM": + taskCustom.push({ entry, seq }); + break; + } + } + + // execute bulk operations for each type + if (taskCreated.length > 0) this.bulkInsertTasks(taskCreated); + if (taskStart.length > 0) this.bulkUpdateTaskStart(taskStart); + if (taskComplete.length > 0) this.bulkUpdateTaskComplete(taskComplete); + if (taskError.length > 0) this.bulkUpdateTaskError(taskError); + if (taskCancelled.length > 0) this.bulkUpdateTaskCancelled(taskCancelled); + if (taskProgress.length > 0) this.bulkUpdateTaskProgress(taskProgress); + if (taskHeartbeat.length > 0) this.bulkUpdateHeartbeat(taskHeartbeat); + if (taskCustom.length > 0) this.bulkUpdateTaskCustom(taskCustom); + } + + /** + * Bulk insert tasks and their created events + */ + private bulkInsertTasks( + items: Array<{ entry: EventLogEntry; seq: number }>, + ): void { + for (const { entry, seq } of items) { + this.statements!.insertTask.run( + entry.taskId, + entry.name, + "created", + entry.taskType, + entry.idempotencyKey, + entry.userId ?? null, + entry.input ? JSON.stringify(entry.input) : null, + entry.executionOptions ? 
JSON.stringify(entry.executionOptions) : null, + new Date(entry.timestamp).toISOString(), + new Date(entry.timestamp).toISOString(), + ); + this.insertTaskEvent(entry.taskId, "TASK_CREATED", seq, entry.timestamp, { + name: entry.name, + taskType: entry.taskType, + idempotencyKey: entry.idempotencyKey, + userId: entry.userId, + input: entry.input, + }); + } + } + + /** + * Bulk update tasks to running status + */ + private bulkUpdateTaskStart( + items: Array<{ entry: EventLogEntry; seq: number }>, + ): void { + if (items.length === 0) return; + + const taskIds = items.map((i) => i.entry.taskId); + const timestamp = new Date().toISOString(); + const placeholders = taskIds.map(() => "?").join(","); + + this.db + .prepare(` + UPDATE tasks SET status = 'running', started_at = ?, last_heartbeat_at = ? + WHERE task_id IN (${placeholders}) + `) + .run(timestamp, timestamp, ...taskIds); + + // insert events + for (const { entry, seq } of items) { + this.insertTaskEvent(entry.taskId, "TASK_START", seq, entry.timestamp); + } + } + + /** + * Bulk update tasks to completed status + */ + private bulkUpdateTaskComplete( + items: Array<{ entry: EventLogEntry; seq: number }>, + ): void { + if (items.length === 0) return; + + // we need individual updates due to different results + for (const { entry, seq } of items) { + this.statements!.updateTaskComplete.run( + "completed", + new Date(entry.timestamp).toISOString(), + entry.result ? JSON.stringify(entry.result) : null, + entry.taskId, + ); + this.insertTaskEvent( + entry.taskId, + "TASK_COMPLETE", + seq, + entry.timestamp, + { + result: entry.result, + }, + ); + } + } + + /** + * Bulk update tasks to failed status + */ + private bulkUpdateTaskError( + items: Array<{ entry: EventLogEntry; seq: number }>, + ): void { + if (items.length === 0) return; + + // we need individual updates due to different error messages + for (const { entry, seq } of items) { + this.statements!.updateTaskError.run( + "failed", + new Date(entry.timestamp).toISOString(), + entry.error ?? null, + entry.taskId, + ); + this.insertTaskEvent(entry.taskId, "TASK_ERROR", seq, entry.timestamp, { + error: entry.error, + }); + } + } + + /** + * Bulk update tasks to cancelled status + */ + private bulkUpdateTaskCancelled( + items: Array<{ entry: EventLogEntry; seq: number }>, + ): void { + if (items.length === 0) return; + + const taskIds = items.map((i) => i.entry.taskId); + const timestamp = new Date().toISOString(); + const placeholders = taskIds.map(() => "?").join(","); + + this.db + .prepare(` + UPDATE tasks SET status = 'cancelled', completed_at = ? + WHERE task_id IN (${placeholders}) + `) + .run(timestamp, ...taskIds); + + for (const { entry, seq } of items) { + this.insertTaskEvent( + entry.taskId, + "TASK_CANCELLED", + seq, + entry.timestamp, + { + error: entry.error, + }, + ); + } + } + + /** + * Bulk update task progress (heartbeat + event) + */ + private bulkUpdateTaskProgress( + items: Array<{ entry: EventLogEntry; seq: number }>, + ): void { + if (items.length === 0) return; + + const taskIds = items.map((i) => i.entry.taskId); + const timestamp = new Date().toISOString(); + const placeholders = taskIds.map(() => "?").join(","); + + this.db + .prepare(` + UPDATE tasks SET last_heartbeat_at = ? 
+ WHERE task_id IN (${placeholders}) + `) + .run(timestamp, ...taskIds); + + for (const { entry, seq } of items) { + this.insertTaskEvent( + entry.taskId, + "TASK_PROGRESS", + seq, + entry.timestamp, + { + ...entry.payload, + }, + ); + } + } + + /** + * Bulk update heartbeats (no event insertion) + */ + private bulkUpdateHeartbeat(entries: EventLogEntry[]): void { + if (entries.length === 0) return; + + const taskIds = entries.map((e) => e.taskId); + const timestamp = new Date().toISOString(); + const placeholders = taskIds.map(() => "?").join(","); + + this.db + .prepare(` + UPDATE tasks SET last_heartbeat_at = ? + WHERE task_id IN (${placeholders}) + `) + .run(timestamp, ...taskIds); + } + + /** + * Bulk update custom events + */ + private bulkUpdateTaskCustom( + items: Array<{ entry: EventLogEntry; seq: number }>, + ): void { + if (items.length === 0) return; + + const taskIds = items.map((i) => i.entry.taskId); + const timestamp = new Date().toISOString(); + const placeholders = taskIds.map(() => "?").join(","); + + this.db + .prepare(` + UPDATE tasks SET last_heartbeat_at = ? + WHERE task_id IN (${placeholders}) + `) + .run(timestamp, ...taskIds); + + for (const { entry, seq } of items) { + this.insertTaskEvent(entry.taskId, "TASK_CUSTOM", seq, entry.timestamp, { + ...entry.payload, + }); + } + } + /** * Close the database connection */ @@ -243,176 +562,27 @@ export class SQLiteConnector { ); } - private executeEntry(entry: EventLogEntry): void { - switch (entry.type) { - case "TASK_CREATED": - this.executeTaskCreated(entry); - break; - case "TASK_START": - this.executeTaskStart(entry); - break; - case "TASK_COMPLETE": - this.executeTaskComplete(entry); - break; - case "TASK_ERROR": - this.executeTaskError(entry); - break; - case "TASK_PROGRESS": - this.executeTaskProgress(entry); - break; - case "TASK_CANCELLED": - this.executeTaskCancelled(entry); - break; - case "TASK_HEARTBEAT": - this.executeTaskHeartbeat(entry); - break; - case "TASK_CUSTOM": - this.executeTaskCustom(entry); - break; - default: - throw new Error(`Unsupported event type: ${entry.type}`); - } - } - - private executeTaskCreated(entry: EventLogEntry): void { - const stmt = this.db.prepare(` - INSERT INTO tasks (task_id, name, status, type, idempotency_key, user_id, input_data, execution_options, created_at, last_heartbeat_at) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) - `); - stmt.run( - entry.taskId, - entry.name, - "created", - entry.taskType, - entry.idempotencyKey, - entry.userId ?? null, - entry.input ? JSON.stringify(entry.input) : null, - entry.executionOptions ? JSON.stringify(entry.executionOptions) : null, - new Date(entry.timestamp).toISOString(), - new Date(entry.timestamp).toISOString(), - ); - - this.insertTaskEvent(entry.taskId, "TASK_CREATED", entry.timestamp, { - name: entry.name, - taskType: entry.taskType, - idempotencyKey: entry.idempotencyKey, - userId: entry.userId, - input: entry.input, - }); - } - - private executeTaskStart(entry: EventLogEntry): void { - const stmt = this.db.prepare(` - UPDATE tasks SET status = ?, started_at = ?, last_heartbeat_at = ? WHERE task_id = ? - `); - stmt.run( - "running", - new Date(entry.timestamp).toISOString(), - new Date(entry.timestamp).toISOString(), - entry.taskId, - ); - - this.insertTaskEvent(entry.taskId, "TASK_START", entry.timestamp); - } - - private executeTaskComplete(entry: EventLogEntry): void { - const stmt = this.db.prepare(` - UPDATE tasks SET status = ?, completed_at = ?, result = ? WHERE task_id = ? 
-    `);
-    stmt.run(
-      "completed",
-      new Date(entry.timestamp).toISOString(),
-      entry.result ? JSON.stringify(entry.result) : null,
-      entry.taskId,
-    );
-
-    this.insertTaskEvent(entry.taskId, "TASK_COMPLETE", entry.timestamp, {
-      result: entry.result,
-    });
-  }
-
-  private executeTaskError(entry: EventLogEntry): void {
-    const stmt = this.db.prepare(`
-      UPDATE tasks SET status = ?, completed_at = ?, error = ?, attempt = attempt + 1 WHERE task_id = ?
-    `);
-    stmt.run(
-      "failed",
-      new Date(entry.timestamp).toISOString(),
-      entry.error ?? null,
-      entry.taskId,
-    );
-
-    this.insertTaskEvent(entry.taskId, "TASK_ERROR", entry.timestamp, {
-      error: entry.error,
-    });
-  }
-
-  private executeTaskCancelled(entry: EventLogEntry): void {
-    const stmt = this.db.prepare(`
-      UPDATE tasks SET status = ?, completed_at = ?, error = ? WHERE task_id = ?
-    `);
-    stmt.run(
-      "cancelled",
-      new Date(entry.timestamp).toISOString(),
-      entry.error ?? null,
-      entry.taskId,
-    );
-
-    this.insertTaskEvent(entry.taskId, "TASK_CANCELLED", entry.timestamp, {
-      error: entry.error,
-    });
-  }
-
-  private executeTaskProgress(entry: EventLogEntry): void {
-    const stmt = this.db.prepare(`
-      UPDATE tasks SET last_heartbeat_at = ? WHERE task_id = ?
-    `);
-    stmt.run(new Date(entry.timestamp).toISOString(), entry.taskId);
-
-    this.insertTaskEvent(entry.taskId, "TASK_PROGRESS", entry.timestamp, {
-      ...entry.payload,
-    });
-  }
-
-  private executeTaskHeartbeat(entry: EventLogEntry): void {
-    // only update heartbeat, do NOT insert into task_events
-    const stmt = this.db.prepare(`
-      UPDATE tasks SET last_heartbeat_at = ? WHERE task_id = ?
-    `);
-    stmt.run(new Date(entry.timestamp).toISOString(), entry.taskId);
-  }
-
-  private executeTaskCustom(entry: EventLogEntry): void {
-    const stmt = this.db.prepare(`
-      UPDATE tasks SET last_heartbeat_at = ? WHERE task_id = ?
-    `);
-    stmt.run(new Date(entry.timestamp).toISOString(), entry.taskId);
-
-    this.insertTaskEvent(entry.taskId, "TASK_CUSTOM", entry.timestamp, {
-      ...entry.payload,
-    });
-  }
-
+  /**
+   * Insert a task event with a deterministic entry_id
+   * Uses the global event log seq for both the entry_id hash and task_events.seq
+   * This avoids an expensive SELECT MAX(seq) query per event
+   */
   private insertTaskEvent(
     taskId: string,
     type: EventLogEntryType,
+    globalSeq: number,
     timestampMs: number,
     payload?: Record<string, unknown>,
   ): void {
-    // get next sequence number for this task
-    const seqStmt = this.db.prepare(`
-      SELECT COALESCE(MAX(seq), 0) + 1 as nextSeq FROM task_events WHERE task_id = ?
-    `);
-    const { nextSeq } = seqStmt.get(taskId) as { nextSeq: number };
+    const hash = createHash("sha256")
+      .update(`${taskId}:${globalSeq}`)
+      .digest("hex");
+    const entryId = `${hash.slice(0, 8)}-${hash.slice(8, 12)}-${hash.slice(12, 16)}-${hash.slice(16, 20)}-${hash.slice(20, 32)}`;
 
-    const eventStmt = this.db.prepare(`
-      INSERT INTO task_events (entry_id, task_id, seq, type, timestamp, payload)
-      VALUES (?, ?, ?, ?, ?, ?)
-    `);
-    eventStmt.run(
-      crypto.randomUUID(),
+    this.statements!.insertTaskEvent.run(
+      entryId,
       taskId,
-      nextSeq,
+      globalSeq,
      type,
      new Date(timestampMs).toISOString(),
      payload ? JSON.stringify(payload) : null,
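The key change in this hunk: entry_id is now derived, not random. Replaying the same WAL entry therefore regenerates the same id, and the INSERT OR IGNORE above silently drops the duplicate. A minimal standalone sketch of the derivation (the task id is illustrative):

import { createHash } from "node:crypto";

// same (taskId, globalSeq) pair → same entry_id, on every replay
function deterministicEntryId(taskId: string, globalSeq: number): string {
  const hash = createHash("sha256").update(`${taskId}:${globalSeq}`).digest("hex");
  // format the first 32 hex chars as a UUID-shaped string: 8-4-4-4-12
  return `${hash.slice(0, 8)}-${hash.slice(8, 12)}-${hash.slice(12, 16)}-${hash.slice(16, 20)}-${hash.slice(20, 32)}`;
}

const a = deterministicEntryId("task-123", 42);
const b = deterministicEntryId("task-123", 42);
console.log(a === b); // true — replayed batches cannot double-insert events

With crypto.randomUUID(), as in the removed code, a crash between the SQLite commit and the WAL checkpoint could insert the same event twice under two different ids.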
From e9a74aee845abc62de1c7c2122ed341576d91b36 Mon Sep 17 00:00:00 2001
From: ditadi
Date: Mon, 2 Feb 2026 22:33:09 +0000
Subject: [PATCH 12/13] docs(taskflow): add interactive demos

---
 packages/taskflow/demo/chaos-demo.ts       | 868 +++++++++++++++++++
 packages/taskflow/demo/interactive-demo.ts | 957 +++++++++++++++++++++
 packages/taskflow/demo/showcase-demo.ts    | 876 +++++++++++++++++++
 packages/taskflow/demo/stress-test.ts      | 439 ++++++++++
 4 files changed, 3140 insertions(+)
 create mode 100644 packages/taskflow/demo/chaos-demo.ts
 create mode 100644 packages/taskflow/demo/interactive-demo.ts
 create mode 100644 packages/taskflow/demo/showcase-demo.ts
 create mode 100644 packages/taskflow/demo/stress-test.ts

diff --git a/packages/taskflow/demo/chaos-demo.ts b/packages/taskflow/demo/chaos-demo.ts
new file mode 100644
index 00000000..fdf6d57d
--- /dev/null
+++ b/packages/taskflow/demo/chaos-demo.ts
@@ -0,0 +1,868 @@
+/**
+ * TaskFlow Chaos & Reliability Demo
+ *
+ * Demonstrates TaskFlow's resilience features with realistic scenarios:
+ *
+ * SCENARIOS:
+ * 1. BASELINE   - Normal operation, establish throughput baseline
+ * 2. BURST      - Sudden spike → backpressure & rate limiting
+ * 3. FAILURES   - Random failures → retry with exponential backoff
+ * 4. SLOW       - Slow tasks → queue buildup & slot exhaustion
+ * 5. CHECKPOINT - Mid-task failures → smart recovery from checkpoint
+ * 6. DLQ        - Poison pills → DLQ management & retry
+ * 7. DRAIN      - Graceful shutdown with pending work
+ *
+ * FEATURES DEMONSTRATED:
+ * ✓ Backpressure (queue full, rate limits)
+ * ✓ Retry with exponential backoff
+ * ✓ Stale task detection & recovery
+ * ✓ Smart recovery from checkpoints
+ * ✓ Dead Letter Queue management
+ * ✓ Graceful degradation under load
+ *
+ * Environment variables:
+ *   DEMO_SCENARIO_DURATION=8000   Duration per scenario in ms
+ *   DEMO_MAX_CONCURRENT=15        Max concurrent executions
+ *
+ * Run with: npx tsx demo/chaos-demo.ts
+ */
+
+import {
+  TaskSystem,
+  userId,
+  idempotencyKey,
+  type TaskSystemStats,
+  type TaskHandlerContext,
+  type RecoveryContext
+} from '../src/index.js';
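The FAILURES scenario exercises the executor's retry path. As a reference, here is a sketch of the delay schedule implied by the retry settings this demo passes to TaskSystem further down (initialDelayMs: 100, backoffMultiplier: 2, maxDelayMs: 1000); the exact formula is an assumption — the conventional delay = initialDelay × multiplier^(attempt−1), capped, without jitter:

// delay before retry N (1-based), capped at maxDelayMs
function backoffDelayMs(
  attempt: number,
  initialDelayMs = 100,
  multiplier = 2,
  maxDelayMs = 1_000,
): number {
  return Math.min(maxDelayMs, initialDelayMs * multiplier ** (attempt - 1));
}

// 100ms, 200ms, 400ms, 800ms, then capped at 1000ms
console.log([1, 2, 3, 4, 5].map(n => backoffDelayMs(n))); // [100, 200, 400, 800, 1000]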
+
+// ═══════════════════════════════════════════════════════════════════════════════
+// ANSI Terminal Utilities
+// ═══════════════════════════════════════════════════════════════════════════════
+
+const ANSI = {
+  reset: '\x1b[0m',
+  bold: '\x1b[1m',
+  dim: '\x1b[2m',
+  blink: '\x1b[5m',
+
+  black: '\x1b[30m',
+  red: '\x1b[31m',
+  green: '\x1b[32m',
+  yellow: '\x1b[33m',
+  blue: '\x1b[34m',
+  magenta: '\x1b[35m',
+  cyan: '\x1b[36m',
+  white: '\x1b[37m',
+
+  bgRed: '\x1b[41m',
+  bgGreen: '\x1b[42m',
+  bgYellow: '\x1b[43m',
+  bgBlue: '\x1b[44m',
+  bgMagenta: '\x1b[45m',
+
+  clearScreen: '\x1b[2J',
+  cursorHome: '\x1b[H',
+  hideCursor: '\x1b[?25l',
+  showCursor: '\x1b[?25h',
+};
+
+function color(text: string, ...codes: string[]): string {
+  return codes.join('') + text + ANSI.reset;
+}
+
+function progressBar(value: number, max: number, width: number = 20): string {
+  const pct = Math.min(1, Math.max(0, max > 0 ? value / max : 0));
+  const filled = Math.round(pct * width);
+  const empty = width - filled;
+
+  let barColor = ANSI.green;
+  if (pct > 0.9) barColor = ANSI.red;
+  else if (pct > 0.7) barColor = ANSI.yellow;
+
+  return `${barColor}${'█'.repeat(filled)}${ANSI.dim}${'░'.repeat(empty)}${ANSI.reset}`;
+}
+
+function sparkline(values: number[], width: number = 20): string {
+  const chars = ['▁', '▂', '▃', '▄', '▅', '▆', '▇', '█'];
+  const recent = values.slice(-width);
+  if (recent.length === 0) return color('─'.repeat(width), ANSI.dim);
+
+  const max = Math.max(...recent, 1);
+  return recent.map(v => {
+    const idx = Math.floor((v / max) * (chars.length - 1));
+    return color(chars[idx], ANSI.cyan);
+  }).join('');
+}
+
+function formatDuration(ms: number): string {
+  if (ms >= 60000) return (ms / 60000).toFixed(1) + 'm';
+  if (ms >= 1000) return (ms / 1000).toFixed(1) + 's';
+  return ms.toFixed(0) + 'ms';
+}
+
+// ═══════════════════════════════════════════════════════════════════════════════
+// Scenario Types
+// ═══════════════════════════════════════════════════════════════════════════════
+
+type Scenario = 'baseline' | 'burst' | 'failures' | 'hanging' | 'checkpoint' | 'dlq' | 'drain';
+
+interface ScenarioConfig {
+  name: string;
+  icon: string;
+  color: string;
+  description: string;
+  tasksToSubmit: number;
+  submissionDelay: { min: number; max: number };
+}
+
+const SCENARIOS: Record<Scenario, ScenarioConfig> = {
+  baseline: {
+    name: 'BASELINE',
+    icon: '📊',
+    color: ANSI.green,
+    description: 'Normal operation - establishing throughput baseline',
+    tasksToSubmit: 40,
+    submissionDelay: { min: 30, max: 60 },
+  },
+  burst: {
+    name: 'BURST',
+    icon: '🌊',
+    color: ANSI.blue,
+    description: 'Sudden traffic spike - testing backpressure & queue limits',
+    tasksToSubmit: 200, // Many tasks
+    submissionDelay: { min: 0, max: 5 }, // Almost instant
+  },
+  failures: {
+    name: 'FAILURES',
+    icon: '💥',
+    color: ANSI.red,
+    description: 'Random failures - testing retry with exponential backoff',
+    tasksToSubmit: 50,
+    submissionDelay: { min: 20, max: 40 },
+  },
+  hanging: {
+    name: 'SLOW',
+    icon: '🐢',
+    color: ANSI.yellow,
+    description: 'Slow tasks - testing queue buildup & slot exhaustion',
+    tasksToSubmit: 60,
+    submissionDelay: { min: 10, max: 20 },
+  },
+  checkpoint: {
+    name: 'MIXED',
+    icon: '🎲',
+    color: ANSI.magenta,
+    description: 'Mixed workload - failures during processing with checkpoints',
+    tasksToSubmit: 40,
+    submissionDelay: { min: 30, max: 50 },
+  },
+  dlq: {
+    name: 'POISON',
+    icon: '☠️',
+    color: ANSI.red,
+    description: 'Poison pills - tasks that always fail, exhaust retries',
+    tasksToSubmit: 20,
+    submissionDelay: { min: 50, max: 80 },
+  },
+  drain: {
+    name: 'DRAIN',
+    icon: '🚰',
+    color: ANSI.cyan,
+    description: 'Graceful shutdown - draining remaining tasks',
+    tasksToSubmit: 0,
+    submissionDelay: { min: 0, max: 0 },
+  },
+};
+
+interface DemoMetrics {
+  startTime: number;
+  currentScenario: Scenario;
+  scenarioStartTime: number;
+  scenarioDurationMs: number;
+
+  // Task counts
+  tasksSubmitted: number;
+  tasksRejected: number;
+
+  // Scenario-specific events
+  events: {
+    backpressureRejections: number;
+    retriesAttempted: number;
+    retriesSucceeded: number;
+    retriesExhausted: number;
+    staleTasks: number;
+    recoveredTasks: number;
+    checkpointRecoveries: number;
+    dlqAdded: number;
+    dlqRetried: number;
+    dlqExpired: number;
+  };
+
+  // Time series
+  throughputHistory: number[];
+  failureHistory: number[];
+  queueHistory: number[];
+  recoveryHistory: number[];
+
+  // Per-second counters
currentSecond: number; + currentSecondSubmissions: number; + currentSecondFailures: number; + currentSecondRecoveries: number; + + // Peaks + peakQueued: number; + peakExecuting: number; + peakDLQ: number; +} + +// ═══════════════════════════════════════════════════════════════════════════════ +// Dashboard Renderer +// ═══════════════════════════════════════════════════════════════════════════════ + +function renderDashboard(stats: TaskSystemStats, metrics: DemoMetrics): string { + const lines: string[] = []; + const elapsed = Date.now() - metrics.startTime; + const scenarioElapsed = Date.now() - metrics.scenarioStartTime; + const scenarioProgress = Math.min(1, scenarioElapsed / metrics.scenarioDurationMs); + const scenario = SCENARIOS[metrics.currentScenario]; + + // Header + lines.push(''); + lines.push(color('╔══════════════════════════════════════════════════════════════════════════════╗', ANSI.cyan, ANSI.bold)); + lines.push(color('║', ANSI.cyan) + color(' 🎭 TASKFLOW RELIABILITY DEMO 🎭 ', ANSI.yellow, ANSI.bold) + color('║', ANSI.cyan)); + lines.push(color('╠══════════════════════════════════════════════════════════════════════════════╣', ANSI.cyan)); + + // Current Scenario + const scenarioBar = progressBar(scenarioProgress, 1, 15); + const pct = (scenarioProgress * 100).toFixed(0); + lines.push(color('║', ANSI.cyan) + ` ${scenario.icon} ${color(scenario.name, scenario.color, ANSI.bold)} ${scenarioBar} ${pct}% Elapsed: ${color(formatDuration(elapsed), ANSI.white)}`.padEnd(95) + color('║', ANSI.cyan)); + lines.push(color('║', ANSI.cyan) + ` ${color(scenario.description, ANSI.dim)}`.padEnd(87) + color('║', ANSI.cyan)); + + // System Status + const statusColor = stats.system.status === 'running' ? ANSI.green : + stats.system.status === 'degraded' ? ANSI.yellow : ANSI.red; + lines.push(color('║', ANSI.cyan) + ` System: ${color(stats.system.status.toUpperCase(), statusColor, ANSI.bold)}`.padEnd(87) + color('║', ANSI.cyan)); + + lines.push(color('╠══════════════════════════════════════════════════════════════════════════════╣', ANSI.cyan)); + + // Live Metrics Row + const guard = stats.components.guard; + const executor = stats.components.executor; + + lines.push(color('║', ANSI.cyan) + color(' 📈 LIVE METRICS', ANSI.magenta, ANSI.bold).padEnd(86) + color('║', ANSI.cyan)); + + // Queue and execution + const queueBar = progressBar(stats.tasks.queued, 75, 12); + const execBar = progressBar(stats.tasks.executing, 15, 12); + lines.push(color('║', ANSI.cyan) + ` Queue: ${queueBar} ${color(stats.tasks.queued.toString().padStart(3), ANSI.yellow)} Exec: ${execBar} ${color(stats.tasks.executing.toString().padStart(2), ANSI.cyan)}/15 In-Flight: ${color(stats.tasks.inFlight.toString(), ANSI.white)}`.padEnd(96) + color('║', ANSI.cyan)); + + // Completed/Failed + const successRate = (stats.tasks.successRate ?? 0) * 100; + const rateColor = successRate >= 95 ? ANSI.green : successRate >= 80 ? 
ANSI.yellow : ANSI.red; + lines.push(color('║', ANSI.cyan) + ` Done: ${color(stats.tasks.totalCompleted.toString(), ANSI.green)} Failed: ${color(stats.tasks.totalFailed.toString(), ANSI.red)} Rate: ${color(successRate.toFixed(0) + '%', rateColor)} Submitted: ${metrics.tasksSubmitted}`.padEnd(96) + color('║', ANSI.cyan)); + + // Sparklines + lines.push(color('║', ANSI.cyan) + ` Throughput: ${sparkline(metrics.throughputHistory, 18)} Queue: ${sparkline(metrics.queueHistory, 18)}`.padEnd(96) + color('║', ANSI.cyan)); + + lines.push(color('╠══════════════════════════════════════════════════════════════════════════════╣', ANSI.cyan)); + + // Feature Demonstration Status + lines.push(color('║', ANSI.cyan) + color(' 🎯 FEATURES DEMONSTRATED', ANSI.magenta, ANSI.bold).padEnd(86) + color('║', ANSI.cyan)); + + // Row 1: Backpressure & Retries + const bpIcon = metrics.events.backpressureRejections > 0 ? '✓' : '○'; + const bpColor = metrics.events.backpressureRejections > 0 ? ANSI.green : ANSI.dim; + const retryIcon = metrics.events.retriesAttempted > 0 ? '✓' : '○'; + const retryColor = metrics.events.retriesAttempted > 0 ? ANSI.green : ANSI.dim; + + lines.push(color('║', ANSI.cyan) + ` ${color(bpIcon, bpColor)} Backpressure: ${color(metrics.events.backpressureRejections.toString(), ANSI.yellow)} rejected ${color(retryIcon, retryColor)} Retries: ${color(metrics.events.retriesAttempted.toString(), ANSI.yellow)}→${color(metrics.events.retriesSucceeded.toString(), ANSI.green)}→${color(metrics.events.retriesExhausted.toString(), ANSI.red)}`.padEnd(96) + color('║', ANSI.cyan)); + + // Row 2: Recovery & DLQ + const recoveryIcon = metrics.events.recoveredTasks > 0 ? '✓' : '○'; + const recoveryColor = metrics.events.recoveredTasks > 0 ? ANSI.green : ANSI.dim; + const dlqIcon = metrics.events.dlqAdded > 0 ? '✓' : '○'; + const dlqColor = metrics.events.dlqAdded > 0 ? ANSI.green : ANSI.dim; + + lines.push(color('║', ANSI.cyan) + ` ${color(recoveryIcon, recoveryColor)} Recovery: ${color(metrics.events.staleTasks.toString(), ANSI.yellow)} stale → ${color(metrics.events.recoveredTasks.toString(), ANSI.green)} recovered ${color(dlqIcon, dlqColor)} DLQ: ${color(metrics.events.dlqAdded.toString(), ANSI.red)}→${color(metrics.events.dlqRetried.toString(), ANSI.cyan)}`.padEnd(96) + color('║', ANSI.cyan)); + + // Row 3: Checkpoints + const cpIcon = metrics.events.checkpointRecoveries > 0 ? '✓' : '○'; + const cpColor = metrics.events.checkpointRecoveries > 0 ? 
ANSI.green : ANSI.dim; + lines.push(color('║', ANSI.cyan) + ` ${color(cpIcon, cpColor)} Checkpoint Recovery: ${color(metrics.events.checkpointRecoveries.toString(), ANSI.green)} resumed from checkpoint`.padEnd(87) + color('║', ANSI.cyan)); + + lines.push(color('╠══════════════════════════════════════════════════════════════════════════════╣', ANSI.cyan)); + + // Guard System Details + lines.push(color('║', ANSI.cyan) + color(' 🛡️ GUARD SYSTEM', ANSI.magenta, ANSI.bold).padEnd(86) + color('║', ANSI.cyan)); + + const admission = guard.admission; + const rejections = admission.rejections.byReason; + lines.push(color('║', ANSI.cyan) + ` Window: ${admission.window.accepted}/${admission.config.maxTasksPerWindow} Rejected: Global:${color((rejections.global_rate_limit || 0).toString(), ANSI.red)} User:${color((rejections.user_rate_limit || 0).toString(), ANSI.red)} Queue:${color((rejections.queue_full || 0).toString(), ANSI.red)}`.padEnd(96) + color('║', ANSI.cyan)); + + // DLQ Status + const dlq = guard.dlq; + const dlqBar = progressBar(dlq.size, 20, 10); + lines.push(color('║', ANSI.cyan) + ` DLQ: ${dlqBar} ${color(dlq.size.toString(), dlq.size > 0 ? ANSI.red : ANSI.green)} entries Age: ${dlq.avgAgeMs > 0 ? formatDuration(dlq.avgAgeMs) : '-'}`.padEnd(96) + color('║', ANSI.cyan)); + + lines.push(color('╠══════════════════════════════════════════════════════════════════════════════╣', ANSI.cyan)); + + // Recovery System + const recovery = stats.components.recovery; + lines.push(color('║', ANSI.cyan) + color(' 🔄 RECOVERY SYSTEM', ANSI.magenta, ANSI.bold).padEnd(86) + color('║', ANSI.cyan)); + lines.push(color('║', ANSI.cyan) + ` Scanning: ${recovery.background.isScanning ? color('YES', ANSI.yellow) : color('no', ANSI.dim)} Recovered: ${color(recovery.outcomes.background.toString(), ANSI.green)} bg / ${color(recovery.outcomes.user.toString(), ANSI.cyan)} user Failed: ${color(recovery.outcomes.failed.toString(), ANSI.red)}`.padEnd(96) + color('║', ANSI.cyan)); + + if (recovery.outcomes.byMethod.smartRecovery > 0 || recovery.outcomes.byMethod.reexecution > 0) { + lines.push(color('║', ANSI.cyan) + ` Method: Smart:${color(recovery.outcomes.byMethod.smartRecovery.toString(), ANSI.green)} Re-exec:${color(recovery.outcomes.byMethod.reexecution.toString(), ANSI.yellow)}`.padEnd(87) + color('║', ANSI.cyan)); + } + + lines.push(color('╠══════════════════════════════════════════════════════════════════════════════╣', ANSI.cyan)); + + // Executor + lines.push(color('║', ANSI.cyan) + color(' ⚡ EXECUTOR', ANSI.magenta, ANSI.bold).padEnd(86) + color('║', ANSI.cyan)); + lines.push(color('║', ANSI.cyan) + ` Running: ${executor.current.executing} Heartbeats: ${executor.current.heartbeatsActive} Retries: ${executor.retries.attempted}→${executor.retries.succeeded}→${executor.retries.exhausted}`.padEnd(96) + color('║', ANSI.cyan)); + + lines.push(color('╚══════════════════════════════════════════════════════════════════════════════╝', ANSI.cyan, ANSI.bold)); + lines.push(''); + lines.push(color(' Press Ctrl+C to stop', ANSI.dim)); + + return lines.join('\n'); +} + +// ═══════════════════════════════════════════════════════════════════════════════ +// Configuration +// ═══════════════════════════════════════════════════════════════════════════════ + +interface DemoConfig { + scenarioDurationMs: number; + maxConcurrent: number; +} + +function loadConfig(): DemoConfig { + return { + scenarioDurationMs: parseInt(process.env.DEMO_SCENARIO_DURATION ?? '8000', 10), + maxConcurrent: parseInt(process.env.DEMO_MAX_CONCURRENT ?? 
'15', 10), + }; +} + +// ═══════════════════════════════════════════════════════════════════════════════ +// Main Demo +// ═══════════════════════════════════════════════════════════════════════════════ + +async function main() { + const config = loadConfig(); + + console.log(ANSI.clearScreen + ANSI.cursorHome + ANSI.hideCursor); + console.log(color('\n Initializing TaskFlow Reliability Demo...', ANSI.cyan, ANSI.bold)); + console.log(color(` Scenarios: BASELINE → BURST → FAILURES → HANGING → CHECKPOINT → DLQ → DRAIN`, ANSI.dim)); + console.log(color(` Concurrency: ${config.maxConcurrent} slots Scenario duration: ${config.scenarioDurationMs}ms`, ANSI.dim)); + + const demoDir = './.taskflow-chaos-demo'; + const { mkdir, rm } = await import('node:fs/promises'); + await rm(demoDir, { recursive: true, force: true }); + await mkdir(demoDir, { recursive: true }); + + // Initialize with tight constraints to trigger resilience features + const taskSystem = new TaskSystem({ + repository: { + type: 'sqlite', + database: `${demoDir}/demo.db` + }, + eventLog: { + eventLogPath: `${demoDir}/event.log`, + maxSizeBytesPerFile: 5_242_880, + maxAgePerFile: 1_800_000, + retentionCount: 2 + }, + guard: { + backpressure: { + windowSizeMs: 30_000, + maxTasksPerWindow: 500, + maxTasksPerUserWindow: 50, + maxQueuedSize: 75 // Small queue to trigger backpressure + }, + slots: { + maxExecutionGlobal: config.maxConcurrent, + maxExecutionPerUser: 5, + slotTimeoutMs: 30_000 + }, + dlq: { + maxSize: 50, + ttlMs: 30_000, // 30 second TTL for demo + cleanupIntervalMs: 5_000, + maxRetries: 2 + }, + recovery: { + maxRecoverySlots: 5, + recoverySlotTimeoutMs: 30_000 + } + }, + executor: { + heartbeatIntervalMs: 2_000, // Fast heartbeats + retry: { + maxAttempts: 3, + initialDelayMs: 100, + maxDelayMs: 1_000, + backoffMultiplier: 2 + } + }, + flush: { + eventLogPath: `${demoDir}/event.log`, + }, + recovery: { + enabled: true, + backgroundPollIntervalMs: 3_000, // Check every 3 seconds + staleThresholdMs: 6_000, // 6 second stale threshold (short for demo) + batchSize: 10, + completionTimeoutMs: 15_000, + heartbeatIntervalMs: 2_000 + }, + shutdown: { + gracePeriodMs: 10_000, + pollIntervalMs: 100 + } + }); + + await taskSystem.initialize(); + console.log(color(' TaskFlow initialized!', ANSI.green, ANSI.bold)); + + // ═══════════════════════════════════════════════════════════════════════════════ + // Task Handlers for Different Scenarios + // ═══════════════════════════════════════════════════════════════════════════════ + + // Track which tasks should exhibit special behavior + const taskBehaviors = new Map(); + let taskCounter = 0; + + // Task with configurable behaviors + const demoTask = taskSystem.registerTask({ + name: 'demo-task', + description: 'Task with configurable behavior for demo scenarios', + type: 'user', + + handler: async function* (input: { id: number }, _ctx: TaskHandlerContext) { + const behavior = taskBehaviors.get(input.id) ?? 
'normal'; + + // POISON: Always fails (exhausts all retries) + if (behavior === 'poison') { + yield { type: 'progress', message: 'Starting poison task...', payload: { step: 0 } }; + await sleep(50); + throw new Error(`POISON_PILL: Task ${input.id} always fails - exhausting retries`); + } + + // FAIL: High failure rate with retries + if (behavior === 'fail') { + yield { type: 'progress', message: 'Starting risky task...', payload: { step: 0 } }; + await sleep(100 + Math.random() * 100); + + if (Math.random() < 0.6) { // 60% fail on first attempt + throw new Error(`RANDOM_FAILURE: Task ${input.id} failed - will retry`); + } + + yield { type: 'progress', message: 'Task succeeded', payload: { step: 1 } }; + return { id: input.id, completed: true, behavior }; + } + + // SLOW: Takes longer to complete (fills queue) + if (behavior === 'slow') { + const steps = 5; + for (let step = 1; step <= steps; step++) { + await sleep(400 + Math.random() * 200); // 400-600ms per step = 2-3s total + yield { + type: 'progress', + message: `Slow step ${step}/${steps}`, + payload: { step, total: steps } + }; + } + return { id: input.id, steps, completed: true, behavior }; + } + + // CHECKPOINT_FAIL: Fail mid-way, checkpoints allow smart recovery + if (behavior === 'checkpoint_fail') { + for (let step = 1; step <= 5; step++) { + await sleep(150); + yield { + type: 'progress', + message: `Checkpoint step ${step}/5`, + payload: { step, total: 5, checkpoint: step } + }; + + // 50% chance to fail at step 2 or 3 + if ((step === 2 || step === 3) && Math.random() < 0.5) { + throw new Error(`MID_TASK_FAILURE: Task ${input.id} failed at checkpoint ${step}`); + } + } + return { id: input.id, completed: true, behavior }; + } + + // NORMAL: Quick completion + const steps = 2 + Math.floor(Math.random() * 2); + for (let step = 1; step <= steps; step++) { + await sleep(100 + Math.random() * 100); + yield { + type: 'progress', + message: `Step ${step}/${steps}`, + payload: { step, total: steps } + }; + } + + return { id: input.id, steps, completed: true, behavior }; + }, + + // Smart recovery - resume from checkpoint + recover: async function* (input: { id: number }, ctx: RecoveryContext) { + const lastProgress = ctx.previousEvents + .filter(e => e.type === 'progress') + .pop(); + + const lastStep = (lastProgress?.payload?.checkpoint as number) ?? 
0; + + yield { + type: 'recovered', + message: `Recovering task ${input.id} from checkpoint ${lastStep}`, + payload: { reason: ctx.recoveryReason, fromStep: lastStep } + }; + + // Complete remaining steps from checkpoint + for (let step = lastStep + 1; step <= 5; step++) { + await sleep(100); + yield { + type: 'progress', + message: `Resumed: step ${step}/5`, + payload: { step, total: 5, checkpoint: step, recovered: true } + }; + } + + return { id: input.id, recovered: true, fromStep: lastStep }; + } + }); + + // ═══════════════════════════════════════════════════════════════════════════════ + // Metrics + // ═══════════════════════════════════════════════════════════════════════════════ + + const metrics: DemoMetrics = { + startTime: Date.now(), + currentScenario: 'baseline', + scenarioStartTime: Date.now(), + scenarioDurationMs: config.scenarioDurationMs, + + tasksSubmitted: 0, + tasksRejected: 0, + + events: { + backpressureRejections: 0, + retriesAttempted: 0, + retriesSucceeded: 0, + retriesExhausted: 0, + staleTasks: 0, + recoveredTasks: 0, + checkpointRecoveries: 0, + dlqAdded: 0, + dlqRetried: 0, + dlqExpired: 0, + }, + + throughputHistory: [], + failureHistory: [], + queueHistory: [], + recoveryHistory: [], + + currentSecond: Math.floor(Date.now() / 1000), + currentSecondSubmissions: 0, + currentSecondFailures: 0, + currentSecondRecoveries: 0, + + peakQueued: 0, + peakExecuting: 0, + peakDLQ: 0, + }; + + // ═══════════════════════════════════════════════════════════════════════════════ + // Tracking + // ═══════════════════════════════════════════════════════════════════════════════ + + let lastRetryAttempted = 0; + let lastRetrySucceeded = 0; + let lastRetryExhausted = 0; + let lastRecovered = 0; + let lastDlqAdded = 0; + let lastDlqRetried = 0; + + const renderInterval = setInterval(() => { + const stats = taskSystem.getStats(); + + // Track peaks + if (stats.tasks.queued > metrics.peakQueued) metrics.peakQueued = stats.tasks.queued; + if (stats.tasks.executing > metrics.peakExecuting) metrics.peakExecuting = stats.tasks.executing; + if (stats.components.guard.dlq.size > metrics.peakDLQ) metrics.peakDLQ = stats.components.guard.dlq.size; + + // Track retries + const executor = stats.components.executor; + if (executor.retries.attempted > lastRetryAttempted) { + metrics.events.retriesAttempted += executor.retries.attempted - lastRetryAttempted; + } + if (executor.retries.succeeded > lastRetrySucceeded) { + metrics.events.retriesSucceeded += executor.retries.succeeded - lastRetrySucceeded; + } + if (executor.retries.exhausted > lastRetryExhausted) { + metrics.events.retriesExhausted += executor.retries.exhausted - lastRetryExhausted; + } + lastRetryAttempted = executor.retries.attempted; + lastRetrySucceeded = executor.retries.succeeded; + lastRetryExhausted = executor.retries.exhausted; + + // Track recovery + const recovery = stats.components.recovery.outcomes; + if (recovery.background > lastRecovered) { + const delta = recovery.background - lastRecovered; + metrics.events.recoveredTasks += delta; + metrics.events.checkpointRecoveries += recovery.byMethod.smartRecovery - (lastRecovered > 0 ? 
recovery.byMethod.smartRecovery - delta : 0);
+      metrics.currentSecondRecoveries += delta;
+    }
+    lastRecovered = recovery.background;
+
+    // Track DLQ
+    const dlq = stats.components.guard.dlq;
+    if (dlq.totalAdded > lastDlqAdded) {
+      metrics.events.dlqAdded += dlq.totalAdded - lastDlqAdded;
+    }
+    if (dlq.totalRetries > lastDlqRetried) {
+      metrics.events.dlqRetried += dlq.totalRetries - lastDlqRetried;
+    }
+    lastDlqAdded = dlq.totalAdded;
+    lastDlqRetried = dlq.totalRetries;
+    metrics.events.dlqExpired = dlq.totalExpired;
+
+    // Track stale (estimated from recovery attempts)
+    metrics.events.staleTasks = recovery.background + recovery.failed;
+
+    // Update time series
+    const currentSecond = Math.floor(Date.now() / 1000);
+    if (currentSecond !== metrics.currentSecond) {
+      metrics.throughputHistory.push(metrics.currentSecondSubmissions);
+      metrics.failureHistory.push(metrics.currentSecondFailures);
+      metrics.queueHistory.push(stats.tasks.queued);
+      metrics.recoveryHistory.push(metrics.currentSecondRecoveries);
+
+      // Keep last 30 seconds
+      const maxLen = 30;
+      if (metrics.throughputHistory.length > maxLen) metrics.throughputHistory.shift();
+      if (metrics.failureHistory.length > maxLen) metrics.failureHistory.shift();
+      if (metrics.queueHistory.length > maxLen) metrics.queueHistory.shift();
+      if (metrics.recoveryHistory.length > maxLen) metrics.recoveryHistory.shift();
+
+      metrics.currentSecondSubmissions = 0;
+      metrics.currentSecondFailures = 0;
+      metrics.currentSecondRecoveries = 0;
+      metrics.currentSecond = currentSecond;
+    }
+
+    // Track backpressure rejections from guard stats
+    const guard = stats.components.guard;
+    const totalRejections = guard.admission.totals.rejected;
+    if (totalRejections > metrics.events.backpressureRejections) {
+      metrics.events.backpressureRejections = totalRejections;
+    }
+
+    // Render
+    const dashboard = renderDashboard(stats, metrics);
+    process.stdout.write(ANSI.cursorHome + dashboard);
+  }, 100);
+
+  // ═══════════════════════════════════════════════════════════════════════════════
+  // Task Submission
+  // ═══════════════════════════════════════════════════════════════════════════════
+
+  const activeStreams = new Set<Promise<void>>();
+
+  async function submitTask(behavior: 'normal' | 'fail' | 'slow' | 'checkpoint_fail' | 'poison') {
+    const id = taskCounter++;
+    taskBehaviors.set(id, behavior);
+
+    const userIndex = Math.floor(Math.random() * 20);
+    const user = `user-${userIndex}`;
+
+    try {
+      const task = await demoTask.run({
+        input: { id },
+        userId: userId(user),
+        idempotencyKey: idempotencyKey(`task-${id}-${Date.now()}`)
+      });
+
+      metrics.tasksSubmitted++;
+      metrics.currentSecondSubmissions++;
+
+      // Stream events
+      const streamPromise = (async () => {
+        try {
+          if (task.stream) {
+            for await (const event of task.stream()) {
+              if (event.type === 'error') {
+                metrics.currentSecondFailures++;
+              }
+            }
+          }
+        } catch {
+          // Stream closed
+        }
+      })();
+
+      activeStreams.add(streamPromise);
+      streamPromise.finally(() => activeStreams.delete(streamPromise));
+
+    } catch {
+      metrics.tasksRejected++;
+    }
+  }
+
+  // ═══════════════════════════════════════════════════════════════════════════════
+  // Scenario Runner
+  // ═══════════════════════════════════════════════════════════════════════════════
+
+  let running = true;
+
+  async function runScenario(scenario: Scenario): Promise<void> {
+    metrics.currentScenario = scenario;
+    metrics.scenarioStartTime = Date.now();
+
+    const cfg = SCENARIOS[scenario];
+    let tasksSubmitted = 0;
+
+    while (running && Date.now() -
metrics.scenarioStartTime < config.scenarioDurationMs) { + if (tasksSubmitted >= cfg.tasksToSubmit) { + await sleep(100); + continue; + } + + // Determine task behavior based on scenario + let behavior: 'normal' | 'fail' | 'slow' | 'checkpoint_fail' | 'poison' = 'normal'; + + switch (scenario) { + case 'baseline': + behavior = 'normal'; + break; + case 'burst': + behavior = Math.random() < 0.15 ? 'fail' : 'normal'; // Some failures under load + break; + case 'failures': + behavior = 'fail'; // High failure rate to trigger retries + break; + case 'hanging': // Now "SLOW" scenario + behavior = 'slow'; // Slow tasks to fill queue + break; + case 'checkpoint': + behavior = 'checkpoint_fail'; // Mixed checkpoint failures + break; + case 'dlq': + behavior = 'poison'; // Always fail → exhaust retries + break; + } + + await submitTask(behavior); + tasksSubmitted++; + + // Delay based on scenario + const delay = cfg.submissionDelay.min + + Math.random() * (cfg.submissionDelay.max - cfg.submissionDelay.min); + await sleep(delay); + } + } + + // ═══════════════════════════════════════════════════════════════════════════════ + // Shutdown + // ═══════════════════════════════════════════════════════════════════════════════ + + const shutdown = async () => { + running = false; + metrics.currentScenario = 'drain'; + metrics.scenarioStartTime = Date.now(); + + await Promise.race([ + Promise.all(activeStreams), + sleep(3000) + ]); + + await taskSystem.shutdown({ deleteFiles: true }); + clearInterval(renderInterval); + console.log(ANSI.showCursor); + + // Final Report + const stats = taskSystem.getStats(); + + console.log(''); + console.log(color('╔══════════════════════════════════════════════════════════════════════════════╗', ANSI.cyan, ANSI.bold)); + console.log(color('║', ANSI.cyan) + color(' 📊 FINAL RELIABILITY REPORT ', ANSI.green, ANSI.bold) + color('║', ANSI.cyan)); + console.log(color('╠══════════════════════════════════════════════════════════════════════════════╣', ANSI.cyan)); + + const elapsed = Date.now() - metrics.startTime; + console.log(color('║', ANSI.cyan) + ` Duration: ${color(formatDuration(elapsed), ANSI.white, ANSI.bold)} Submitted: ${metrics.tasksSubmitted} Rejected: ${metrics.tasksRejected}`.padEnd(87) + color('║', ANSI.cyan)); + + const successRate = stats.tasks.totalCompleted + stats.tasks.totalFailed > 0 + ? (stats.tasks.totalCompleted / (stats.tasks.totalCompleted + stats.tasks.totalFailed) * 100) + : 0; + console.log(color('║', ANSI.cyan) + ` Completed: ${color(stats.tasks.totalCompleted.toString(), ANSI.green)} Failed: ${color(stats.tasks.totalFailed.toString(), ANSI.red)} Success: ${color(successRate.toFixed(1) + '%', successRate >= 80 ? ANSI.green : ANSI.yellow)}`.padEnd(96) + color('║', ANSI.cyan)); + + console.log(color('╠══════════════════════════════════════════════════════════════════════════════╣', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + color(' FEATURES VERIFIED:', ANSI.yellow, ANSI.bold).padEnd(86) + color('║', ANSI.cyan)); + + const check = (condition: boolean) => condition ? 
color('✓', ANSI.green) : color('○', ANSI.dim);
+
+  console.log(color('║', ANSI.cyan) + `  ${check(metrics.events.backpressureRejections > 0)} Backpressure:   ${metrics.events.backpressureRejections} tasks rejected under load`.padEnd(87) + color('║', ANSI.cyan));
+  console.log(color('║', ANSI.cyan) + `  ${check(metrics.events.retriesSucceeded > 0)} Retry Success:  ${metrics.events.retriesAttempted} attempts → ${metrics.events.retriesSucceeded} recovered`.padEnd(87) + color('║', ANSI.cyan));
+  console.log(color('║', ANSI.cyan) + `  ${check(metrics.events.recoveredTasks > 0)} Stale Recovery: ${metrics.events.staleTasks} stale → ${metrics.events.recoveredTasks} recovered`.padEnd(87) + color('║', ANSI.cyan));
+  console.log(color('║', ANSI.cyan) + `  ${check(metrics.events.checkpointRecoveries > 0)} Smart Recovery: ${metrics.events.checkpointRecoveries} resumed from checkpoint`.padEnd(87) + color('║', ANSI.cyan));
+  console.log(color('║', ANSI.cyan) + `  ${check(metrics.events.dlqAdded > 0)} DLQ Management: ${metrics.events.dlqAdded} added → ${metrics.events.dlqRetried} retried`.padEnd(87) + color('║', ANSI.cyan));
+
+  console.log(color('╠══════════════════════════════════════════════════════════════════════════════╣', ANSI.cyan));
+  console.log(color('║', ANSI.cyan) + color('  PEAKS:', ANSI.yellow, ANSI.bold).padEnd(86) + color('║', ANSI.cyan));
+  console.log(color('║', ANSI.cyan) + `  Queue: ${metrics.peakQueued}  Executing: ${metrics.peakExecuting}  DLQ: ${metrics.peakDLQ}`.padEnd(87) + color('║', ANSI.cyan));
+
+  console.log(color('╚══════════════════════════════════════════════════════════════════════════════╝', ANSI.cyan, ANSI.bold));
+  console.log('');
+
+  process.exit(0);
+  };
+
+  process.on('SIGINT', shutdown);
+  process.on('SIGTERM', shutdown);
+
+  // ═══════════════════════════════════════════════════════════════════════════════
+  // Run All Scenarios
+  // ═══════════════════════════════════════════════════════════════════════════════
+
+  const scenarioOrder: Scenario[] = ['baseline', 'burst', 'failures', 'hanging', 'checkpoint', 'dlq'];
+
+  for (const scenario of scenarioOrder) {
+    if (!running) break;
+    await runScenario(scenario);
+
+    // Small pause between scenarios
+    if (running) await sleep(500);
+  }
+
+  // Wait for completion
+  console.log(color('\n  All scenarios complete. Waiting for tasks to finish...', ANSI.dim));
+
+  const completionTimeout = 30_000;
+  const startWait = Date.now();
+
+  while (running && Date.now() - startWait < completionTimeout) {
+    const stats = taskSystem.getStats();
+    if (stats.tasks.inFlight === 0) break;
+    await sleep(500);
+  }
+
+  await shutdown();
+}
+
+function sleep(ms: number): Promise<void> {
+  return new Promise(resolve => setTimeout(resolve, ms));
+}
+
+main().catch(err => {
+  console.log(ANSI.showCursor);
+  console.error('Demo error:', err);
+  process.exit(1);
+});
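The recover generator registered earlier in this file is the pattern worth studying: on restart, the host replays the task's previously persisted progress events, and the handler resumes after the last checkpoint instead of redoing finished steps. A minimal sketch of that idea outside TaskFlow, with a hypothetical event shape (plain async generators, no TaskFlow types):

type ProgressEvent = { type: "progress"; payload: { checkpoint: number } };

// find the last recorded checkpoint so work can resume after it
function lastCheckpoint(previousEvents: ProgressEvent[]): number {
  const last = previousEvents.filter(e => e.type === "progress").pop();
  return last?.payload.checkpoint ?? 0;
}

// re-run only the steps that were never checkpointed
async function* resumedWork(previousEvents: ProgressEvent[], totalSteps: number) {
  for (let step = lastCheckpoint(previousEvents) + 1; step <= totalSteps; step++) {
    // each yielded event is assumed to be persisted by the host before the next step
    yield { type: "progress", payload: { checkpoint: step } } as ProgressEvent;
  }
}

The safety of this scheme rests on checkpoints being durable before the next step starts; if a step has side effects, it must itself be idempotent, since a crash can land between the side effect and the checkpoint.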
diff --git a/packages/taskflow/demo/interactive-demo.ts b/packages/taskflow/demo/interactive-demo.ts
new file mode 100644
index 00000000..31a48b7d
--- /dev/null
+++ b/packages/taskflow/demo/interactive-demo.ts
@@ -0,0 +1,957 @@
+/**
+ * TaskFlow Interactive Demo
+ *
+ * Demonstrates TaskFlow with simulated LLM workloads:
+ * - Default: 100 users × 20 tasks = 2000 total tasks
+ * - Default concurrency: 100 slots (matches TaskFlow defaults, configurable via DEMO_MAX_CONCURRENT)
+ *
+ * NOTE: This demo simulates LLM latency with `await sleep()` - it does NOT
+ * hit real backends. Real-world performance depends on your actual backend
+ * constraints (LLM rate limits, SQL warehouse capacity, etc).
+ *
+ * Environment variables:
+ *   DEMO_USERS=100           Number of simulated users
+ *   DEMO_TASKS_PER_USER=20   Tasks per user
+ *   DEMO_MAX_CONCURRENT=100  Max concurrent task executions
+ *   DEMO_CHUNKS_MIN=10       Min streaming chunks per task
+ *   DEMO_CHUNKS_MAX=25       Max streaming chunks per task
+ *
+ * Run with: npx tsx demo/interactive-demo.ts
+ */
+
+import {
+  TaskSystem,
+  userId,
+  idempotencyKey,
+  type TaskSystemStats,
+  type TaskHandlerContext,
+  type RecoveryContext
+} from '../src/index.js';
+
+// ═══════════════════════════════════════════════════════════════════════════════
+// ANSI Terminal Utilities
+// ═══════════════════════════════════════════════════════════════════════════════
+
+const ANSI = {
+  // Colors
+  reset: '\x1b[0m',
+  bold: '\x1b[1m',
+  dim: '\x1b[2m',
+
+  // Foreground
+  black: '\x1b[30m',
+  red: '\x1b[31m',
+  green: '\x1b[32m',
+  yellow: '\x1b[33m',
+  blue: '\x1b[34m',
+  magenta: '\x1b[35m',
+  cyan: '\x1b[36m',
+  white: '\x1b[37m',
+
+  // Background
+  bgBlack: '\x1b[40m',
+  bgRed: '\x1b[41m',
+  bgGreen: '\x1b[42m',
+  bgYellow: '\x1b[43m',
+  bgBlue: '\x1b[44m',
+  bgMagenta: '\x1b[45m',
+  bgCyan: '\x1b[46m',
+  bgWhite: '\x1b[47m',
+
+  // Cursor
+  clearScreen: '\x1b[2J',
+  cursorHome: '\x1b[H',
+  hideCursor: '\x1b[?25l',
+  showCursor: '\x1b[?25h',
+  clearLine: '\x1b[2K',
+};
+
+function color(text: string, ...codes: string[]): string {
+  return codes.join('') + text + ANSI.reset;
+}
+
+function progressBar(value: number, max: number, width: number = 30): string {
+  const pct = Math.min(1, Math.max(0, max > 0 ? value / max : 0));
+  const filled = Math.round(pct * width);
+  const empty = width - filled;
+
+  let barColor = ANSI.green;
+  if (pct > 0.8) barColor = ANSI.red;
+  else if (pct > 0.6) barColor = ANSI.yellow;
+
+  return `${barColor}${'█'.repeat(filled)}${ANSI.dim}${'░'.repeat(empty)}${ANSI.reset}`;
+}
+
+function sparkline(values: number[], width: number = 20): string {
+  const chars = ['▁', '▂', '▃', '▄', '▅', '▆', '▇', '█'];
+  const recent = values.slice(-width);
+  if (recent.length === 0) return '─'.repeat(width);
+
+  const max = Math.max(...recent, 1);
+  return recent.map(v => {
+    const idx = Math.floor((v / max) * (chars.length - 1));
+    return color(chars[idx], ANSI.cyan);
+  }).join('');
+}
+
+function formatNumber(n: number): string {
+  if (n >= 1000000) return (n / 1000000).toFixed(1) + 'M';
+  if (n >= 1000) return (n / 1000).toFixed(1) + 'K';
+  return n.toString();
+}
+
+function formatDuration(ms: number): string {
+  if (ms >= 60000) return (ms / 60000).toFixed(1) + 'm';
+  if (ms >= 1000) return (ms / 1000).toFixed(1) + 's';
+  return ms.toFixed(0) + 'ms';
+}
+
+// ═══════════════════════════════════════════════════════════════════════════════
+// Dashboard Renderer
+// ═══════════════════════════════════════════════════════════════════════════════
+
+interface DemoMetrics {
+  // Simulation state
+  totalTasks: number;
+  tasksSubmitted: number;
+  tasksRejected: number;
+  totalUsers: number;
+  startTime: number;
+
+  // Per-user tracking
+  userTaskCounts: Map<string, { submitted: number; completed: number; failed: number }>;
+
+  // Time series for sparklines
+  throughputHistory: number[];
+  completionHistory: number[];
+  dlqHistory: number[];
+  executingHistory: number[];
+  queuedHistory: number[];
+  flushThroughputHistory: number[];
+
+  // Current second counters
+  currentSecondSubmissions: number;
+  currentSecondCompletions: number;
+  lastSecond: number;
+
+  // Peak values
+  peakQueued: number;
+  peakExecuting: number;
+  peakThroughput: number;
+  peakCompletionRate: number;
+
+  // Flush tracking
+  lastFlushCount: number;
lastEntriesFlushed: number; + flushStartedAt: number | null; + flushCompletedAt: number | null; +} + +function renderDashboard(stats: TaskSystemStats, metrics: DemoMetrics): string { + const lines: string[] = []; + + const elapsed = Date.now() - metrics.startTime; + const throughput = elapsed > 0 ? (metrics.tasksSubmitted / (elapsed / 1000)).toFixed(1) : '0'; + const completionRate = elapsed > 0 + ? ((stats.tasks.totalCompleted + stats.tasks.totalFailed) / (elapsed / 1000)).toFixed(1) + : '0'; + + // Header + lines.push(''); + lines.push(color('╔══════════════════════════════════════════════════════════════════════════════╗', ANSI.cyan, ANSI.bold)); + lines.push(color('║', ANSI.cyan) + color(' 🚀 TASKFLOW INTERACTIVE DEMO 🚀 ', ANSI.yellow, ANSI.bold) + color('║', ANSI.cyan)); + lines.push(color('╠══════════════════════════════════════════════════════════════════════════════╣', ANSI.cyan)); + + // System Status + const statusColor = stats.system.status === 'running' ? ANSI.green : ANSI.yellow; + lines.push(color('║', ANSI.cyan) + ` System: ${color(stats.system.status.toUpperCase(), statusColor, ANSI.bold)} Uptime: ${color(formatDuration(stats.system.uptimeMs || 0), ANSI.white)} Templates: ${color(stats.registry.templates.toString(), ANSI.white)}`.padEnd(87) + color('║', ANSI.cyan)); + lines.push(color('╠══════════════════════════════════════════════════════════════════════════════╣', ANSI.cyan)); + + // Progress Section + lines.push(color('║', ANSI.cyan) + color(' 📊 SIMULATION PROGRESS', ANSI.magenta, ANSI.bold).padEnd(86) + color('║', ANSI.cyan)); + lines.push(color('║', ANSI.cyan) + ` Tasks: ${color(metrics.tasksSubmitted.toString(), ANSI.green)}/${color(metrics.totalTasks.toString(), ANSI.white)} submitted Waiting: ${color(stats.tasks.waiting.toString(), ANSI.yellow)} Users: ${color(metrics.totalUsers.toString(), ANSI.cyan)} Elapsed: ${color(formatDuration(elapsed), ANSI.yellow)}`.padEnd(87) + color('║', ANSI.cyan)); + + const submissionProgress = progressBar(metrics.tasksSubmitted, metrics.totalTasks, 50); + lines.push(color('║', ANSI.cyan) + ` Submissions: ${submissionProgress} ${((metrics.tasksSubmitted / metrics.totalTasks) * 100).toFixed(0)}%`.padEnd(87) + color('║', ANSI.cyan)); + + const completedTotal = stats.tasks.totalCompleted + stats.tasks.totalFailed + stats.tasks.totalCancelled; + const completionProgress = progressBar(completedTotal, metrics.tasksSubmitted, 50); + lines.push(color('║', ANSI.cyan) + ` Completions: ${completionProgress} ${metrics.tasksSubmitted > 0 ? 
((completedTotal / metrics.tasksSubmitted) * 100).toFixed(0) : 0}%`.padEnd(87) + color('║', ANSI.cyan)); + + lines.push(color('╠══════════════════════════════════════════════════════════════════════════════╣', ANSI.cyan)); + + // Task Metrics + lines.push(color('║', ANSI.cyan) + color(' 📈 TASK METRICS', ANSI.magenta, ANSI.bold).padEnd(86) + color('║', ANSI.cyan)); + lines.push(color('║', ANSI.cyan) + ` ${color('Queued:', ANSI.dim)} ${color(stats.tasks.queued.toString().padStart(5), ANSI.yellow)} ${color('Waiting:', ANSI.dim)} ${color(stats.tasks.waiting.toString().padStart(5), ANSI.yellow)} ${color('Executing:', ANSI.dim)} ${color(stats.tasks.executing.toString().padStart(5), ANSI.cyan)} ${color('In-Flight:', ANSI.dim)} ${color(stats.tasks.inFlight.toString().padStart(5), ANSI.white)}`.padEnd(96) + color('║', ANSI.cyan)); + lines.push(color('║', ANSI.cyan) + ` ${color('Completed:', ANSI.dim)} ${color(stats.tasks.totalCompleted.toString().padStart(5), ANSI.green)} ${color('Failed:', ANSI.dim)} ${color(stats.tasks.totalFailed.toString().padStart(5), ANSI.red)} ${color('Cancelled:', ANSI.dim)} ${color(stats.tasks.totalCancelled.toString().padStart(5), ANSI.yellow)} ${color('Success:', ANSI.dim)} ${color(((stats.tasks.successRate || 0) * 100).toFixed(1) + '%', ANSI.green)}`.padEnd(96) + color('║', ANSI.cyan)); + + // Throughput sparklines + lines.push(color('║', ANSI.cyan) + ` Throughput/s: ${sparkline(metrics.throughputHistory, 40)} ${color(throughput + '/s', ANSI.cyan)}`.padEnd(96) + color('║', ANSI.cyan)); + lines.push(color('║', ANSI.cyan) + ` Completed/s: ${sparkline(metrics.completionHistory, 40)} ${color(completionRate + '/s', ANSI.green)}`.padEnd(96) + color('║', ANSI.cyan)); + + lines.push(color('╠══════════════════════════════════════════════════════════════════════════════╣', ANSI.cyan)); + + // Guard System + const guard = stats.components.guard; + lines.push(color('║', ANSI.cyan) + color(' 🛡️ GUARD SYSTEM (Rate Limiting & Admission Control)', ANSI.magenta, ANSI.bold).padEnd(86) + color('║', ANSI.cyan)); + + // Backpressure + const admission = guard.admission; + const windowPct = admission.config.maxTasksPerWindow > 0 + ? (admission.window.accepted / admission.config.maxTasksPerWindow * 100).toFixed(0) + : '0'; + lines.push(color('║', ANSI.cyan) + ` ${color('Backpressure:', ANSI.bold)} Window ${color(admission.window.accepted.toString(), ANSI.green)}/${color(admission.config.maxTasksPerWindow.toString(), ANSI.white)} (${windowPct}%) Rejected: ${color(admission.window.rejected.toString(), ANSI.red)}`.padEnd(96) + color('║', ANSI.cyan)); + + // Slots + const slots = guard.slots; + const slotsBar = progressBar(slots.current.inUse, slots.limits.global, 20); + lines.push(color('║', ANSI.cyan) + ` ${color('Exec Slots:', ANSI.bold)} ${slotsBar} ${color(slots.current.inUse.toString(), ANSI.cyan)}/${color(slots.limits.global.toString(), ANSI.white)} in use Waiting: ${color(slots.current.waiting.toString(), ANSI.yellow)}`.padEnd(96) + color('║', ANSI.cyan)); + + // DLQ + const dlq = guard.dlq; + const dlqBar = progressBar(dlq.size, 100, 20); + lines.push(color('║', ANSI.cyan) + ` ${color('DLQ:', ANSI.bold)} ${dlqBar} ${color(dlq.size.toString(), dlq.size > 0 ? 
ANSI.red : ANSI.green)} entries Retries: ${color(dlq.totalRetries.toString(), ANSI.yellow)} Expired: ${color(dlq.totalExpired.toString(), ANSI.dim)}`.padEnd(96) + color('║', ANSI.cyan)); + lines.push(color('║', ANSI.cyan) + ` DLQ Trend: ${sparkline(metrics.dlqHistory, 40)}`.padEnd(87) + color('║', ANSI.cyan)); + + lines.push(color('╠══════════════════════════════════════════════════════════════════════════════╣', ANSI.cyan)); + + // Executor Stats + const executor = stats.components.executor; + lines.push(color('║', ANSI.cyan) + color(' ⚡ EXECUTOR', ANSI.magenta, ANSI.bold).padEnd(86) + color('║', ANSI.cyan)); + lines.push(color('║', ANSI.cyan) + ` Executing: ${color(executor.current.executing.toString(), ANSI.cyan)} Heartbeats: ${color(executor.current.heartbeatsActive.toString(), ANSI.dim)} Handler Missing: ${color(executor.outcomes.handlerMissing.toString(), ANSI.red)}`.padEnd(96) + color('║', ANSI.cyan)); + lines.push(color('║', ANSI.cyan) + ` Retries: Attempted ${color(executor.retries.attempted.toString(), ANSI.yellow)} / Succeeded ${color(executor.retries.succeeded.toString(), ANSI.green)} / Exhausted ${color(executor.retries.exhausted.toString(), ANSI.red)}`.padEnd(96) + color('║', ANSI.cyan)); + + lines.push(color('╠══════════════════════════════════════════════════════════════════════════════╣', ANSI.cyan)); + + // Stream Stats + const stream = stats.components.stream; + lines.push(color('║', ANSI.cyan) + color(' 📡 STREAMING', ANSI.magenta, ANSI.bold).padEnd(86) + color('║', ANSI.cyan)); + lines.push(color('║', ANSI.cyan) + ` Active Streams: ${color(stream.streams.active.toString(), ANSI.cyan)} Listeners: ${color(stream.listeners.total.toString(), ANSI.white)} Events Pushed: ${color(formatNumber(stream.events.pushed), ANSI.green)}`.padEnd(96) + color('║', ANSI.cyan)); + lines.push(color('║', ANSI.cyan) + ` Buffer Events: ${color(stream.buffer.totalEvents.toString(), ANSI.yellow)} Overflows: ${color(stream.buffer.overflows.toString(), stream.buffer.overflows > 0 ? ANSI.red : ANSI.green)}`.padEnd(96) + color('║', ANSI.cyan)); + + lines.push(color('╠══════════════════════════════════════════════════════════════════════════════╣', ANSI.cyan)); + + // WAL (Event Log) Stats + const eventLog = stats.components.eventLog; + lines.push(color('║', ANSI.cyan) + color(' 📝 WAL (Event Log)', ANSI.magenta, ANSI.bold).padEnd(86) + color('║', ANSI.cyan)); + lines.push(color('║', ANSI.cyan) + ` Current Seq: ${color(eventLog.sequence.current.toString(), ANSI.cyan)} Total Writes: ${color(formatNumber(eventLog.volume.entriesWritten), ANSI.green)} Rotations: ${color(eventLog.rotation.count.toString(), ANSI.yellow)}`.padEnd(96) + color('║', ANSI.cyan)); + + lines.push(color('╠══════════════════════════════════════════════════════════════════════════════╣', ANSI.cyan)); + + // Flush Stats + const flush = stats.components.flush; + const walWrites = stats.components.eventLog.volume.entriesWritten; + lines.push(color('║', ANSI.cyan) + color(' 💾 FLUSH (Persistence)', ANSI.magenta, ANSI.bold).padEnd(86) + color('║', ANSI.cyan)); + if (flush.worker) { + const flushPct = walWrites > 0 ? ((flush.worker.totalEntriesFlushed / walWrites) * 100).toFixed(0) : '0'; + const flushThroughput = metrics.flushThroughputHistory.length > 0 + ? 
metrics.flushThroughputHistory[metrics.flushThroughputHistory.length - 1] + : 0; + lines.push(color('║', ANSI.cyan) + ` Flushed: ${color(formatNumber(flush.worker.totalEntriesFlushed), ANSI.green)}/${color(formatNumber(walWrites), ANSI.white)} (${flushPct}%) Throughput: ${color(flushThroughput + '/s', ANSI.cyan)} Errors: ${color(flush.worker.errorCount.toString(), flush.worker.errorCount > 0 ? ANSI.red : ANSI.green)}`.padEnd(96) + color('║', ANSI.cyan)); + lines.push(color('║', ANSI.cyan) + ` Flush Rate: ${sparkline(metrics.flushThroughputHistory, 40)}`.padEnd(87) + color('║', ANSI.cyan)); + if (flush.worker.lastError) { + lines.push(color('║', ANSI.cyan) + ` ${color('Error:', ANSI.red)} ${color(flush.worker.lastError.substring(0, 60), ANSI.dim)}`.padEnd(96) + color('║', ANSI.cyan)); + } + } else { + lines.push(color('║', ANSI.cyan) + ` ${color('Worker not running (stats pending...)', ANSI.yellow)}`.padEnd(87) + color('║', ANSI.cyan)); + } + + lines.push(color('╠══════════════════════════════════════════════════════════════════════════════╣', ANSI.cyan)); + + // User Distribution (top 5) + lines.push(color('║', ANSI.cyan) + color(' 👥 USER DISTRIBUTION (Top 5)', ANSI.magenta, ANSI.bold).padEnd(86) + color('║', ANSI.cyan)); + + const sortedUsers = Array.from(metrics.userTaskCounts.entries()) + .sort((a, b) => b[1].submitted - a[1].submitted) + .slice(0, 5); + + for (const [user, counts] of sortedUsers) { + const userBar = progressBar(counts.completed + counts.failed, counts.submitted, 15); + const userNum = user.replace('user-', '').padStart(2, '0'); + lines.push(color('║', ANSI.cyan) + ` User ${color(userNum, ANSI.cyan)}: ${userBar} S:${color(counts.submitted.toString().padStart(3), ANSI.white)} C:${color(counts.completed.toString().padStart(3), ANSI.green)} F:${color(counts.failed.toString().padStart(3), ANSI.red)}`.padEnd(96) + color('║', ANSI.cyan)); + } + + lines.push(color('╠══════════════════════════════════════════════════════════════════════════════╣', ANSI.cyan)); + + // Rejection Reasons + const rejections = admission.rejections.byReason; + lines.push(color('║', ANSI.cyan) + color(' ⚠️ REJECTION BREAKDOWN', ANSI.magenta, ANSI.bold).padEnd(86) + color('║', ANSI.cyan)); + lines.push(color('║', ANSI.cyan) + ` Global Rate: ${color((rejections.global_rate_limit || 0).toString(), ANSI.red)} User Rate: ${color((rejections.user_rate_limit || 0).toString(), ANSI.red)} Queue Full: ${color((rejections.queue_full || 0).toString(), ANSI.red)} In DLQ: ${color((rejections.in_dlq || 0).toString(), ANSI.yellow)}`.padEnd(96) + color('║', ANSI.cyan)); + + lines.push(color('╚══════════════════════════════════════════════════════════════════════════════╝', ANSI.cyan, ANSI.bold)); + lines.push(''); + + // Show different hint based on system status + if (stats.system.status === 'shutting_down') { + lines.push(color(' ⏳ Graceful shutdown in progress - flushing remaining events to database...', ANSI.yellow)); + } else { + lines.push(color(' Press Ctrl+C to stop the demo', ANSI.dim)); + } + + return lines.join('\n'); +} + +// ═══════════════════════════════════════════════════════════════════════════════ +// Demo Configuration (Environment Variables) +// ═══════════════════════════════════════════════════════════════════════════════ + +interface DemoConfig { + /** Number of simulated users */ + users: number; + /** Tasks per user */ + tasksPerUser: number; + /** Streaming chunks (events) per task */ + chunksPerTask: { min: number; max: number }; + /** Maximum concurrent task executions */ + maxConcurrent: 
number; +} + +function loadConfig(): DemoConfig { + return { + users: parseInt(process.env.DEMO_USERS ?? '100', 10), + tasksPerUser: parseInt(process.env.DEMO_TASKS_PER_USER ?? '20', 10), + chunksPerTask: { + min: parseInt(process.env.DEMO_CHUNKS_MIN ?? '10', 10), + max: parseInt(process.env.DEMO_CHUNKS_MAX ?? '25', 10), + }, + maxConcurrent: parseInt(process.env.DEMO_MAX_CONCURRENT ?? '100', 10), + }; +} + +// ═══════════════════════════════════════════════════════════════════════════════ +// Demo Application +// ═══════════════════════════════════════════════════════════════════════════════ + +async function main() { + // Load configuration from environment + const config = loadConfig(); + const totalTasks = config.users * config.tasksPerUser; + + console.log(ANSI.clearScreen + ANSI.cursorHome + ANSI.hideCursor); + console.log(color('\n Initializing TaskFlow...', ANSI.cyan, ANSI.bold)); + console.log(color(` Config: ${config.users} users × ${config.tasksPerUser} tasks = ${totalTasks} total`, ANSI.dim)); + console.log(color(` Concurrency: ${config.maxConcurrent} | Chunks/task: ${config.chunksPerTask.min}-${config.chunksPerTask.max}`, ANSI.dim)); + + // Demo data directory + const demoDir = './.taskflow-demo'; + + // Ensure demo directory exists + const { mkdir, rm } = await import('node:fs/promises'); + await rm(demoDir, { recursive: true, force: true }); + await mkdir(demoDir, { recursive: true }); + + // Initialize TaskSystem with demo configuration + const maxQueueSize = Math.max(5000, totalTasks); + + const taskSystem = new TaskSystem({ + repository: { + type: 'sqlite', + database: `${demoDir}/demo.db` + }, + eventLog: { + eventLogPath: `${demoDir}/event.log`, + maxSizeBytesPerFile: 10_485_760, + maxAgePerFile: 3_600_000, + retentionCount: 3 + }, + guard: { + backpressure: { + windowSizeMs: 60_000, + maxTasksPerWindow: Math.max(5000, totalTasks), + maxTasksPerUserWindow: Math.max(100, config.tasksPerUser * 2), + maxQueuedSize: maxQueueSize + }, + slots: { + maxExecutionGlobal: config.maxConcurrent, + maxExecutionPerUser: 10, // 10 per user + slotTimeoutMs: 60_000 // 60 second timeout + }, + dlq: { + maxSize: 200, + ttlMs: 60_000, // 1 minute for demo + cleanupIntervalMs: 5_000, + maxRetries: 2 + }, + recovery: { + maxRecoverySlots: 5, + recoverySlotTimeoutMs: 30_000 + } + }, + executor: { + heartbeatIntervalMs: 5_000, + retry: { + maxAttempts: 3, + initialDelayMs: 500, + maxDelayMs: 5_000, + backoffMultiplier: 2 + } + }, + flush: { + eventLogPath: `${demoDir}/event.log`, + // Use defaults for dynamic batch sizing (500-5000) and 100ms interval + }, + recovery: { + enabled: true, + backgroundPollIntervalMs: 10_000, + staleThresholdMs: 30_000, + batchSize: 5, + completionTimeoutMs: 30_000, + heartbeatIntervalMs: 5_000 + }, + shutdown: { + gracePeriodMs: 5_000, + pollIntervalMs: 100 + } + }); + + await taskSystem.initialize(); + console.log(color(' TaskFlow initialized!', ANSI.green, ANSI.bold)); + + // ═══════════════════════════════════════════════════════════════════════════════ + // OpenAI-like LLM Task Simulation + // ═══════════════════════════════════════════════════════════════════════════════ + + // Configuration matching realistic OpenAI behavior + const LLM_CONFIG = { + minLatencyMs: 800, // Minimum response latency + maxLatencyMs: 5000, // Maximum response latency + failureRate: 0.02, // 2% random failure rate + timeoutRate: 0.01, // 1% random timeout rate + chunksMin: config.chunksPerTask.min, // From env: DEMO_CHUNKS_MIN + chunksMax: config.chunksPerTask.max, // From env: 
DEMO_CHUNKS_MAX
+ tokensPerChunk: 15, // Approximate tokens per chunk
+ };
+
+ // Register the "LLM-Agent" task (simulates OpenAI chat completion)
+ const agentTask = taskSystem.registerTask({
+ name: 'LLM-Agent',
+ description: 'Simulated LLM agent with realistic OpenAI-like latency, streaming, and failure rates',
+ type: 'user',
+
+ handler: async function* (input: { workId: number; prompt: string; complexity: number }, _context: TaskHandlerContext) {
+ const { workId, prompt, complexity } = input;
+
+ // Simulate random failures (2% rate) - happens before streaming starts
+ if (Math.random() < LLM_CONFIG.failureRate) {
+ throw new Error(`LLM service error: Rate limit exceeded or service unavailable (work #${workId})`);
+ }
+
+ // Simulate random timeouts (1% rate)
+ if (Math.random() < LLM_CONFIG.timeoutRate) {
+ throw new Error(`Request timeout: LLM service took too long to respond (work #${workId})`);
+ }
+
+ // Calculate realistic latency from prompt length, with jitter scaled by the complexity input
+ // Latency = minLatency + (promptLength / 2000, capped at 1) * (maxLatency - minLatency) + random jitter * complexity
+ const inputLength = prompt.length;
+ const normalizedComplexity = Math.min(inputLength, 2000) / 2000;
+ const variableLatency = normalizedComplexity * (LLM_CONFIG.maxLatencyMs - LLM_CONFIG.minLatencyMs);
+ const totalLatency = LLM_CONFIG.minLatencyMs + variableLatency + (Math.random() * 1000 * complexity);
+
+ // Determine number of streaming chunks
+ const numChunks = LLM_CONFIG.chunksMin + Math.floor(Math.random() * (LLM_CONFIG.chunksMax - LLM_CONFIG.chunksMin));
+ const chunkDelay = totalLatency / numChunks;
+
+ // Initial role chunk (like OpenAI's first chunk)
+ yield {
+ type: 'progress',
+ message: 'LLM started',
+ payload: {
+ role: 'assistant',
+ chunkIndex: 0,
+ totalChunks: numChunks,
+ tokensGenerated: 0,
+ progress: 0
+ }
+ };
+
+ // Intro chunks
+ const introChunks = [
+ "I understand your question.",
+ " Let me think about this step by step.",
+ " "
+ ];
+
+ for (let i = 0; i < introChunks.length; i++) {
+ await sleep(chunkDelay);
+ yield {
+ type: 'progress',
+ message: introChunks[i],
+ payload: {
+ content: introChunks[i],
+ chunkIndex: i + 1,
+ totalChunks: numChunks,
+ tokensGenerated: (i + 1) * LLM_CONFIG.tokensPerChunk,
+ progress: Math.min(95, ((i + 1) / numChunks) * 100)
+ }
+ };
+ }
+
+ // Content chunks (main response)
+ const remainingChunks = numChunks - introChunks.length - 1; // -1 for final chunk
+ for (let i = 0; i < remainingChunks; i++) {
+ await sleep(chunkDelay);
+
+ // Simulate small chance of mid-stream failure (0.5%)
+ if (Math.random() < 0.005) {
+ throw new Error(`LLM stream interrupted: Connection reset (work #${workId})`);
+ }
+
+ const chunkIndex = introChunks.length + i + 1;
+ yield {
+ type: 'progress',
+ message: `Generating response...`,
+ payload: {
+ content: `[chunk ${i + 1}/${remainingChunks}]`,
+ chunkIndex,
+ totalChunks: numChunks,
+ tokensGenerated: chunkIndex * LLM_CONFIG.tokensPerChunk,
+ progress: Math.min(95, (chunkIndex / numChunks) * 100)
+ }
+ };
+ }
+
+ // Final chunk (completion)
+ await sleep(chunkDelay);
+ const totalTokens = numChunks * LLM_CONFIG.tokensPerChunk;
+
+ yield {
+ type: 'progress',
+ message: 'Generation complete',
+ payload: {
+ finishReason: 'stop',
+ chunkIndex: numChunks,
+ totalChunks: numChunks,
+ tokensGenerated: totalTokens,
+ progress: 100
+ }
+ };
+
+ return {
+ workId,
+ model: 'gpt-4-mock',
+ usage: {
+ promptTokens: Math.floor(prompt.length / 4),
+ completionTokens: totalTokens,
+ totalTokens: Math.floor(prompt.length / 4) +
totalTokens
+ },
+ finishReason: 'stop',
+ latencyMs: totalLatency,
+ processedAt: Date.now()
+ };
+ },
+
+ recover: async function* (input: { workId: number; prompt: string; complexity: number }, ctx: RecoveryContext) {
+ // Check last progress to determine recovery point
+ const lastProgress = ctx.previousEvents
+ .filter((e: { type: string }) => e.type === 'progress')
+ .pop();
+
+ const lastChunk = (lastProgress?.payload?.chunkIndex as number) ?? 0;
+ const totalChunks = (lastProgress?.payload?.totalChunks as number) ?? 20;
+
+ if (lastProgress?.payload?.finishReason === 'stop') {
+ yield { type: 'recovered', message: 'Already completed, returning cached result' };
+ return { workId: input.workId, result: 'Recovered from completion', processedAt: Date.now() };
+ }
+
+ yield { type: 'recovered', message: `Resuming from chunk ${lastChunk}/${totalChunks}` };
+
+ // Complete remaining chunks
+ const remainingChunks = totalChunks - lastChunk;
+ for (let i = 0; i < remainingChunks; i++) {
+ await sleep(100 + Math.random() * 200);
+ yield {
+ type: 'progress',
+ message: 'Recovered chunk',
+ payload: {
+ chunkIndex: lastChunk + i + 1,
+ totalChunks,
+ tokensGenerated: (lastChunk + i + 1) * 15,
+ progress: Math.min(100, ((lastChunk + i + 1) / totalChunks) * 100)
+ }
+ };
+ }
+
+ return { workId: input.workId, result: 'Recovered', processedAt: Date.now() };
+ }
+ });
+
+ // Demo metrics
+ const metrics: DemoMetrics = {
+ totalTasks,
+ tasksSubmitted: 0,
+ tasksRejected: 0,
+ totalUsers: config.users,
+ startTime: Date.now(),
+ userTaskCounts: new Map(),
+ throughputHistory: [],
+ completionHistory: [],
+ dlqHistory: [],
+ executingHistory: [],
+ queuedHistory: [],
+ flushThroughputHistory: [],
+ currentSecondSubmissions: 0,
+ currentSecondCompletions: 0,
+ lastSecond: Math.floor(Date.now() / 1000),
+ peakQueued: 0,
+ peakExecuting: 0,
+ peakThroughput: 0,
+ peakCompletionRate: 0,
+ lastFlushCount: 0,
+ lastEntriesFlushed: 0,
+ flushStartedAt: null,
+ flushCompletedAt: null
+ };
+
+ // Initialize user tracking
+ for (let i = 0; i < config.users; i++) {
+ metrics.userTaskCounts.set(`user-${i}`, { submitted: 0, completed: 0, failed: 0 });
+ }
+
+ // Track task completions
+ const activeStreams = new Set<Promise<void>>();
+
+ // Render loop
+ let running = true;
+ const renderInterval = setInterval(() => {
+ const stats = taskSystem.getStats();
+
+ // Track peak values
+ if (stats.tasks.queued > metrics.peakQueued) {
+ metrics.peakQueued = stats.tasks.queued;
+ }
+ if (stats.tasks.executing > metrics.peakExecuting) {
+ metrics.peakExecuting = stats.tasks.executing;
+ }
+
+ // Update time series
+ const currentSecond = Math.floor(Date.now() / 1000);
+ if (currentSecond !== metrics.lastSecond) {
+ metrics.throughputHistory.push(metrics.currentSecondSubmissions);
+ metrics.completionHistory.push(metrics.currentSecondCompletions);
+ metrics.dlqHistory.push(stats.components.guard.dlq.size);
+ metrics.executingHistory.push(stats.tasks.executing);
+ metrics.queuedHistory.push(stats.tasks.queued);
+
+ // Track flush throughput (entries flushed per second)
+ const currentFlushed = stats.components.flush.worker?.totalEntriesFlushed ??
0; + const flushDelta = currentFlushed - metrics.lastEntriesFlushed; + metrics.flushThroughputHistory.push(flushDelta); + metrics.lastEntriesFlushed = currentFlushed; + + // Track peak rates + if (metrics.currentSecondSubmissions > metrics.peakThroughput) { + metrics.peakThroughput = metrics.currentSecondSubmissions; + } + if (metrics.currentSecondCompletions > metrics.peakCompletionRate) { + metrics.peakCompletionRate = metrics.currentSecondCompletions; + } + + // Keep only last 60 seconds + if (metrics.throughputHistory.length > 60) metrics.throughputHistory.shift(); + if (metrics.completionHistory.length > 60) metrics.completionHistory.shift(); + if (metrics.dlqHistory.length > 60) metrics.dlqHistory.shift(); + if (metrics.executingHistory.length > 60) metrics.executingHistory.shift(); + if (metrics.queuedHistory.length > 60) metrics.queuedHistory.shift(); + if (metrics.flushThroughputHistory.length > 60) metrics.flushThroughputHistory.shift(); + + metrics.currentSecondSubmissions = 0; + metrics.currentSecondCompletions = 0; + metrics.lastSecond = currentSecond; + } + + // Render dashboard + const dashboard = renderDashboard(stats, metrics); + process.stdout.write(ANSI.cursorHome + dashboard); + }, 100); + + // Submit tasks from multiple users + async function submitTask(userIndex: number, taskIndex: number) { + const user = `user-${userIndex}`; + const userStats = metrics.userTaskCounts.get(user); + if (!userStats) return; + + try { + // Generate a realistic prompt (varying length affects latency) + const prompts = [ + "Explain the key concepts of machine learning in simple terms.", + "Write a Python function to calculate the Fibonacci sequence with memoization and explain how it works step by step.", + "What are the best practices for building scalable microservices architecture? Include considerations for fault tolerance and observability.", + "Summarize the main differences between SQL and NoSQL databases.", + "Help me debug this code that's causing a memory leak in my Node.js application. I'm seeing high memory usage over time.", + ]; + const prompt = prompts[taskIndex % prompts.length]; + + const task = await agentTask.run({ + input: { + workId: taskIndex, + prompt, + complexity: 1 + Math.random() * 2 // Random complexity 1-3 (affects latency jitter) + }, + userId: userId(user), + idempotencyKey: idempotencyKey(`llm-${user}-${taskIndex}-${Date.now()}`) + }); + + userStats.submitted++; + metrics.tasksSubmitted++; + metrics.currentSecondSubmissions++; + + // Stream events in background + const streamPromise = (async () => { + try { + if (task.stream) { + for await (const event of task.stream()) { + if (event.type === 'complete') { + userStats.completed++; + metrics.currentSecondCompletions++; + } else if (event.type === 'error') { + userStats.failed++; + metrics.currentSecondCompletions++; + } + } + } + } catch { + // Stream closed, ignore + } + })(); + + activeStreams.add(streamPromise); + streamPromise.finally(() => activeStreams.delete(streamPromise)); + + } catch { + // Task rejected (backpressure, etc.) 
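+ // A production front end would surface this to the caller instead of only
+ // counting it - e.g. respond with HTTP 429 and a Retry-After hint.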
+ metrics.tasksRejected++;
+ }
+ }
+
+ // Simulate users submitting tasks
+ const userPromises: Promise<void>[] = [];
+
+ for (let userIndex = 0; userIndex < config.users; userIndex++) {
+ const userPromise = (async () => {
+ for (let taskNum = 0; taskNum < config.tasksPerUser && metrics.tasksSubmitted < totalTasks; taskNum++) {
+ if (!running) break;
+
+ const taskIndex = userIndex * config.tasksPerUser + taskNum;
+ if (taskIndex >= totalTasks) break;
+
+ await submitTask(userIndex, taskIndex);
+
+ // Stagger submissions - slower rate to sustain pressure over time
+ const delay = 50 + Math.random() * 100; // 50-150ms between submissions per user
+ await sleep(delay);
+ }
+ })();
+
+ userPromises.push(userPromise);
+ }
+
+ // Handle shutdown
+ const shutdown = async () => {
+ running = false;
+
+ // Wait for active streams to complete (with timeout)
+ await Promise.race([
+ Promise.all(activeStreams),
+ sleep(2000)
+ ]);
+
+ // Track flush timing
+ const flushStartTime = Date.now();
+
+ // Keep dashboard updating during shutdown to show flush progress
+ await taskSystem.shutdown({ deleteFiles: true });
+
+ // Calculate how long graceful shutdown flush took
+ metrics.flushStartedAt = flushStartTime;
+ metrics.flushCompletedAt = Date.now();
+
+ // Now stop the render interval after shutdown is complete
+ clearInterval(renderInterval);
+ console.log(ANSI.showCursor);
+
+ // Final stats
+ const finalStats = taskSystem.getStats();
+ const totalDurationMs = Date.now() - metrics.startTime;
+ const totalDurationS = totalDurationMs / 1000;
+
+ const totalProcessed = finalStats.tasks.totalCompleted + finalStats.tasks.totalFailed;
+ const successRate = totalProcessed > 0 ? (finalStats.tasks.totalCompleted / totalProcessed) * 100 : 0;
+ const errorRate = totalProcessed > 0 ? (finalStats.tasks.totalFailed / totalProcessed) * 100 : 0;
+
+ const avgThroughput = totalDurationS > 0 ? metrics.tasksSubmitted / totalDurationS : 0;
+ const avgCompletionRate = totalDurationS > 0 ? totalProcessed / totalDurationS : 0;
+
+ // Calculate averages from history
+ const avgExecuting = metrics.executingHistory.length > 0
+ ? metrics.executingHistory.reduce((a, b) => a + b, 0) / metrics.executingHistory.length
+ : 0;
+ const avgQueued = metrics.queuedHistory.length > 0
+ ? 
metrics.queuedHistory.reduce((a, b) => a + b, 0) / metrics.queuedHistory.length + : 0; + + console.log(''); + console.log(color('╔══════════════════════════════════════════════════════════════════════════════╗', ANSI.cyan, ANSI.bold)); + console.log(color('║', ANSI.cyan) + color(' 📊 FINAL PERFORMANCE REPORT ', ANSI.green, ANSI.bold) + color('║', ANSI.cyan)); + console.log(color('╠══════════════════════════════════════════════════════════════════════════════╣', ANSI.cyan)); + + // Duration + console.log(color('║', ANSI.cyan) + color(' ⏱️ DURATION', ANSI.yellow, ANSI.bold).padEnd(86) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Total Runtime: ${color(formatDuration(totalDurationMs), ANSI.white, ANSI.bold)}`.padEnd(87) + color('║', ANSI.cyan)); + + console.log(color('╠══════════════════════════════════════════════════════════════════════════════╣', ANSI.cyan)); + + // Task Summary + console.log(color('║', ANSI.cyan) + color(' 📋 TASK SUMMARY', ANSI.yellow, ANSI.bold).padEnd(86) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Total Attempted: ${color(String(metrics.tasksSubmitted + metrics.tasksRejected), ANSI.white)}`.padEnd(87) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Successfully Queued: ${color(String(metrics.tasksSubmitted), ANSI.green)}`.padEnd(87) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Rejected (Backpres.): ${color(String(metrics.tasksRejected), ANSI.red)}`.padEnd(87) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Completed: ${color(String(finalStats.tasks.totalCompleted), ANSI.green)}`.padEnd(87) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Failed: ${color(String(finalStats.tasks.totalFailed), ANSI.red)}`.padEnd(87) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Cancelled: ${color(String(finalStats.tasks.totalCancelled), ANSI.yellow)}`.padEnd(87) + color('║', ANSI.cyan)); + + console.log(color('╠══════════════════════════════════════════════════════════════════════════════╣', ANSI.cyan)); + + // Success/Error Rates + console.log(color('║', ANSI.cyan) + color(' ✅ SUCCESS & ERROR RATES', ANSI.yellow, ANSI.bold).padEnd(86) + color('║', ANSI.cyan)); + const successColor = successRate >= 95 ? ANSI.green : successRate >= 80 ? ANSI.yellow : ANSI.red; + const errorColor = errorRate <= 5 ? ANSI.green : errorRate <= 20 ? 
ANSI.yellow : ANSI.red; + console.log(color('║', ANSI.cyan) + ` Success Rate: ${color(successRate.toFixed(2) + '%', successColor, ANSI.bold)}`.padEnd(87) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Error Rate: ${color(errorRate.toFixed(2) + '%', errorColor)}`.padEnd(87) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Acceptance Rate: ${color(((metrics.tasksSubmitted / (metrics.tasksSubmitted + metrics.tasksRejected)) * 100).toFixed(2) + '%', ANSI.white)}`.padEnd(87) + color('║', ANSI.cyan)); + + console.log(color('╠══════════════════════════════════════════════════════════════════════════════╣', ANSI.cyan)); + + // Throughput + console.log(color('║', ANSI.cyan) + color(' 🚀 THROUGHPUT', ANSI.yellow, ANSI.bold).padEnd(86) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Avg Submission Rate: ${color(avgThroughput.toFixed(1) + ' tasks/s', ANSI.cyan)}`.padEnd(87) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Avg Completion Rate: ${color(avgCompletionRate.toFixed(1) + ' tasks/s', ANSI.green)}`.padEnd(87) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Peak Submission Rate: ${color(metrics.peakThroughput + ' tasks/s', ANSI.cyan, ANSI.bold)}`.padEnd(87) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Peak Completion Rate: ${color(metrics.peakCompletionRate + ' tasks/s', ANSI.green, ANSI.bold)}`.padEnd(87) + color('║', ANSI.cyan)); + + console.log(color('╠══════════════════════════════════════════════════════════════════════════════╣', ANSI.cyan)); + + // Concurrency + console.log(color('║', ANSI.cyan) + color(' ⚡ CONCURRENCY', ANSI.yellow, ANSI.bold).padEnd(86) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Peak Executing: ${color(String(metrics.peakExecuting), ANSI.cyan, ANSI.bold)}`.padEnd(87) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Avg Executing: ${color(avgExecuting.toFixed(1), ANSI.cyan)}`.padEnd(87) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Peak Queued: ${color(String(metrics.peakQueued), ANSI.yellow, ANSI.bold)}`.padEnd(87) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Avg Queued: ${color(avgQueued.toFixed(1), ANSI.yellow)}`.padEnd(87) + color('║', ANSI.cyan)); + + console.log(color('╠══════════════════════════════════════════════════════════════════════════════╣', ANSI.cyan)); + + // Guard System + const guard = finalStats.components.guard; + console.log(color('║', ANSI.cyan) + color(' 🛡️ GUARD SYSTEM', ANSI.yellow, ANSI.bold).padEnd(86) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Total Accepted: ${color(String(guard.admission.totals.accepted), ANSI.green)}`.padEnd(87) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Total Rejected: ${color(String(guard.admission.totals.rejected), ANSI.red)}`.padEnd(87) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Slot Timeouts: ${color(String(guard.slots.events.timeouts), ANSI.yellow)}`.padEnd(87) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` DLQ Total Added: ${color(String(guard.dlq.totalAdded), ANSI.yellow)}`.padEnd(87) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` DLQ Total Retried: ${color(String(guard.dlq.totalRetries), ANSI.cyan)}`.padEnd(87) + color('║', ANSI.cyan)); + + console.log(color('╠══════════════════════════════════════════════════════════════════════════════╣', ANSI.cyan)); + + // Retries + const executor = finalStats.components.executor; + 
console.log(color('║', ANSI.cyan) + color(' 🔄 RETRIES', ANSI.yellow, ANSI.bold).padEnd(86) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Retry Attempts: ${color(String(executor.retries.attempted), ANSI.yellow)}`.padEnd(87) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Retry Succeeded: ${color(String(executor.retries.succeeded), ANSI.green)}`.padEnd(87) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Retry Exhausted: ${color(String(executor.retries.exhausted), ANSI.red)}`.padEnd(87) + color('║', ANSI.cyan)); + + console.log(color('╠══════════════════════════════════════════════════════════════════════════════╣', ANSI.cyan)); + + // WAL Statistics + const eventLog = finalStats.components.eventLog; + console.log(color('║', ANSI.cyan) + color(' 📝 WAL (Event Log)', ANSI.yellow, ANSI.bold).padEnd(86) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Current Sequence: ${color(String(eventLog.sequence.current), ANSI.cyan)}`.padEnd(87) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Total Writes: ${color(String(eventLog.volume.entriesWritten), ANSI.green)}`.padEnd(87) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Rotations: ${color(String(eventLog.rotation.count), ANSI.yellow)}`.padEnd(87) + color('║', ANSI.cyan)); + + console.log(color('╠══════════════════════════════════════════════════════════════════════════════╣', ANSI.cyan)); + + // Flush Statistics + const flush = finalStats.components.flush; + const walWrites = finalStats.components.eventLog.volume.entriesWritten; + console.log(color('║', ANSI.cyan) + color(' 💾 FLUSH (Persistence)', ANSI.yellow, ANSI.bold).padEnd(86) + color('║', ANSI.cyan)); + if (flush.worker) { + const flushPct = walWrites > 0 ? ((flush.worker.totalEntriesFlushed / walWrites) * 100).toFixed(1) : '0'; + const avgFlushThroughput = metrics.flushThroughputHistory.length > 0 + ? (metrics.flushThroughputHistory.reduce((a, b) => a + b, 0) / metrics.flushThroughputHistory.length).toFixed(0) + : '0'; + const peakFlushThroughput = metrics.flushThroughputHistory.length > 0 + ? Math.max(...metrics.flushThroughputHistory) + : 0; + + console.log(color('║', ANSI.cyan) + ` Total Flushes: ${color(String(flush.worker.flushCount), ANSI.cyan)}`.padEnd(87) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Entries Flushed: ${color(String(flush.worker.totalEntriesFlushed), ANSI.green)} / ${color(String(walWrites), ANSI.white)} (${flushPct}%)`.padEnd(87) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Avg Throughput: ${color(avgFlushThroughput + ' entries/s', ANSI.cyan)}`.padEnd(87) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Peak Throughput: ${color(peakFlushThroughput + ' entries/s', ANSI.cyan, ANSI.bold)}`.padEnd(87) + color('║', ANSI.cyan)); + console.log(color('║', ANSI.cyan) + ` Flush Errors: ${color(String(flush.worker.errorCount), flush.worker.errorCount > 0 ? 
ANSI.red : ANSI.green)}`.padEnd(87) + color('║', ANSI.cyan));
+
+ // Graceful shutdown flush timing
+ if (metrics.flushStartedAt && metrics.flushCompletedAt) {
+ const shutdownFlushDuration = metrics.flushCompletedAt - metrics.flushStartedAt;
+ console.log(color('║', ANSI.cyan) + ` Shutdown Drain Time: ${color(formatDuration(shutdownFlushDuration), ANSI.yellow)}`.padEnd(87) + color('║', ANSI.cyan));
+ }
+
+ if (flush.worker.lastError) {
+ console.log(color('║', ANSI.cyan) + ` Last Error: ${color(flush.worker.lastError.substring(0, 50), ANSI.red)}`.padEnd(87) + color('║', ANSI.cyan));
+ }
+ }
+
+ console.log(color('╠══════════════════════════════════════════════════════════════════════════════╣', ANSI.cyan));
+
+ // User Distribution Summary
+ console.log(color('║', ANSI.cyan) + color(' 👥 USER DISTRIBUTION', ANSI.yellow, ANSI.bold).padEnd(86) + color('║', ANSI.cyan));
+ const userStats = Array.from(metrics.userTaskCounts.values());
+ const totalUserSubmitted = userStats.reduce((a, b) => a + b.submitted, 0);
+ const totalUserCompleted = userStats.reduce((a, b) => a + b.completed, 0);
+ const totalUserFailed = userStats.reduce((a, b) => a + b.failed, 0);
+ const avgPerUser = totalUserSubmitted / metrics.totalUsers;
+ console.log(color('║', ANSI.cyan) + ` Active Users: ${color(String(metrics.totalUsers), ANSI.cyan)}`.padEnd(87) + color('║', ANSI.cyan));
+ console.log(color('║', ANSI.cyan) + ` Avg Tasks/User: ${color(avgPerUser.toFixed(1), ANSI.white)}`.padEnd(87) + color('║', ANSI.cyan));
+ console.log(color('║', ANSI.cyan) + ` Total User Completed: ${color(String(totalUserCompleted), ANSI.green)}`.padEnd(87) + color('║', ANSI.cyan));
+ console.log(color('║', ANSI.cyan) + ` Total User Failed: ${color(String(totalUserFailed), ANSI.red)}`.padEnd(87) + color('║', ANSI.cyan));
+
+ console.log(color('╚══════════════════════════════════════════════════════════════════════════════╝', ANSI.cyan, ANSI.bold));
+ console.log('');
+
+ process.exit(0);
+ };
+
+ process.on('SIGINT', shutdown);
+ process.on('SIGTERM', shutdown);
+
+ // Wait for all submissions to complete
+ await Promise.all(userPromises);
+
+ // Wait for all tasks to complete
+ console.log(color('\n All tasks submitted. Waiting for completion...', ANSI.dim));
+
+ // Poll until all tasks complete or timeout
+ const completionTimeout = 120_000; // 2 minutes max wait
+ const startWait = Date.now();
+
+ while (running && Date.now() - startWait < completionTimeout) {
+ const stats = taskSystem.getStats();
+ if (stats.tasks.inFlight === 0) {
+ break;
+ }
+ await sleep(500);
+ }
+
+ // Trigger shutdown
+ await shutdown();
+}
+
+function sleep(ms: number): Promise<void> {
+ return new Promise(resolve => setTimeout(resolve, ms));
+}
+
+// Run the demo
+main().catch(err => {
+ console.log(ANSI.showCursor);
+ console.error('Demo error:', err);
+ process.exit(1);
+});
diff --git a/packages/taskflow/demo/showcase-demo.ts b/packages/taskflow/demo/showcase-demo.ts
new file mode 100644
index 00000000..ad76e392
--- /dev/null
+++ b/packages/taskflow/demo/showcase-demo.ts
@@ -0,0 +1,876 @@
+/**
+ * TaskFlow Showcase Demo
+ *
+ * Demonstrates the real value propositions of TaskFlow:
+ *
+ * 1. CRASH RECOVERY - Kill the process mid-run, restart, tasks resume from checkpoint
+ * 2. BACKPRESSURE - System rejects work when overloaded instead of crashing
+ * 3. FAIR SCHEDULING - Multiple tenants share resources fairly
+ * 4. IDEMPOTENCY - Same task submitted twice only executes once
+ * 5. 
REAL-TIME STREAMING - Progress events streamed per-task
+ *
+ * Usage:
+ * npx tsx demo/showcase-demo.ts [scenario]
+ *
+ * Scenarios:
+ * recovery - Demonstrates crash recovery (run twice, kill first run with Ctrl+C)
+ * backpressure - Shows rejection under overload
+ * fairness - Shows fair scheduling across tenants
+ * idempotency - Shows duplicate task prevention
+ * streaming - Shows real-time event streaming per task
+ * all - Runs all scenarios sequentially (default)
+ */
+
+import {
+ TaskSystem,
+ userId,
+ idempotencyKey,
+ type TaskHandlerContext,
+} from '../src/index.js';
+import * as fs from 'node:fs';
+import * as path from 'node:path';
+import * as readline from 'node:readline';
+
+// ============================================================================
+// CONFIGURATION
+// ============================================================================
+
+const DB_PATH = '.taskflow-showcase';
+const COLORS = {
+ reset: '\x1b[0m',
+ bright: '\x1b[1m',
+ dim: '\x1b[2m',
+ red: '\x1b[31m',
+ green: '\x1b[32m',
+ yellow: '\x1b[33m',
+ blue: '\x1b[34m',
+ magenta: '\x1b[35m',
+ cyan: '\x1b[36m',
+ white: '\x1b[37m',
+ bgBlue: '\x1b[44m',
+ bgGreen: '\x1b[42m',
+ bgYellow: '\x1b[43m',
+ bgRed: '\x1b[41m',
+};
+
+function log(color: string, prefix: string, message: string) {
+ const timestamp = new Date().toISOString().substring(11, 23);
+ console.log(`${COLORS.dim}[${timestamp}]${COLORS.reset} ${color}${prefix}${COLORS.reset} ${message}`);
+}
+
+function header(title: string) {
+ console.log('\n' + '='.repeat(70));
+ console.log(`${COLORS.bright}${COLORS.cyan} ${title}${COLORS.reset}`);
+ console.log('='.repeat(70) + '\n');
+}
+
+function subheader(title: string) {
+ console.log(`\n${COLORS.yellow}> ${title}${COLORS.reset}\n`);
+}
+
+async function sleep(ms: number): Promise<void> {
+ return new Promise((resolve) => setTimeout(resolve, ms));
+}
+
+async function waitForKeypress(message: string): Promise<void> {
+ console.log(`\n${COLORS.bgBlue}${COLORS.white} ${message} ${COLORS.reset}\n`);
+ return new Promise((resolve) => {
+ const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
+ process.stdin.setRawMode?.(true);
+ process.stdin.once('data', () => {
+ process.stdin.setRawMode?.(false);
+ rl.close();
+ resolve();
+ });
+ });
+}
+
+// ============================================================================
+// SCENARIO 1: CRASH RECOVERY
+// ============================================================================
+
+const TASK_STEPS = [
+ 'Received request',
+ 'Planning approach',
+ 'Analyzing context',
+ 'Searching documents',
+ 'Reading results',
+ 'Querying database',
+ 'Processing data',
+ 'Generating insights',
+ 'Formatting response',
+ 'Done',
+];
+
+async function scenarioRecovery(): Promise<void> {
+ header('SCENARIO 1: CRASH RECOVERY');
+
+ console.log(`${COLORS.dim}TaskFlow has two types of task recovery:${COLORS.reset}\n`);
+ console.log(` ${COLORS.cyan}Background tasks${COLORS.reset} - Auto-recover when server restarts`);
+ console.log(` ${COLORS.yellow}User tasks${COLORS.reset} - Recover when user reconnects\n`);
+
+ const demoDir = path.join(DB_PATH, 'recovery');
+ const isResume = fs.existsSync(path.join(demoDir, 'demo.db'));
+
+ if (!isResume) {
+ fs.mkdirSync(demoDir, { recursive: true });
+ }
+
+ const taskSystem = new TaskSystem({
+ repository: { type: 'sqlite', database: `${demoDir}/demo.db` },
+ eventLog: { eventLogPath: `${demoDir}/event.log` },
+ guard: { slots: { maxExecutionGlobal: 2 } },
+ flush: { flushIntervalMs: 200 },
+ 
recovery: { + enabled: true, + backgroundPollIntervalMs: 1000, + staleThresholdMs: 3000, + }, + }); + + // BACKGROUND TASK - auto-recovers on server restart + const backgroundTask = taskSystem.registerTask({ + name: 'background-report', + description: 'Background task that auto-recovers', + type: 'background', + + handler: async function* (_input: { name: string }, _context: TaskHandlerContext) { + const state: string[] = []; + for (let i = 0; i < TASK_STEPS.length; i++) { + state.push(TASK_STEPS[i]); + yield { type: 'progress', payload: { stepIndex: i, state: [...state] } }; + console.log(` ${COLORS.cyan}[Background]${COLORS.reset} Step ${i + 1}/${TASK_STEPS.length}: ${TASK_STEPS[i]}`); + await sleep(500); + } + return { state, completed: true }; + }, + + recover: async function* (_input: { name: string }, context: any) { + const prevEvents = context.previousEvents || []; + const progressEvents = prevEvents.filter((e: any) => e.type === 'progress'); + + let state: string[] = []; + let lastStepIndex = -1; + if (progressEvents.length > 0) { + const last = progressEvents[progressEvents.length - 1]; + state = last.payload?.state || []; + lastStepIndex = last.payload?.stepIndex ?? -1; + } + + const skipped = lastStepIndex + 1; + const remaining = TASK_STEPS.length - skipped; + + console.log(`\n ${COLORS.green}[Background] RECOVERED${COLORS.reset}`); + console.log(` ${COLORS.dim}├─ Restored ${skipped} steps from checkpoint (NOT re-executed!)${COLORS.reset}`); + console.log(` ${COLORS.dim}├─ Last checkpoint: "${TASK_STEPS[lastStepIndex]}"${COLORS.reset}`); + console.log(` ${COLORS.dim}└─ Continuing with ${remaining} remaining steps...${COLORS.reset}\n`); + + for (let i = lastStepIndex + 1; i < TASK_STEPS.length; i++) { + await sleep(300); + state.push(TASK_STEPS[i]); + yield { type: 'progress', payload: { stepIndex: i, state: [...state], recovered: true } }; + console.log(` ${COLORS.green}[Background]${COLORS.reset} Step ${i + 1}/${TASK_STEPS.length}: ${TASK_STEPS[i]}`); + } + return { state, completed: true }; + }, + }); + + // USER TASK - recovers when user reconnects with same idempotency key + const userTask = taskSystem.registerTask({ + name: 'user-chat', + description: 'User task that recovers on reconnect', + type: 'user', + + handler: async function* (_input: { name: string }, _context: TaskHandlerContext) { + const state: string[] = []; + for (let i = 0; i < TASK_STEPS.length; i++) { + state.push(TASK_STEPS[i]); + yield { type: 'progress', payload: { stepIndex: i, state: [...state] } }; + console.log(` ${COLORS.yellow}[User Chat]${COLORS.reset} Step ${i + 1}/${TASK_STEPS.length}: ${TASK_STEPS[i]}`); + await sleep(500); + } + return { state, completed: true }; + }, + + recover: async function* (_input: { name: string }, context: any) { + const prevEvents = context.previousEvents || []; + const progressEvents = prevEvents.filter((e: any) => e.type === 'progress'); + + let state: string[] = []; + let lastStepIndex = -1; + if (progressEvents.length > 0) { + const last = progressEvents[progressEvents.length - 1]; + state = last.payload?.state || []; + lastStepIndex = last.payload?.stepIndex ?? 
-1; + } + + const skipped = lastStepIndex + 1; + const remaining = TASK_STEPS.length - skipped; + + console.log(`\n ${COLORS.green}[User Chat] RECOVERED${COLORS.reset}`); + console.log(` ${COLORS.dim}├─ Restored ${skipped} steps from checkpoint (NOT re-executed!)${COLORS.reset}`); + console.log(` ${COLORS.dim}├─ Last checkpoint: "${TASK_STEPS[lastStepIndex]}"${COLORS.reset}`); + console.log(` ${COLORS.dim}└─ Continuing with ${remaining} remaining steps...${COLORS.reset}\n`); + + for (let i = lastStepIndex + 1; i < TASK_STEPS.length; i++) { + await sleep(300); + state.push(TASK_STEPS[i]); + yield { type: 'progress', payload: { stepIndex: i, state: [...state], recovered: true } }; + console.log(` ${COLORS.green}[User Chat]${COLORS.reset} Step ${i + 1}/${TASK_STEPS.length}: ${TASK_STEPS[i]}`); + } + return { state, completed: true }; + }, + }); + + await taskSystem.initialize(); + + if (!isResume) { + // === FIRST RUN === + console.log(`${COLORS.bgYellow}${COLORS.bright} Press Ctrl+C to simulate a crash! ${COLORS.reset}\n`); + + // Start both tasks + await backgroundTask.run({ + input: { name: 'Q3 Report' }, + userId: userId('system'), + idempotencyKey: idempotencyKey('bg-report'), + }); + + await userTask.run({ + input: { name: 'User Analysis' }, + userId: userId('user-123'), + idempotencyKey: idempotencyKey('user-chat-123'), + }); + + // Wait for completion + while (true) { + const stats = taskSystem.getStats(); + if (stats.tasks.inFlight === 0) break; + await sleep(300); + } + + console.log(`\n${COLORS.green}Both tasks completed successfully.${COLORS.reset}`); + console.log(`${COLORS.dim}Run again - nothing to recover.${COLORS.reset}\n`); + + } else { + // === SECOND RUN (RECOVERY) === + console.log(`${COLORS.bgGreen}${COLORS.white}${COLORS.bright} RECOVERY MODE ${COLORS.reset}\n`); + + console.log(`${COLORS.cyan}1. Background task:${COLORS.reset} Auto-recovering on startup...\n`); + + // Wait for background task recovery + const startWait = Date.now(); + while (Date.now() - startWait < 15000) { + const stats = taskSystem.getStats(); + if (stats.components.recovery.outcomes.background > 0 && stats.tasks.inFlight === 0) { + break; + } + await sleep(300); + } + + console.log(`\n${COLORS.green}Background task recovered automatically!${COLORS.reset}\n`); + + // Wait for user to press Enter + await waitForKeypress('Press Enter to simulate user reconnection...'); + + console.log(`${COLORS.yellow}2. 
User task:${COLORS.reset} Recovering on reconnection...\n`);
+
+ await userTask.run({
+ input: { name: 'User Analysis' },
+ userId: userId('user-123'),
+ idempotencyKey: idempotencyKey('user-chat-123'),
+ });
+
+ // Wait for user task to complete
+ while (true) {
+ const stats = taskSystem.getStats();
+ if (stats.tasks.inFlight === 0) break;
+ await sleep(300);
+ }
+
+ console.log(`\n${COLORS.green}${COLORS.bright}All tasks recovered!${COLORS.reset}`);
+ console.log(`${COLORS.dim}Key point: Steps before crash were NOT re-executed.${COLORS.reset}\n`);
+ }
+
+ await taskSystem.shutdown();
+
+ if (isResume) {
+ fs.rmSync(demoDir, { recursive: true, force: true });
+ }
+}
+
+// ============================================================================
+// SCENARIO 2: BACKPRESSURE
+// ============================================================================
+
+async function scenarioBackpressure(): Promise<void> {
+ header('SCENARIO 2: BACKPRESSURE');
+
+ console.log(`${COLORS.dim}TaskFlow protects your system from overload with two modes:${COLORS.reset}\n`);
+
+ const demoDir = path.join(DB_PATH, 'backpressure');
+ fs.rmSync(demoDir, { recursive: true, force: true });
+ fs.mkdirSync(demoDir, { recursive: true });
+
+ // === PART 1: With Timeout (Wait Pattern) ===
+ subheader('1. Wait for Capacity (with timeout)');
+
+ console.log(`${COLORS.dim}When queue is full, tasks wait until capacity is available.${COLORS.reset}\n`);
+
+ const taskSystem1 = new TaskSystem({
+ repository: { type: 'sqlite', database: `${demoDir}/demo1.db` },
+ eventLog: { eventLogPath: `${demoDir}/event1.log` },
+ guard: {
+ slots: { maxExecutionGlobal: 2 },
+ backpressure: {
+ maxQueuedSize: 3,
+ queueWaitTimeoutMs: 30000, // Wait up to 30s for capacity
+ windowSizeMs: 60_000,
+ maxTasksPerWindow: 10000,
+ maxTasksPerUserWindow: 10000,
+ },
+ },
+ flush: { flushIntervalMs: 200 },
+ });
+
+ const waitTask = taskSystem1.registerTask({
+ name: 'wait-task',
+ description: 'Task for wait demo',
+ type: 'user',
+ handler: async () => {
+ await sleep(500);
+ return { ok: true };
+ },
+ });
+
+ await taskSystem1.initialize();
+
+ console.log(`${COLORS.bright}Submitting 8 tasks (queue capacity: 3):${COLORS.reset}\n`);
+
+ const startTime = Date.now();
+ for (let i = 1; i <= 8; i++) {
+ const taskStart = Date.now();
+ await waitTask.run({
+ input: {},
+ userId: userId('user'),
+ idempotencyKey: idempotencyKey(`wait-${i}`),
+ });
+ const waited = Date.now() - taskStart;
+ if (waited > 10) {
+ console.log(` Task ${i}: ${COLORS.yellow}Waited ${waited}ms${COLORS.reset} for capacity`);
+ } else {
+ console.log(` Task ${i}: ${COLORS.green}Accepted immediately${COLORS.reset}`);
+ }
+ }
+
+ const totalTime = ((Date.now() - startTime) / 1000).toFixed(1);
+ console.log(`\n${COLORS.cyan}All 8 tasks accepted over ${totalTime}s (system throttled intake).${COLORS.reset}`);
+
+ while (taskSystem1.getStats().tasks.inFlight > 0) await sleep(100);
+ await taskSystem1.shutdown();
+
+ // === PART 2: Without Timeout (Rejection Pattern) ===
+ subheader('2. 
Immediate Rejection (no timeout)');
+
+ console.log(`${COLORS.dim}When queue is full, excess tasks are rejected immediately.${COLORS.reset}\n`);
+
+ const taskSystem2 = new TaskSystem({
+ repository: { type: 'sqlite', database: `${demoDir}/demo2.db` },
+ eventLog: { eventLogPath: `${demoDir}/event2.log` },
+ guard: {
+ slots: { maxExecutionGlobal: 2 },
+ backpressure: {
+ maxQueuedSize: 3,
+ queueWaitTimeoutMs: 0, // Reject immediately
+ windowSizeMs: 60_000,
+ maxTasksPerWindow: 10000,
+ maxTasksPerUserWindow: 10000,
+ },
+ },
+ flush: { flushIntervalMs: 200 },
+ });
+
+ const rejectTask = taskSystem2.registerTask({
+ name: 'reject-task',
+ description: 'Task for rejection demo',
+ type: 'user',
+ handler: async () => {
+ await sleep(2000);
+ return { ok: true };
+ },
+ });
+
+ await taskSystem2.initialize();
+
+ console.log(`${COLORS.bright}Submitting 8 tasks (queue capacity: 3):${COLORS.reset}\n`);
+
+ let accepted = 0;
+ let rejected = 0;
+
+ for (let i = 1; i <= 8; i++) {
+ try {
+ await rejectTask.run({
+ input: {},
+ userId: userId('user'),
+ idempotencyKey: idempotencyKey(`reject-${i}`),
+ });
+ accepted++;
+ console.log(` Task ${i}: ${COLORS.green}Accepted${COLORS.reset}`);
+ } catch {
+ rejected++;
+ console.log(` Task ${i}: ${COLORS.red}Rejected${COLORS.reset} - queue full`);
+ }
+ }
+
+ console.log(`\n${COLORS.bright}Result:${COLORS.reset} ${COLORS.green}${accepted} accepted${COLORS.reset}, ${COLORS.red}${rejected} rejected${COLORS.reset}`);
+ console.log(`\n${COLORS.cyan}System protected itself from overload by rejecting excess tasks.${COLORS.reset}`);
+
+ await taskSystem2.shutdown({ force: true });
+ fs.rmSync(demoDir, { recursive: true, force: true });
+}
+
+// ============================================================================
+// SCENARIO 3: FAIR SCHEDULING
+// ============================================================================
+
+async function scenarioFairness(): Promise<void> {
+ header('SCENARIO 3: FAIR SCHEDULING');
+
+ console.log(`${COLORS.dim}This demonstrates TaskFlow's tenant fairness.`);
+ console.log(`When multiple tenants compete for resources, each gets a fair share`);
+ console.log(`- no single tenant can monopolize the system.${COLORS.reset}\n`);
+
+ const demoDir = path.join(DB_PATH, 'fairness');
+ fs.rmSync(demoDir, { recursive: true, force: true });
+ fs.mkdirSync(demoDir, { recursive: true });
+
+ // Track per-tenant execution
+ const tenantExecuting: Record<string, number> = {
+ 'greedy': 0,
+ 'tenant-A': 0,
+ 'tenant-B': 0,
+ 'tenant-C': 0,
+ };
+ const tenantCompleted: Record<string, number> = {
+ 'greedy': 0,
+ 'tenant-A': 0,
+ 'tenant-B': 0,
+ 'tenant-C': 0,
+ };
+
+ const taskSystem = new TaskSystem({
+ repository: {
+ type: 'sqlite',
+ database: `${demoDir}/demo.db`,
+ },
+ eventLog: {
+ eventLogPath: `${demoDir}/event.log`,
+ },
+ guard: {
+ slots: {
+ maxExecutionGlobal: 10,
+ maxExecutionPerUser: 3, // Each tenant limited to 3 concurrent
+ },
+ },
+ flush: {
+ flushIntervalMs: 200,
+ },
+ });
+
+ const tenantTask = taskSystem.registerTask({
+ name: 'tenant-task',
+ description: 'Task for fairness demo',
+ type: 'user',
+ handler: async (input: { tenant: string }) => {
+ tenantExecuting[input.tenant]++;
+ await sleep(500); // 500ms tasks
+ tenantExecuting[input.tenant]--;
+ tenantCompleted[input.tenant]++;
+ return { done: true };
+ },
+ });
+
+ await taskSystem.initialize();
+
+ subheader('Scenario: Greedy tenant submits 30 tasks, three normal tenants submit 5 each');
+ console.log(`${COLORS.dim}Configuration: 10 global slots, max 3 per tenant`);
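+ // Expected steady state: the per-tenant cap keeps any one tenant at <= 3 of
+ // the 10 global slots, so the greedy tenant's extra tasks wait in the queue
+ // while tenant-A/B/C proceed unimpeded.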
+ console.log(`Watch how slots are distributed fairly across tenants.${COLORS.reset}\n`);
+
+ // Submit all tasks without waiting
+ const submissions: Promise<unknown>[] = [];
+
+ // Greedy tenant tries to hog resources (30 tasks)
+ for (let i = 0; i < 30; i++) {
+ submissions.push(tenantTask.run({
+ input: { tenant: 'greedy' },
+ userId: userId('greedy'),
+ idempotencyKey: idempotencyKey(`greedy-${i}`),
+ }));
+ }
+
+ // Normal tenants submit reasonable workloads (5 each)
+ for (const tenant of ['tenant-A', 'tenant-B', 'tenant-C']) {
+ for (let i = 0; i < 5; i++) {
+ submissions.push(tenantTask.run({
+ input: { tenant },
+ userId: userId(tenant),
+ idempotencyKey: idempotencyKey(`${tenant}-${i}`),
+ }));
+ }
+ }
+
+ // Wait for all submissions
+ await Promise.all(submissions);
+ log(COLORS.blue, '[SUBMITTED]', '45 tasks submitted (30 greedy + 15 normal)');
+
+ console.log(`\n${COLORS.yellow} Tenant Executing Completed${COLORS.reset}`);
+ console.log(` ${'─'.repeat(38)}`);
+
+ // Monitor execution distribution
+ const totalTasks = 45;
+ let lastLine = '';
+ while (true) {
+ const stats = taskSystem.getStats();
+ const completed = stats.tasks.totalCompleted;
+
+ // Build status line showing per-tenant execution
+ const lines = [
+ ` ${COLORS.red}greedy${COLORS.reset} ${tenantExecuting['greedy'].toString().padStart(2)} ${tenantCompleted['greedy'].toString().padStart(2)}/30`,
+ ` ${COLORS.green}tenant-A${COLORS.reset} ${tenantExecuting['tenant-A'].toString().padStart(2)} ${tenantCompleted['tenant-A'].toString().padStart(2)}/5`,
+ ` ${COLORS.green}tenant-B${COLORS.reset} ${tenantExecuting['tenant-B'].toString().padStart(2)} ${tenantCompleted['tenant-B'].toString().padStart(2)}/5`,
+ ` ${COLORS.green}tenant-C${COLORS.reset} ${tenantExecuting['tenant-C'].toString().padStart(2)} ${tenantCompleted['tenant-C'].toString().padStart(2)}/5`,
+ ].join('\n');
+
+ // Only print if changed
+ if (lines !== lastLine) {
+ // Move cursor up and clear lines
+ if (lastLine) {
+ process.stdout.write('\x1b[4A\x1b[0J');
+ }
+ console.log(lines);
+ lastLine = lines;
+ }
+
+ if (completed >= totalTasks) break;
+ await sleep(100);
+ }
+
+ console.log(`\n${COLORS.cyan}Key observations:${COLORS.reset}`);
+ console.log(` - Greedy tenant was limited to max 3 concurrent (not all 10 slots)`);
+ console.log(` - Normal tenants got their fair share despite greedy tenant's 30 tasks`);
+ console.log(` - All tenants completed without starvation`);
+
+ await taskSystem.shutdown();
+ fs.rmSync(demoDir, { recursive: true, force: true });
+}
+
+// ============================================================================
+// SCENARIO 4: IDEMPOTENCY
+// ============================================================================
+
+async function scenarioIdempotency(): Promise<void> {
+ header('SCENARIO 4: IDEMPOTENCY');
+
+ console.log(`${COLORS.dim}This demonstrates TaskFlow's idempotency guarantee.`);
+ console.log(`Submitting the same task ID multiple times only executes once.`);
+ console.log(`Critical for "at-least-once" delivery systems.${COLORS.reset}\n`);
+
+ const demoDir = path.join(DB_PATH, 'idempotency');
+ fs.rmSync(demoDir, { recursive: true, force: true });
+ fs.mkdirSync(demoDir, { recursive: true });
+
+ const taskSystem = new TaskSystem({
+ repository: {
+ type: 'sqlite',
+ database: `${demoDir}/demo.db`,
+ },
+ eventLog: {
+ eventLogPath: `${demoDir}/event.log`,
+ },
+ guard: {
+ slots: { maxExecutionGlobal: 10 },
+ },
+ flush: {
+ flushIntervalMs: 200,
+ },
+ });
+
+ let executionCount = 0;
+
+ const paymentTask =
taskSystem.registerTask({
+ name: 'payment',
+ description: 'Simulated payment processing',
+ type: 'user',
+ handler: async (input: { amount: number; to: string }) => {
+ executionCount++;
+ log(COLORS.magenta, '[EXECUTE]', `Processing payment #${executionCount}: $${input.amount} to ${input.to}`);
+ await sleep(500);
+ return { transactionId: `txn-${Date.now()}`, amount: input.amount };
+ },
+ });
+
+ await taskSystem.initialize();
+
+ subheader('Simulating network retry scenario...');
+ console.log(`${COLORS.dim}Client sends payment request, doesn't get response, retries 5 times.`);
+ console.log(`Without idempotency: 5 payments processed!`);
+ console.log(`With TaskFlow: Only 1 payment processed.${COLORS.reset}\n`);
+
+ const paymentKey = idempotencyKey('payment-order-12345'); // Same key for all attempts
+
+ // Simulate client retrying same request
+ for (let attempt = 1; attempt <= 5; attempt++) {
+ try {
+ const task = await paymentTask.run({
+ input: { amount: 99.99, to: 'merchant-456' },
+ userId: userId('user-123'),
+ idempotencyKey: paymentKey, // Same key every time!
+ });
+ log(COLORS.green, `[ATTEMPT ${attempt}]`, `Task accepted: ${task.id.slice(0, 12)}`);
+ } catch (error: any) {
+ if (error.message.includes('already exists') || error.message.includes('duplicate')) {
+ log(COLORS.yellow, `[ATTEMPT ${attempt}]`, 'Duplicate detected - task already submitted');
+ } else {
+ log(COLORS.red, `[ATTEMPT ${attempt}]`, `Error: ${error.message}`);
+ }
+ }
+
+ await sleep(200); // Small delay between retries
+ }
+
+ // Wait for task to complete
+ await sleep(1500);
+
+ subheader('Results');
+ console.log(` Submission attempts: ${COLORS.yellow}5${COLORS.reset}`);
+ console.log(` Actual executions: ${COLORS.green}${executionCount}${COLORS.reset}`);
+ console.log(`\n${COLORS.cyan}Key insight: Despite 5 submission attempts, payment only processed once.`);
+ console.log(`The customer won't be charged 5 times!${COLORS.reset}`);
+
+ await taskSystem.shutdown();
+ fs.rmSync(demoDir, { recursive: true, force: true });
+}
+
+// ============================================================================
+// SCENARIO 5: REAL-TIME STREAMING
+// ============================================================================
+
+async function scenarioStreaming(): Promise<void> {
+ header('SCENARIO 5: PARTIAL PROGRESS (LLM AGENT)');
+
+ console.log(`${COLORS.dim}Most task systems only track: pending → complete (0 or 1).`);
+ console.log(`TaskFlow tracks partial progress - each step is persisted.`);
+ console.log(`If crashed, the agent can resume with full conversation history.${COLORS.reset}\n`);
+
+ const demoDir = path.join(DB_PATH, 'streaming');
+ fs.rmSync(demoDir, { recursive: true, force: true });
+ fs.mkdirSync(demoDir, { recursive: true });
+
+ const taskSystem = new TaskSystem({
+ repository: { type: 'sqlite', database: `${demoDir}/demo.db` },
+ eventLog: { eventLogPath: `${demoDir}/event.log` },
+ guard: { slots: { maxExecutionGlobal: 3 } },
+ flush: { flushIntervalMs: 200 },
+ });
+
+ // Simulated LLM agent that processes a user request in multiple steps
+ const agentTask = taskSystem.registerTask({
+ name: 'llm-agent',
+ description: 'LLM agent that researches and answers questions',
+ type: 'user',
+
+ handler: async function* (input: { question: string }, _context: TaskHandlerContext) {
+ const conversation: Array<{ role: string; content: string; toolCall?: any }> = [];
+
+ // Step 1: User message
+ conversation.push({ role: 'user', content: input.question });
+ yield {
+ type: 'progress',
+ 
message: 'Received user question', + payload: { step: 'user_input', conversation: [...conversation] }, + }; + await sleep(300); + + // Step 2: Agent thinks and decides to search + conversation.push({ + role: 'assistant', + content: 'I need to search for current information about this.', + toolCall: { name: 'web_search', args: { query: 'Databricks stock price 2024' } }, + }); + yield { + type: 'progress', + message: 'Agent deciding to search...', + payload: { step: 'tool_call', tool: 'web_search', conversation: [...conversation] }, + }; + await sleep(800); + + // Step 3: Tool result comes back + conversation.push({ + role: 'tool', + content: 'Databricks valued at $43B after Series I funding in 2023. Private company.', + }); + yield { + type: 'progress', + message: 'Search results received', + payload: { step: 'tool_result', conversation: [...conversation] }, + }; + await sleep(500); + + // Step 4: Agent calls another tool + conversation.push({ + role: 'assistant', + content: 'Let me get more details about their recent performance.', + toolCall: { name: 'web_search', args: { query: 'Databricks revenue 2024' } }, + }); + yield { + type: 'progress', + message: 'Agent searching for more info...', + payload: { step: 'tool_call', tool: 'web_search', conversation: [...conversation] }, + }; + await sleep(800); + + // Step 5: Second tool result + conversation.push({ + role: 'tool', + content: 'Databricks reported $1.6B ARR in 2023, growing 50% YoY.', + }); + yield { + type: 'progress', + message: 'Additional results received', + payload: { step: 'tool_result', conversation: [...conversation] }, + }; + await sleep(500); + + // Step 6: Final response + conversation.push({ + role: 'assistant', + content: 'Databricks is a private company valued at $43B. They reported $1.6B ARR in 2023 with 50% growth.', + }); + yield { + type: 'progress', + message: 'Agent generating final response', + payload: { step: 'response', conversation: [...conversation] }, + }; + + return { conversation, success: true }; + }, + }); + + await taskSystem.initialize(); + + subheader('User asks: "What is Databricks stock price?"'); + console.log(`${COLORS.dim}Watch the agent think, call tools, and build up conversation state.${COLORS.reset}`); + console.log(`${COLORS.dim}Each step is persisted - if crashed, agent resumes with full history.${COLORS.reset}\n`); + + const task = await agentTask.run({ + input: { question: 'What is Databricks stock price?' 
},
+ userId: userId('user-123'),
+ idempotencyKey: idempotencyKey('agent-conv-001'),
+ }) as any;
+
+ // Show the conversation building up in real-time
+ for await (const event of task.stream()) {
+ if (event.type === 'progress' && event.payload?.step) {
+ const p = event.payload;
+ const lastMsg = p.conversation[p.conversation.length - 1];
+
+ switch (p.step) {
+ case 'user_input':
+ console.log(` ${COLORS.blue}USER:${COLORS.reset} "${lastMsg.content}"`);
+ break;
+ case 'tool_call':
+ console.log(` ${COLORS.yellow}AGENT:${COLORS.reset} ${lastMsg.content}`);
+ console.log(` ${COLORS.magenta}→ calling ${lastMsg.toolCall.name}(${JSON.stringify(lastMsg.toolCall.args)})${COLORS.reset}`);
+ break;
+ case 'tool_result':
+ console.log(` ${COLORS.cyan}TOOL:${COLORS.reset} ${lastMsg.content}`);
+ break;
+ case 'response':
+ console.log(` ${COLORS.green}AGENT:${COLORS.reset} ${lastMsg.content}`);
+ break;
+ }
+ console.log(` ${COLORS.dim}[checkpoint: ${p.conversation.length} messages saved]${COLORS.reset}\n`);
+ }
+
+ if (event.type === 'complete') {
+ break;
+ }
+ }
+
+ console.log(`${COLORS.cyan}Key insight: Each step persists the full conversation history.`);
+ console.log(`If the agent crashes after tool_call, recovery has the full context`);
+ console.log(`to retry just that tool call - not restart the whole conversation.${COLORS.reset}`);
+
+ await taskSystem.shutdown();
+ fs.rmSync(demoDir, { recursive: true, force: true });
+}
+
+// ============================================================================
+// MAIN
+// ============================================================================
+
+async function main(): Promise<void> {
+ const scenario = process.argv[2] || 'all';
+
+ console.log(`\n${COLORS.bgBlue}${COLORS.white}${COLORS.bright}`);
+ console.log(' +================================================================+ ');
+ console.log(' | TASKFLOW SHOWCASE DEMO | ');
+ console.log(' | Demonstrating Real-World Value Propositions | ');
+ console.log(' +================================================================+ ');
+ console.log(`${COLORS.reset}\n`);
+
+ // Ensure demo directory exists
+ fs.mkdirSync(DB_PATH, { recursive: true });
+
+ try {
+ switch (scenario) {
+ case 'recovery':
+ await scenarioRecovery();
+ break;
+ case 'backpressure':
+ await scenarioBackpressure();
+ break;
+ case 'fairness':
+ await scenarioFairness();
+ break;
+ case 'idempotency':
+ await scenarioIdempotency();
+ break;
+ case 'streaming':
+ await scenarioStreaming();
+ break;
+ case 'all':
+ await scenarioIdempotency();
+ await waitForKeypress('Press any key for next scenario...');
+
+ await scenarioBackpressure();
+ await waitForKeypress('Press any key for next scenario...');
+
+ await scenarioFairness();
+ await waitForKeypress('Press any key for next scenario...');
+
+ await scenarioStreaming();
+ await waitForKeypress('Press any key for recovery demo (interactive)...');
+
+ await scenarioRecovery();
+ break;
+ default:
+ console.log(`Unknown scenario: ${scenario}`);
+ console.log('Available: recovery, backpressure, fairness, idempotency, streaming, all');
+ process.exit(1);
+ }
+
+ console.log(`\n${COLORS.green}${COLORS.bright}Demo complete!${COLORS.reset}\n`);
+ } catch (error) {
+ if ((error as any).code === 'ERR_USE_AFTER_CLOSE') {
+ // Expected when user presses Ctrl+C during waitForKeypress
+ console.log(`\n${COLORS.yellow}Demo interrupted.${COLORS.reset}\n`);
+ } else {
+ throw error;
+ }
+ }
+
+ // Cleanup
+ fs.rmSync(DB_PATH, { recursive: true, force: true });
+}
+
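+// Entry point. As the header comment notes, a single scenario can be run in
+// isolation, e.g. `npx tsx demo/showcase-demo.ts backpressure`.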
+main().catch(console.error);
diff --git a/packages/taskflow/demo/stress-test.ts b/packages/taskflow/demo/stress-test.ts
new file mode 100644
index 00000000..3c3e3770
--- /dev/null
+++ b/packages/taskflow/demo/stress-test.ts
@@ -0,0 +1,439 @@
+/**
+ * TaskFlow Stress Test
+ *
+ * Pushes TaskFlow to its limits with fire-and-forget submission:
+ * - 100% CAPACITY: System saturated, queue full
+ * - 150% CAPACITY: Overloaded, rejections start
+ * - 200% CAPACITY: Heavy overload, massive rejections
+ * - 300% CAPACITY: Extreme overload, system under siege
+ *
+ * Usage:
+ * npx tsx demo/stress-test.ts
+ *
+ * Environment:
+ * STRESS_CONCURRENCY=50 Max concurrent tasks
+ * STRESS_QUEUE=100 Max queue size
+ */
+
+import {
+ TaskSystem,
+ userId,
+ idempotencyKey,
+} from '../src/index.js';
+import * as fs from 'node:fs';
+
+// ============================================================================
+// Configuration
+// ============================================================================
+
+const CONFIG = {
+ concurrency: parseInt(process.env.STRESS_CONCURRENCY ?? '50', 10),
+ queueSize: parseInt(process.env.STRESS_QUEUE ?? '100', 10),
+ taskDurationMs: { min: 500, max: 1500 }, // Tasks take 500-1500ms (avg 1000ms)
+};
+
+// Calculate max sustainable throughput from average task duration
+// (Little's law: concurrency / avg service time, e.g. 50 slots / 1s avg = ~50 tasks/sec)
+const AVG_TASK_DURATION = (CONFIG.taskDurationMs.min + CONFIG.taskDurationMs.max) / 2;
+const MAX_THROUGHPUT = Math.floor((CONFIG.concurrency / AVG_TASK_DURATION) * 1000);
+
+const COLORS = {
+ reset: '\x1b[0m',
+ bold: '\x1b[1m',
+ dim: '\x1b[2m',
+ red: '\x1b[31m',
+ green: '\x1b[32m',
+ yellow: '\x1b[33m',
+ blue: '\x1b[34m',
+ cyan: '\x1b[36m',
+ magenta: '\x1b[35m',
+};
+
+function log(message: string) {
+ const time = new Date().toISOString().substring(11, 23);
+ console.log(`${COLORS.dim}[${time}]${COLORS.reset} ${message}`);
+}
+
+function header(title: string) {
+ console.log(`\n${COLORS.bold}${COLORS.cyan}=== ${title} ===${COLORS.reset}\n`);
+}
+
+function sleep(ms: number): Promise<void> {
+ return new Promise(resolve => setTimeout(resolve, ms));
+}
+
+// ============================================================================
+// Stress Test
+// ============================================================================
+
+interface PhaseResult {
+ name: string;
+ targetRate: number;
+ actualRate: number;
+ submitted: number;
+ accepted: number;
+ completed: number;
+ failed: number;
+ rejected: number;
+ peakQueue: number;
+ peakExecuting: number;
+ duration: number;
+}
+
+async function main() {
+ console.log(`\n${COLORS.bold}${COLORS.blue}╔════════════════════════════════════════════════════════════╗${COLORS.reset}`);
+ console.log(`${COLORS.bold}${COLORS.blue}║ TASKFLOW STRESS TEST ║${COLORS.reset}`);
+ console.log(`${COLORS.bold}${COLORS.blue}║ (Fire-and-Forget Mode) ║${COLORS.reset}`);
+ console.log(`${COLORS.bold}${COLORS.blue}╚════════════════════════════════════════════════════════════╝${COLORS.reset}\n`);
+
+ log(`Concurrency: ${CONFIG.concurrency} slots`);
+ log(`Queue size: ${CONFIG.queueSize}`);
+ log(`Task duration: ${CONFIG.taskDurationMs.min}-${CONFIG.taskDurationMs.max}ms (avg ${AVG_TASK_DURATION}ms)`);
+ log(`Max throughput: ~${MAX_THROUGHPUT} tasks/sec`);
+ log(`Mode: ${COLORS.red}FIRE-AND-FORGET${COLORS.reset} (true stress testing)`);
+
+ const demoDir = './.taskflow-stress-test';
+ fs.rmSync(demoDir, { recursive: true, force: true });
+ fs.mkdirSync(demoDir, { recursive: true });
+
+ const taskSystem = new TaskSystem({
+ repository: { type: 'sqlite', database: 
`${demoDir}/stress.db` },
+    eventLog: { eventLogPath: `${demoDir}/event.log` },
+    guard: {
+      backpressure: {
+        maxQueuedSize: CONFIG.queueSize,
+        queueWaitTimeoutMs: 0, // Immediate rejection when queue full
+        windowSizeMs: 60_000,
+        maxTasksPerWindow: 100000,
+        maxTasksPerUserWindow: 50000,
+      },
+      slots: {
+        maxExecutionGlobal: CONFIG.concurrency,
+        maxExecutionPerUser: 20,
+      },
+    },
+    executor: {
+      retry: {
+        maxAttempts: 3,
+        initialDelayMs: 20,
+        maxDelayMs: 200,
+        backoffMultiplier: 2,
+      },
+    },
+    flush: { flushIntervalMs: 50 },
+  });
+
+  let failureRate = 0;
+  let taskId = 0;
+
+  const stressTask = taskSystem.registerTask({
+    name: 'stress-task',
+    description: 'Variable-duration task for stress testing',
+    type: 'user',
+    handler: async function* () {
+      if (Math.random() < failureRate) {
+        throw new Error('Simulated failure');
+      }
+      // Random duration between min and max
+      const duration = CONFIG.taskDurationMs.min +
+        Math.random() * (CONFIG.taskDurationMs.max - CONFIG.taskDurationMs.min);
+      await sleep(duration);
+      yield { type: 'progress', payload: { done: true } };
+      return { ok: true };
+    },
+  });
+
+  await taskSystem.initialize();
+  log('TaskFlow initialized\n');
+
+  const results: PhaseResult[] = [];
+
+  /**
+   * Run a phase with FIRE-AND-FORGET submission
+   * Tasks are submitted without awaiting - this truly stresses the system
+   */
+  async function runPhase(
+    name: string,
+    targetRate: number,
+    durationSec: number,
+    options: { failRate?: number } = {}
+  ): Promise<PhaseResult> {
+    header(name);
+
+    failureRate = options.failRate ?? 0;
+
+    const capacityPct = Math.round((targetRate / MAX_THROUGHPUT) * 100);
+    log(`Target: ${targetRate} tasks/sec (${capacityPct}% of max throughput)`);
+    if (options.failRate) {
+      log(`Failure rate: ${Math.round(options.failRate * 100)}%`);
+    }
+
+    const startStats = taskSystem.getStats();
+    const startCompleted = startStats.tasks.totalCompleted;
+    const startFailed = startStats.tasks.totalFailed;
+
+    let submitted = 0;
+    let accepted = 0;
+    let rejected = 0;
+    let peakQueue = 0;
+    let peakExecuting = 0;
+
+    const startTime = Date.now();
+    const endTime = startTime + (durationSec * 1000);
+
+    // Interval between submissions to achieve target rate
+    const intervalMs = 1000 / targetRate;
+
+    // Real-time display
+    const displayInterval = setInterval(() => {
+      const stats = taskSystem.getStats();
+      const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
+      const completed = stats.tasks.totalCompleted - startCompleted;
+      const failed = stats.tasks.totalFailed - startFailed;
+      const actualRate = submitted > 0 ? Math.round(submitted / parseFloat(elapsed)) : 0;
+      const acceptRate = submitted > 0 ? Math.round((accepted / submitted) * 100) : 100;
+
+      // Track peaks
+      if (stats.tasks.queued > peakQueue) peakQueue = stats.tasks.queued;
+      if (stats.tasks.executing > peakExecuting) peakExecuting = stats.tasks.executing;
+
+      process.stdout.write('\r' +
+        `  ${COLORS.dim}${elapsed.padStart(5)}s${COLORS.reset} │ ` +
+        `Q: ${COLORS.yellow}${stats.tasks.queued.toString().padStart(3)}${COLORS.reset}/${CONFIG.queueSize} │ ` +
+        `Ex: ${COLORS.green}${stats.tasks.executing.toString().padStart(2)}${COLORS.reset}/${CONFIG.concurrency} │ ` +
+        `Rate: ${COLORS.cyan}${actualRate.toString().padStart(4)}${COLORS.reset}/s │ ` +
+        `Acc: ${acceptRate >= 90 ? COLORS.green : acceptRate >= 50 ? 
COLORS.yellow : COLORS.red}${acceptRate.toString().padStart(3)}%${COLORS.reset} │ ` + + `Rej: ${COLORS.red}${rejected.toString().padStart(5)}${COLORS.reset} │ ` + + `Done: ${COLORS.green}${completed.toString().padStart(5)}${COLORS.reset}` + + (failed > 0 ? ` │ Fail: ${COLORS.red}${failed}${COLORS.reset}` : '') + + ' ' + ); + }, 100); + + // FIRE-AND-FORGET: Submit tasks without waiting for acceptance + // Use a tight loop with minimal delay to achieve target rate + const submissionLoop = async () => { + while (Date.now() < endTime) { + const id = taskId++; + submitted++; + + // Fire without awaiting - track via callbacks + stressTask.run({ + input: {}, + userId: userId(`user-${id % 50}`), + idempotencyKey: idempotencyKey(`stress-${id}`), + }).then(() => { + accepted++; + }).catch(() => { + rejected++; + }); + + // Tiny delay to spread submissions (not waiting for task acceptance) + await sleep(intervalMs); + } + }; + + await submissionLoop(); + + // Wait for in-flight tasks to complete + process.stdout.write('\n'); + log('Draining in-flight tasks...'); + + const drainStart = Date.now(); + const drainTimeout = 30000; // 30 second drain timeout + + while (Date.now() - drainStart < drainTimeout) { + const stats = taskSystem.getStats(); + if (stats.tasks.inFlight === 0) break; + + // Update peaks during drain + if (stats.tasks.queued > peakQueue) peakQueue = stats.tasks.queued; + if (stats.tasks.executing > peakExecuting) peakExecuting = stats.tasks.executing; + + process.stdout.write(`\r Draining: ${stats.tasks.inFlight} in-flight (Q:${stats.tasks.queued} Ex:${stats.tasks.executing}) `); + await sleep(100); + } + + clearInterval(displayInterval); + process.stdout.write('\r' + ' '.repeat(100) + '\r'); + + const endStats = taskSystem.getStats(); + const duration = (Date.now() - startTime) / 1000; + const actualRate = Math.round(submitted / durationSec); + + const result: PhaseResult = { + name, + targetRate, + actualRate, + submitted, + accepted, + completed: endStats.tasks.totalCompleted - startCompleted, + failed: endStats.tasks.totalFailed - startFailed, + rejected, + peakQueue, + peakExecuting, + duration, + }; + + // Summary + const acceptPct = submitted > 0 ? Math.round((accepted / submitted) * 100) : 0; + const rejectPct = submitted > 0 ? 
Math.round((rejected / submitted) * 100) : 0; + + log(`Submitted: ${COLORS.cyan}${submitted}${COLORS.reset} at ${actualRate}/s (target: ${targetRate}/s)`); + log(`Accepted: ${COLORS.green}${accepted}${COLORS.reset} (${acceptPct}%) │ Rejected: ${COLORS.red}${rejected}${COLORS.reset} (${rejectPct}%)`); + log(`Peak: Queue ${COLORS.yellow}${peakQueue}/${CONFIG.queueSize}${COLORS.reset}, Exec ${COLORS.green}${peakExecuting}/${CONFIG.concurrency}${COLORS.reset}`); + + if (result.failed > 0) { + log(`Task failures: ${COLORS.red}${result.failed}${COLORS.reset} (retried and exhausted)`); + } + + return result; + } + + // ========== PHASE 1: WARMUP (50% capacity) ========== + const warmup = await runPhase('WARMUP - 50% Capacity', Math.floor(MAX_THROUGHPUT * 0.5), 10); + results.push(warmup); + log(`${COLORS.green}✓ System warmed up${COLORS.reset}`); + + // ========== PHASE 2: 100% CAPACITY ========== + const full = await runPhase('SATURATE - 100% Capacity', MAX_THROUGHPUT, 15); + results.push(full); + + if (full.peakQueue >= CONFIG.queueSize * 0.8) { + log(`${COLORS.green}✓ Queue saturated (${full.peakQueue}/${CONFIG.queueSize})${COLORS.reset}`); + } else { + log(`${COLORS.yellow}⚠ Queue not fully saturated (${full.peakQueue}/${CONFIG.queueSize})${COLORS.reset}`); + } + + // ========== PHASE 3: 150% CAPACITY ========== + const overload = await runPhase('OVERLOAD - 150% Capacity', Math.floor(MAX_THROUGHPUT * 1.5), 15); + results.push(overload); + + if (overload.rejected > overload.submitted * 0.2) { + log(`${COLORS.green}✓ Backpressure active - ${Math.round(overload.rejected / overload.submitted * 100)}% rejected${COLORS.reset}`); + } else { + log(`${COLORS.yellow}⚠ Expected more rejections at 150% capacity${COLORS.reset}`); + } + + // ========== PHASE 4: 200% CAPACITY ========== + const heavy = await runPhase('HEAVY - 200% Capacity', Math.floor(MAX_THROUGHPUT * 2), 15); + results.push(heavy); + + if (heavy.rejected > heavy.submitted * 0.4) { + log(`${COLORS.green}✓ Heavy backpressure - ${Math.round(heavy.rejected / heavy.submitted * 100)}% rejected${COLORS.reset}`); + } + + // ========== PHASE 5: 300% CAPACITY (EXTREME) ========== + const extreme = await runPhase('EXTREME - 300% Capacity', Math.floor(MAX_THROUGHPUT * 3), 15); + results.push(extreme); + + if (extreme.rejected > extreme.submitted * 0.5) { + log(`${COLORS.green}✓ System survived extreme load - ${Math.round(extreme.rejected / extreme.submitted * 100)}% rejected${COLORS.reset}`); + } + + // ========== PHASE 6: FAILURES UNDER LOAD ========== + const failures = await runPhase('CHAOS - 150% + 30% Failures', Math.floor(MAX_THROUGHPUT * 1.5), 15, { failRate: 0.3 }); + results.push(failures); + + const executor = taskSystem.getStats().components.executor; + if (executor.retries.succeeded > 0) { + log(`${COLORS.green}✓ Retries recovered ${executor.retries.succeeded} tasks${COLORS.reset}`); + } + + // ========== FINAL RESULTS ========== + header('FINAL RESULTS'); + + const totalSubmitted = results.reduce((s, r) => s + r.submitted, 0); + const totalAccepted = results.reduce((s, r) => s + r.accepted, 0); + const totalRejected = results.reduce((s, r) => s + r.rejected, 0); + const totalDuration = results.reduce((s, r) => s + r.duration, 0); + + const finalStats = taskSystem.getStats(); + const successRate = finalStats.tasks.totalCompleted + finalStats.tasks.totalFailed > 0 + ? 
(finalStats.tasks.totalCompleted / (finalStats.tasks.totalCompleted + finalStats.tasks.totalFailed) * 100) + : 0; + + console.log(` ${COLORS.bold}Duration:${COLORS.reset} ${totalDuration.toFixed(1)}s`); + console.log(` ${COLORS.bold}Submitted:${COLORS.reset} ${totalSubmitted}`); + console.log(` ${COLORS.bold}Accepted:${COLORS.reset} ${COLORS.green}${totalAccepted}${COLORS.reset} (${Math.round(totalAccepted / totalSubmitted * 100)}%)`); + console.log(` ${COLORS.bold}Rejected:${COLORS.reset} ${COLORS.red}${totalRejected}${COLORS.reset} (${Math.round(totalRejected / totalSubmitted * 100)}%)`); + console.log(` ${COLORS.bold}Completed:${COLORS.reset} ${COLORS.green}${finalStats.tasks.totalCompleted}${COLORS.reset}`); + console.log(` ${COLORS.bold}Failed:${COLORS.reset} ${COLORS.red}${finalStats.tasks.totalFailed}${COLORS.reset}`); + console.log(` ${COLORS.bold}Success Rate:${COLORS.reset} ${successRate.toFixed(1)}%`); + console.log(''); + console.log(` ${COLORS.bold}Retries:${COLORS.reset} ${executor.retries.attempted} attempted → ${executor.retries.succeeded} recovered`); + console.log(` ${COLORS.bold}Peak Queue:${COLORS.reset} ${Math.max(...results.map(r => r.peakQueue))}/${CONFIG.queueSize}`); + console.log(` ${COLORS.bold}Peak Exec:${COLORS.reset} ${Math.max(...results.map(r => r.peakExecuting))}/${CONFIG.concurrency}`); + + // Per-phase summary + header('PHASE BREAKDOWN'); + + console.log(` ${'Phase'.padEnd(28)} ${'Target'.padStart(8)} ${'Actual'.padStart(8)} ${'Accept'.padStart(8)} ${'Reject'.padStart(8)} ${'Rej%'.padStart(6)}`); + console.log(` ${'-'.repeat(75)}`); + + for (const r of results) { + const rejPct = r.submitted > 0 ? Math.round(r.rejected / r.submitted * 100) : 0; + const rejColor = rejPct > 50 ? COLORS.red : rejPct > 20 ? COLORS.yellow : COLORS.green; + console.log( + ` ${r.name.padEnd(28)} ` + + `${(r.targetRate + '/s').padStart(8)} ` + + `${(r.actualRate + '/s').padStart(8)} ` + + `${r.accepted.toString().padStart(8)} ` + + `${r.rejected.toString().padStart(8)} ` + + `${rejColor}${(rejPct + '%').padStart(6)}${COLORS.reset}` + ); + } + + // Status + header('TEST STATUS'); + + const peakQueue = Math.max(...results.map(r => r.peakQueue)); + const checks = [ + { + name: 'Queue Saturation', + pass: peakQueue >= CONFIG.queueSize * 0.95, + detail: `Peak ${peakQueue}/${CONFIG.queueSize} (${Math.round(peakQueue / CONFIG.queueSize * 100)}%)` + }, + { + name: 'Backpressure', + pass: totalRejected > totalSubmitted * 0.15, + detail: `${totalRejected} rejected (${Math.round(totalRejected / totalSubmitted * 100)}%)` + }, + { + name: 'Stability', + pass: finalStats.tasks.inFlight === 0, + detail: finalStats.tasks.inFlight === 0 ? 'All tasks drained' : `${finalStats.tasks.inFlight} still in-flight` + }, + { + name: 'Retries', + pass: executor.retries.succeeded > 0, + detail: `${executor.retries.succeeded}/${executor.retries.attempted} recovered` + }, + { + name: 'Extreme Survival', + pass: extreme.accepted > 0 && extreme.peakExecuting >= CONFIG.concurrency * 0.9, + detail: `Handled ${extreme.accepted} at 300% load` + }, + ]; + + let allPassed = true; + for (const check of checks) { + const icon = check.pass ? 
`${COLORS.green}✓${COLORS.reset}` : `${COLORS.red}✗${COLORS.reset}`; + console.log(` ${icon} ${check.name}: ${check.detail}`); + if (!check.pass) allPassed = false; + } + + console.log(''); + if (allPassed) { + console.log(` ${COLORS.green}${COLORS.bold}STRESS TEST PASSED${COLORS.reset}\n`); + } else { + console.log(` ${COLORS.yellow}${COLORS.bold}STRESS TEST COMPLETED WITH WARNINGS${COLORS.reset}\n`); + } + + await taskSystem.shutdown({ deleteFiles: true }); + fs.rmSync(demoDir, { recursive: true, force: true }); +} + +main().catch(err => { + console.error('Stress test error:', err); + process.exit(1); +}); From 282287a2f66e7eb0ae1ec16965a9d53288838483 Mon Sep 17 00:00:00 2001 From: ditadi Date: Mon, 2 Feb 2026 22:43:05 +0000 Subject: [PATCH 13/13] fix(taskflow): add recently-completed cache to prevent duplicates --- packages/taskflow/src/execution/recovery.ts | 56 +++++++++++++++++-- packages/taskflow/src/execution/system.ts | 36 ++++++++++++ packages/taskflow/src/guard/backpressure.ts | 11 +++- packages/taskflow/src/guard/guard.ts | 5 +- packages/taskflow/src/guard/types.ts | 3 + .../taskflow/src/persistence/event-log.ts | 2 + 6 files changed, 102 insertions(+), 11 deletions(-) diff --git a/packages/taskflow/src/execution/recovery.ts b/packages/taskflow/src/execution/recovery.ts index 65a9443c..c5efb2e9 100644 --- a/packages/taskflow/src/execution/recovery.ts +++ b/packages/taskflow/src/execution/recovery.ts @@ -42,6 +42,8 @@ export interface TaskRecoveryDeps { executor: TaskExecutor; /** function to get task definition by name */ getDefinition: (taskName: string) => TaskDefinition | undefined; + /** event log for persisting recovery events */ + appendEvent: (event: TaskEvent) => void; } /** @@ -130,7 +132,15 @@ export class TaskRecovery { this.deps.guard.acquireRecoverySlot(); try { - for await (const event of this.recoverStaleTask(task)) { + // stream existing events from DB + for await (const event of this.streamFromDB(task)) { + this.deps.streamManager.push(task.idempotencyKey, event); + } + + // run recovery handler + for await (const event of this.runRecoveryHandler(task)) { + // persist new events to event log to ensure durability + this.deps.appendEvent(event); this.deps.streamManager.push(task.idempotencyKey, event); } this.backgroundTasksRecovered++; @@ -171,7 +181,15 @@ export class TaskRecovery { */ async *recoverUserTask(task: Task): AsyncGenerator { try { - for await (const event of this.recoverStaleTask(task)) { + // stream existing events from DB + for await (const event of this.streamFromDB(task)) { + yield event; + } + + // run recovery handler + for await (const event of this.runRecoveryHandler(task)) { + // persist new events to event log to ensure durability + this.deps.appendEvent(event); yield event; } this.userTasksRecovered++; @@ -190,6 +208,23 @@ export class TaskRecovery { */ async *recoverStaleTask( task: Task, + ): AsyncGenerator { + // yield events from db + for await (const event of this.streamFromDB(task)) { + yield event; + } + + // run recovery handler + for await (const event of this.runRecoveryHandler(task)) { + yield event; + } + } + + /** + * Run recovery handler and yield only NEW events + */ + async *runRecoveryHandler( + task: Task, ): AsyncGenerator { const definition = this.deps.getDefinition(task.name); @@ -201,11 +236,10 @@ export class TaskRecovery { ); } - // stream previous events from database + // load previous events from db const previousEvents: TaskEvent[] = []; for await (const event of this.streamFromDB(task)) { 
previousEvents.push(event);
-      yield event;
     }
 
     // create event context
@@ -258,8 +292,18 @@
 
     // yield events from recovery/re-execution
     if (isAsyncGenerator(result)) {
-      for await (const event of result) {
-        yield this.enrichEvent(event, context);
+      // iterate and capture the return value
+      let iterResult = await result.next();
+      while (!iterResult.done) {
+        yield this.enrichEvent(iterResult.value, context);
+        iterResult = await result.next();
+      }
+      // generator returned a value - emit complete event
+      if (iterResult.value !== undefined) {
+        yield this.enrichEvent(
+          { type: "complete", result: iterResult.value },
+          context,
+        );
+      }
     } else {
       const value = await result;
diff --git a/packages/taskflow/src/execution/system.ts b/packages/taskflow/src/execution/system.ts
index 15874451..30909527 100644
--- a/packages/taskflow/src/execution/system.ts
+++ b/packages/taskflow/src/execution/system.ts
@@ -94,6 +94,8 @@ export class TaskSystem {
   // queues
   private readonly pendingQueue: Map<IdempotencyKey, Task> = new Map();
   private readonly runningTasks: Map<IdempotencyKey, Task> = new Map();
+  private readonly recentlyCompletedTasks: Map<IdempotencyKey, Task> =
+    new Map();
 
   // executor tick
   private executorInterval: ReturnType<typeof setInterval> | null = null;
@@ -115,9 +117,17 @@
     // initialize components
     this.eventLog = new EventLog(this.config.eventLog ?? {}, hooks);
+
+    // ensure flush uses the same event log file as the event log
+    const flushEventLogPath =
+      this.config.flush?.eventLogPath ??
+      this.config.eventLog?.eventLogPath ??
+      "./.taskflow/event.log";
+
     this.flush = new Flush(
       {
         ...this.config.flush,
+        eventLogPath: flushEventLogPath,
         repository: this.config.repository ?? {
           type: "sqlite",
           database: "./.taskflow/sqlite.db",
@@ -165,6 +175,7 @@
         streamManager: this.streamManager,
         executor: this.executor,
         getDefinition: (name) => this.definitions.get(name),
+        appendEvent: (event) => this.eventLog.appendEvent(event),
       },
       this.hooks,
     );
@@ -271,6 +282,7 @@
     }
 
     this.runningTasks.clear();
+    this.recentlyCompletedTasks.clear();
     this.streamManager.clearAll();
 
     // sync event log to disk before shutting down flush (drain remaining events)
@@ -406,6 +418,14 @@
       return this.attachStream(pendingTask, taskIdempotencyKey);
     }
 
+    // check recently completed tasks (not yet flushed to DB)
+    const recentlyCompleted =
+      this.recentlyCompletedTasks.get(taskIdempotencyKey);
+    if (recentlyCompleted) {
+      this.streamManager.getOrCreate(taskIdempotencyKey);
+      return this.attachStream(recentlyCompleted, taskIdempotencyKey);
+    }
+
     // check database for recovery - client-side retry
     if (params.idempotencyKey) {
       this.streamManager.getOrCreate(taskIdempotencyKey);
@@ -459,6 +479,10 @@
       timestamp: Date.now(),
     };
 
+    // write to event log first to ensure durability
+    await this.eventLog.appendEvent(createdEvent);
+
+    // notify stream subscribers after event log to ensure order
     this.streamManager.push(taskIdempotencyKey, createdEvent);
 
     // add to pending queue
@@ -602,6 +626,18 @@
     this.guard.releaseExecutionSlot(task);
     this.runningTasks.delete(task.idempotencyKey);
     this.streamManager.close(task.idempotencyKey);
+
+    // add to recently completed tasks to prevent duplicates before flush
+    this.recentlyCompletedTasks.set(task.idempotencyKey, task);
+
+    // cleanup after a delay to ensure DB has the record before we remove from memory
+    const cleanupDelay = Math.max(
+      (this.config.flush?.flushIntervalMs ?? 
1000) * 3,
+      5000,
+    );
+    setTimeout(() => {
+      this.recentlyCompletedTasks.delete(task.idempotencyKey);
+    }, cleanupDelay).unref();
   }
 
   /**
diff --git a/packages/taskflow/src/guard/backpressure.ts b/packages/taskflow/src/guard/backpressure.ts
index 50f623bc..20c40403 100644
--- a/packages/taskflow/src/guard/backpressure.ts
+++ b/packages/taskflow/src/guard/backpressure.ts
@@ -166,20 +166,25 @@ export class Backpressure {
     const startTime = Date.now();
     const pollIntervalMs = 50;
 
-    while (Date.now() - startTime < timeoutMs) {
+    // try once, and if not accepted, wait and retry
+    do {
       try {
         this.accept(task, isInDLQ);
         return; // task accepted successfully
       } catch (error) {
         if (error instanceof BackpressureError) {
+          // if timeout is 0, reject immediately without waiting
+          if (timeoutMs === 0) {
+            throw error;
+          }
           // queue full or rate limited - wait and retry
           await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
           continue;
         }
-        // other errors (e.g., validation error for DLQ) - rethrow
+        // other errors - rethrow
         throw error;
       }
-    }
+    } while (Date.now() - startTime < timeoutMs); // keep trying until timeout or accepted
 
     // timeout - throw the backpressure error
     this.trackRejection("queue_full", task);
diff --git a/packages/taskflow/src/guard/guard.ts b/packages/taskflow/src/guard/guard.ts
index 589ea9fe..d9d40072 100644
--- a/packages/taskflow/src/guard/guard.ts
+++ b/packages/taskflow/src/guard/guard.ts
@@ -91,15 +91,16 @@
    * Accept a task for processing with waiting for capacity
    * If queue is full or rate limited, waits until capacity is available
    * @param task The task to accept
-   * @param timeoutMs Maximum time to wait for capacity (default: 30s)
+   * @param timeoutMs Maximum time to wait for capacity (uses config default)
    * @throws {ValidationError} if task is in DLQ
    * @throws {BackpressureError} if timeout reached while waiting
    */
   async acceptTaskWithWait(task: Task, timeoutMs?: number): Promise<void> {
+    const timeout = timeoutMs ?? this.config.backpressure.queueWaitTimeoutMs;
     await this.backpressure.acceptWithWait(
       task,
       this.dlq.has(task.idempotencyKey),
-      timeoutMs,
+      timeout,
     );
   }
diff --git a/packages/taskflow/src/guard/types.ts b/packages/taskflow/src/guard/types.ts
index 4b204863..e2e20420 100644
--- a/packages/taskflow/src/guard/types.ts
+++ b/packages/taskflow/src/guard/types.ts
@@ -13,6 +13,8 @@ export interface BackpressureConfig {
   maxTasksPerUserWindow: number;
   /** Maximum tasks that can be queued globally */
   maxQueuedSize: number;
+  /** Timeout for waiting for queue capacity (0 = immediate rejection) */
+  queueWaitTimeoutMs: number;
 }
 
 /**
@@ -237,6 +239,7 @@ export const DEFAULT_GUARD_CONFIG: GuardConfig = {
     maxTasksPerWindow: 5000,
     maxTasksPerUserWindow: 200,
     maxQueuedSize: 1000,
+    queueWaitTimeoutMs: 30_000, // 30 seconds (0 = immediate rejection)
   },
   slots: {
     maxExecutionGlobal: 100,
diff --git a/packages/taskflow/src/persistence/event-log.ts b/packages/taskflow/src/persistence/event-log.ts
index a548a3d6..55f2e21b 100644
--- a/packages/taskflow/src/persistence/event-log.ts
+++ b/packages/taskflow/src/persistence/event-log.ts
@@ -830,6 +830,8 @@ export class EventLog {
   private async saveCheckpoint(): Promise<void> {
     const seqFilePath = `${this.config.eventLogPath}.checkpoint`;
 
+    const dir = path.dirname(seqFilePath);
+    await fs.mkdir(dir, { recursive: true });
     await fs.writeFile(seqFilePath, this.currentSeq.toString(), "utf8");
   }
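
Note on the fix above, with a minimal usage sketch: the race this patch closes is a client-side retry of the same idempotencyKey in the window after a task completes but before the flush loop has written it to SQLite. Previously the task had already left runningTasks, so the retry could execute a duplicate; with the recently-completed cache the second run() attaches to the cached task instead. The sketch mirrors the demo code earlier in this series; the import path, the minimal config, and the exact shape returned by run() are assumptions, not the package's confirmed API.

import { TaskSystem, userId, idempotencyKey } from '../src/index.js';

async function duplicateRetrySketch(): Promise<void> {
  const system = new TaskSystem({
    repository: { type: 'sqlite', database: './.taskflow-sketch/sqlite.db' },
    eventLog: { eventLogPath: './.taskflow-sketch/event.log' },
    flush: { flushIntervalMs: 1_000 }, // wide flush window makes the race visible
  });

  const job = system.registerTask({
    name: 'dedupe-sketch',
    description: 'Completes instantly, then is retried within the flush window',
    type: 'user',
    handler: async function* () {
      yield { type: 'progress', payload: { done: true } };
      return { ok: true };
    },
  });

  await system.initialize();

  const key = idempotencyKey('job-42');
  await job.run({ input: {}, userId: userId('user-1'), idempotencyKey: key });

  // Same key, immediately after completion: the finished task may exist only
  // in recentlyCompletedTasks, not yet in the DB. This second call now
  // attaches to that cached task instead of executing a duplicate.
  await job.run({ input: {}, userId: userId('user-1'), idempotencyKey: key });

  await system.shutdown();
}

duplicateRetrySketch().catch(console.error);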