diff --git a/bin/stasis.js b/bin/stasis.js index be04353..b9ef043 100755 --- a/bin/stasis.js +++ b/bin/stasis.js @@ -22,6 +22,7 @@ function usage(prefix = '') { stasis bundle create path/to/lockfile stasis bundle verify path/to/lockfile stasis advisories path/to/lockfile + stasis prune [path/to/project] `.trim()) process.exit(1) } @@ -73,6 +74,12 @@ if (command === 'run') { process.exitCode = code } else if (command === 'bundle') { usage('bundle command is not implemented yet') +} else if (command === 'prune') { + if (argv.length > 1) usage('Error: prune takes at most one path argument') + const root = argv[0] ? resolve(argv[0]) : process.cwd() + const { prune } = await import('../src/prune.js') + const { removed, validated } = prune({ root }) + console.warn(`[stasis] prune: validated ${validated.length} file(s), removed ${removed.length} file(s)`) } else { usage() } diff --git a/doc/prune.md b/doc/prune.md new file mode 100644 index 0000000..cac31a3 --- /dev/null +++ b/doc/prune.md @@ -0,0 +1,24 @@ +# `stasis prune` + +`stasis prune` (also exported as `@exodus/stasis/prune`) constrains an +installed `node_modules` tree to the files recorded in +`stasis.lock.json`. + +```json +// package.json +{ "scripts": { "postinstall": "stasis prune" } } +``` + +`prune` walks `node_modules`, keeps and verifies (sha512) every file +listed in the lockfile, keeps unverified `package.json` files in +directories the lockfile recognises as a module, prunes everything else +(including `package.json` files under directories the lockfile doesn't +list), and fails if a lockfile-listed file is missing on disk. The walk +planning and disk mutation are separated: any error aborts before a +single `unlink` runs. + +`prune` also rejects up front if pnpm's `enableGlobalVirtualStore` is +turned on (checked via the `npm_config_enable_global_virtual_store` env +var pnpm exports). 
const LOCKFILE = 'stasis.lock.json'

/**
 * Reject the run when pnpm's `enableGlobalVirtualStore` looks enabled.
 *
 * Reads `npm_config_enable_global_virtual_store` from the environment. An
 * unset variable, the empty string and the literal `'false'` are accepted;
 * every other value is treated as "enabled".
 *
 * @throws {Error} when the variable holds any other value.
 */
function assertGlobalVirtualStoreDisabled() {
  // pnpm exports its settings as `npm_config_*` env vars to hooks and
  // lifecycle scripts. When the global virtual store is enabled, node_modules
  // is dominated by symlinks into a shared store; pruning would walk past
  // most files (we skip symlinks) and the lockfile-vs-disk comparison stops
  // being meaningful. Reject before touching anything.
  const env = process.env.npm_config_enable_global_virtual_store
  if (env !== undefined && env !== 'false' && env !== '') {
    throw new Error(
      `stasis prune: enableGlobalVirtualStore must be false, got ${JSON.stringify(env)}`,
    )
  }
}

/**
 * Read and parse `<root>/stasis.lock.json`.
 *
 * @param {string} root - project root directory.
 * @returns whatever `Lockfile.parse` returns for the file's UTF-8 text.
 * @throws {AssertionError} when the lockfile does not exist at that path.
 */
function loadLockfile(root) {
  const path = join(root, LOCKFILE)
  assert.ok(existsSync(path), `stasis prune: ${LOCKFILE} not found at ${path}`)
  return Lockfile.parse(readFileSync(path, 'utf8'))
}

/**
 * Flatten the lockfile's module table into lookup structures.
 *
 * Only module directories that contain a real `node_modules` path segment are
 * considered. The match is on whole segments (split on `/`), so a workspace
 * directory whose name merely contains the text "node_modules" (for example
 * `packages/node_modules_tools`) is not mistaken for an installed module.
 *
 * @param {{ modules: Iterable<[string, { files: Record<string, string> }]> }} lockfile
 *   `modules` is iterated as `[dir, { files }]` pairs; `dir` uses `/` separators.
 * @returns {{ expected: Map<string, string>, knownDirs: Set<string> }}
 *   `expected` maps `<dir>/<file>` to its recorded hash; `knownDirs` holds the
 *   accepted module directories.
 */
function buildExpected(lockfile) {
  const expected = new Map()
  const knownDirs = new Set()
  for (const [dir, { files }] of lockfile.modules) {
    // workspace sources, not pnpm-managed
    if (!dir.split('/').includes('node_modules')) continue
    knownDirs.add(dir)
    for (const [rel, hash] of Object.entries(files)) {
      expected.set(`${dir}/${rel}`, hash)
    }
  }
  return { expected, knownDirs }
}

/**
 * Yield the path of every regular file below `nodeModules`.
 *
 * Traversal uses an explicit stack instead of recursion. Symbolic links are
 * never followed or yielded, and entries that are neither directories nor
 * regular files are ignored. Yields nothing when `nodeModules` does not exist.
 *
 * @param {string} nodeModules - directory to walk.
 * @yields {string} `join(dir, entry.name)` for each regular file found.
 */
function* walkFiles(nodeModules) {
  if (!existsSync(nodeModules)) return
  const stack = [nodeModules]
  while (stack.length > 0) {
    const dir = stack.pop()
    for (const entry of readdirSync(dir, { withFileTypes: true })) {
      const full = join(dir, entry.name)
      // Skip symlinks: pnpm's flat node_modules symlinks into .pnpm; the file
      // bytes are reachable via the symlink targets which we also walk
      // directly when we descend into .pnpm.
      if (entry.isSymbolicLink()) continue
      if (entry.isDirectory()) stack.push(full)
      else if (entry.isFile()) yield full
    }
  }
}

/**
 * Remove `dir` and then each parent in turn while they are empty, stopping
 * below `stopAt` (which itself is never removed).
 *
 * The containment test requires `dir` to sit under `stopAt` followed by a path
 * separator, so a sibling that only shares the string prefix (for example
 * `node_modules_backup` next to `node_modules`) is left alone.
 *
 * @param {string} dir - deepest directory to try first.
 * @param {string} stopAt - boundary directory; nothing at or above it is removed.
 */
function pruneEmptyDirs(dir, stopAt) {
  const boundary = stopAt.endsWith(sep) ? stopAt : `${stopAt}${sep}`
  while (dir.startsWith(boundary) && existsSync(dir)) {
    let entries
    try {
      entries = readdirSync(dir)
    } catch {
      // Best-effort cleanup: an unreadable directory just ends the climb.
      return
    }
    if (entries.length > 0) return
    try {
      rmdirSync(dir)
    } catch {
      // Best-effort cleanup: failing to remove a directory is not fatal.
      return
    }
    dir = dirname(dir)
  }
}

/**
 * Constrain `<root>/node_modules` to the files recorded in the lockfile.
 *
 * Phase 1 (read-only) walks the tree, hashes every file the lockfile lists,
 * and collects the files to delete. Phase 2 runs only if phase 1 found no hash
 * mismatch and no missing file; it unlinks the collected files and removes
 * directories left empty.
 *
 * @param {{ root?: string }} [options] - `root` defaults to `process.cwd()`.
 * @returns {{ removed: string[], validated: string[], kept: string[] }}
 *   Paths relative to `root`: files deleted, files whose hash matched, and all
 *   files kept (validated ones plus unvalidated module-root `package.json`s).
 * @throws {Error} when the global virtual store is enabled, a tracked file's
 *   hash differs, or a tracked file is missing on disk.
 * @throws {AssertionError} when the lockfile is absent.
 */
export function prune({ root = process.cwd() } = {}) {
  assertGlobalVirtualStoreDisabled()
  root = resolve(root)
  const nodeModules = join(root, 'node_modules')
  const lockfile = loadLockfile(root)
  const { expected, knownDirs } = buildExpected(lockfile)

  // Plan first, mutate later: walk the tree, validate every tracked file,
  // collect the deletion list, and verify nothing in the lockfile is
  // missing. Bail with a thrown error before touching disk on any failure.
  const toRemove = []
  const validated = []
  const kept = []
  const seen = new Set()
  const mismatches = []

  for (const full of walkFiles(nodeModules)) {
    // NOTE(review): lockfile keys use `/`; this comparison assumes
    // `relative()` does too (the import of state.util.js is said to assert a
    // posix separator) — confirm.
    const rel = relative(root, full)
    const expectedHash = expected.get(rel)
    if (expectedHash !== undefined) {
      seen.add(rel)
      const actual = sha512integrity(readFileSync(full))
      if (actual === expectedHash) {
        validated.push(rel)
        kept.push(rel)
      } else {
        mismatches.push({ rel, expected: expectedHash, actual })
      }
      continue
    }

    // Package.json files are kept (without hash validation, since they may
    // not be enumerated in the lockfile) only when their containing dir is
    // a module recorded in the lockfile. Stray package.jsons under
    // node_modules dirs that the lockfile doesn't know about get pruned
    // along with the rest of those modules.
    if (basename(rel) === 'package.json' && knownDirs.has(dirname(rel))) {
      kept.push(rel)
      continue
    }

    // Defense-in-depth: the path must resolve inside node_modules (no
    // symlink escape, no `..` traversal) before we queue it for deletion.
    assert.ok(
      full === nodeModules || full.startsWith(`${nodeModules}${sep}`),
      `refusing to remove path outside node_modules: ${full}`,
    )
    toRemove.push(full)
  }

  const missing = []
  for (const [rel] of expected) {
    if (!seen.has(rel)) missing.push(rel)
  }

  if (mismatches.length > 0) {
    const lines = mismatches.map(({ rel, expected: exp, actual }) =>
      `  ${rel}: expected ${exp}, got ${actual}`).join('\n')
    throw new Error(`stasis prune: hash mismatch for ${mismatches.length} file(s):\n${lines}`)
  }
  if (missing.length > 0) {
    const sample = missing.slice(0, 5).join(', ')
    const suffix = missing.length > 5 ? `, ... (${missing.length} total)` : ''
    throw new Error(`stasis prune: files listed in lockfile are missing on disk: ${sample}${suffix}`)
  }

  const removed = []
  const touchedDirs = new Set()
  for (const full of toRemove) {
    unlinkSync(full)
    removed.push(relative(root, full))
    touchedDirs.add(dirname(full))
  }

  // Longest paths first so the deepest directories are tried first.
  const sortedDirs = [...touchedDirs].sort((a, b) => b.length - a.length)
  for (const d of sortedDirs) pruneEmptyDirs(d, nodeModules)

  return { removed, validated, kept }
}
0000000..d5f1776 --- /dev/null +++ b/tests/fixtures/prune/node_modules/extra/package.json @@ -0,0 +1 @@ +{ "name": "extra", "version": "0.0.0" } diff --git a/tests/fixtures/prune/node_modules/loose/index.js b/tests/fixtures/prune/node_modules/loose/index.js new file mode 100644 index 0000000..ff4d4f5 --- /dev/null +++ b/tests/fixtures/prune/node_modules/loose/index.js @@ -0,0 +1 @@ +export const y = 2 diff --git a/tests/fixtures/prune/node_modules/loose/package.json b/tests/fixtures/prune/node_modules/loose/package.json new file mode 100644 index 0000000..fd28f8a --- /dev/null +++ b/tests/fixtures/prune/node_modules/loose/package.json @@ -0,0 +1 @@ +{ "name": "loose", "version": "0.1.0" } diff --git a/tests/fixtures/prune/node_modules/orphan/nested/file.txt b/tests/fixtures/prune/node_modules/orphan/nested/file.txt new file mode 100644 index 0000000..f53f6a4 --- /dev/null +++ b/tests/fixtures/prune/node_modules/orphan/nested/file.txt @@ -0,0 +1 @@ +junk that should be pruned along with its containing directories diff --git a/tests/fixtures/prune/node_modules/widget/extra.js b/tests/fixtures/prune/node_modules/widget/extra.js new file mode 100644 index 0000000..34f5ab3 --- /dev/null +++ b/tests/fixtures/prune/node_modules/widget/extra.js @@ -0,0 +1 @@ +// not tracked by the lockfile, should be pruned diff --git a/tests/fixtures/prune/node_modules/widget/index.js b/tests/fixtures/prune/node_modules/widget/index.js new file mode 100644 index 0000000..09b76aa --- /dev/null +++ b/tests/fixtures/prune/node_modules/widget/index.js @@ -0,0 +1 @@ +export const x = 1 diff --git a/tests/fixtures/prune/node_modules/widget/package.json b/tests/fixtures/prune/node_modules/widget/package.json new file mode 100644 index 0000000..65abccf --- /dev/null +++ b/tests/fixtures/prune/node_modules/widget/package.json @@ -0,0 +1 @@ +{ "name": "widget", "version": "1.2.3" } diff --git a/tests/fixtures/prune/stasis.lock.json b/tests/fixtures/prune/stasis.lock.json new file mode 100644 index 
const fixtures = join(dirname(fileURLToPath(import.meta.url)), 'fixtures')
const PRUNE_FIXTURE = join(fixtures, 'prune')
const EMPTY_FIXTURE = join(fixtures, 'prune-empty')

// Environment variable `prune` inspects before doing anything else. Tests
// that exercise the default path clear it explicitly so an ambient value
// (e.g. when the suite is launched through pnpm) cannot make them fail for an
// unrelated reason.
const GVS_ENV = 'npm_config_enable_global_virtual_store'

/**
 * Copy `fixture` into a fresh temp directory and register its removal for
 * when test `t` finishes.
 *
 * @returns {string} path of the staged project root.
 */
function stage(t, fixture) {
  const root = mkdtempSync(join(tmpdir(), 'stasis-prune-'))
  cpSync(fixture, root, { recursive: true })
  t.after(() => rmSync(root, { recursive: true, force: true }))
  return root
}

/**
 * Set (or, when `value` is `undefined`, delete) `process.env[name]` for the
 * duration of test `t`, restoring the previous state afterwards.
 */
function withEnv(t, name, value) {
  const prev = process.env[name]
  if (value === undefined) delete process.env[name]
  else process.env[name] = value
  t.after(() => {
    if (prev === undefined) delete process.env[name]
    else process.env[name] = prev
  })
}

test('keeps lockfile files, validates hashes, removes others, cleans empty dirs', (t) => {
  withEnv(t, GVS_ENV, undefined)
  const root = stage(t, PRUNE_FIXTURE)

  const { validated, removed, kept } = prune({ root })

  // listed in the lockfile -> validated against the recorded hash
  t.assert.ok(validated.includes('node_modules/widget/index.js'))
  t.assert.ok(validated.includes('node_modules/widget/package.json'))
  t.assert.ok(validated.includes('node_modules/loose/index.js'))

  // package.json under a module dir that the lockfile lists, but not in
  // its `files` map -> kept unvalidated
  t.assert.ok(kept.includes('node_modules/loose/package.json'))
  t.assert.ok(!validated.includes('node_modules/loose/package.json'))

  // package.json under a module dir the lockfile does NOT list -> pruned
  t.assert.ok(removed.includes('node_modules/extra/package.json'))

  // untracked files everywhere -> pruned
  t.assert.ok(removed.includes('node_modules/widget/extra.js'))
  t.assert.ok(removed.includes('node_modules/extra/index.js'))
  t.assert.ok(removed.includes('node_modules/orphan/nested/file.txt'))

  t.assert.ok(existsSync(join(root, 'node_modules/widget/index.js')))
  t.assert.ok(existsSync(join(root, 'node_modules/widget/package.json')))
  t.assert.ok(existsSync(join(root, 'node_modules/loose/index.js')))
  t.assert.ok(existsSync(join(root, 'node_modules/loose/package.json')))
  t.assert.ok(!existsSync(join(root, 'node_modules/widget/extra.js')))
  t.assert.ok(!existsSync(join(root, 'node_modules/extra')))
  t.assert.ok(!existsSync(join(root, 'node_modules/orphan')))
})

test('throws when a tracked file has a wrong hash', (t) => {
  withEnv(t, GVS_ENV, undefined)
  const root = stage(t, PRUNE_FIXTURE)
  writeFileSync(join(root, 'node_modules/widget/index.js'), 'export const x = 2\n')
  t.assert.throws(
    () => prune({ root }),
    /hash mismatch for 1 file\(s\):\n {2}node_modules\/widget\/index\.js: expected sha512-/u,
  )
})

test('does not touch the tree when validation fails', (t) => {
  withEnv(t, GVS_ENV, undefined)
  const root = stage(t, PRUNE_FIXTURE)
  writeFileSync(join(root, 'node_modules/widget/index.js'), 'export const x = 2\n')
  t.assert.throws(() => prune({ root }))

  // Untracked files that would have been pruned must still exist.
  t.assert.ok(existsSync(join(root, 'node_modules/widget/extra.js')))
  t.assert.ok(existsSync(join(root, 'node_modules/extra/index.js')))
  t.assert.ok(existsSync(join(root, 'node_modules/orphan/nested/file.txt')))
})

test('throws when a lockfile file is missing on disk', (t) => {
  withEnv(t, GVS_ENV, undefined)
  const root = stage(t, PRUNE_FIXTURE)
  rmSync(join(root, 'node_modules/widget/index.js'))
  t.assert.throws(() => prune({ root }), /missing on disk/u)
})

test('does not touch the tree when a tracked file is missing on disk', (t) => {
  withEnv(t, GVS_ENV, undefined)
  const root = stage(t, PRUNE_FIXTURE)
  rmSync(join(root, 'node_modules/widget/index.js'))
  t.assert.throws(() => prune({ root }))

  t.assert.ok(existsSync(join(root, 'node_modules/widget/extra.js')))
  t.assert.ok(existsSync(join(root, 'node_modules/extra/index.js')))
  t.assert.ok(existsSync(join(root, 'node_modules/orphan/nested/file.txt')))
})

test('throws when stasis.lock.json is missing', (t) => {
  // Without this the env check, which runs first, could throw a different
  // error than the one asserted below.
  withEnv(t, GVS_ENV, undefined)
  const root = stage(t, EMPTY_FIXTURE)
  t.assert.throws(() => prune({ root }), /stasis\.lock\.json not found/u)
})

test('throws when pnpm enableGlobalVirtualStore is enabled', (t) => {
  withEnv(t, GVS_ENV, 'true')
  const root = stage(t, PRUNE_FIXTURE)
  t.assert.throws(() => prune({ root }), /enableGlobalVirtualStore must be false/u)

  // Nothing on disk should have been touched.
  t.assert.ok(existsSync(join(root, 'node_modules/widget/extra.js')))
  t.assert.ok(existsSync(join(root, 'node_modules/extra/index.js')))
})

test('accepts pnpm enableGlobalVirtualStore explicitly set to false', (t) => {
  const root = stage(t, PRUNE_FIXTURE)
  withEnv(t, GVS_ENV, 'false')
  t.assert.doesNotThrow(() => prune({ root }))
})

test('accepts pnpm enableGlobalVirtualStore explicitly set to empty string', (t) => {
  const root = stage(t, PRUNE_FIXTURE)
  withEnv(t, GVS_ENV, '')
  t.assert.doesNotThrow(() => prune({ root }))
})