From bb16731814a656dca6bfc5f256ef85306368ace8 Mon Sep 17 00:00:00 2001 From: John McLear Date: Sat, 9 May 2026 13:01:46 +0100 Subject: [PATCH 1/2] =?UTF-8?q?feat(7642):=20bin/compactStalePads=20?= =?UTF-8?q?=E2=80=94=20staleness-gated=20bulk=20compaction?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds bin/compactStalePads with --older-than / --keep / --dry-run. Composes listAllPads → getLastEdited → compactPad so hot pads in active timeslider use are left alone and only the cold tail is compacted. Targeting stays a CLI concern; compactPad's API surface is unchanged. Per-pad failures (including a getLastEdited fault) don't stop the run — same error-tolerance shape as compactAllPads. End-to-end test plumbs through the real /api/1.3.1/getLastEdited + compactPad endpoints to lock the adapter contract. Daily-cron variant (cleanup.compactOlderThanDays setting) deferred to a follow-up so this PR stays focused on the on-demand operator tool from the issue's primary acceptance bullet. Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 1 + bin/compactStalePads.ts | 307 ++++++++++++++++++++++++++ src/tests/backend/specs/compactPad.ts | 265 ++++++++++++++++++++++ 3 files changed, 573 insertions(+) create mode 100644 bin/compactStalePads.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 8541a34b129..651a53e6d65 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ - Tier 1 ships in this release. Tiers 2 (manual click), 3 (auto with grace window) and 4 (autonomous in maintenance window) are designed and will land in subsequent releases. - See `doc/admin/updates.md` for full configuration. - **Pad compaction.** New `compactPad` HTTP API plus `bin/compactPad` and `bin/compactAllPads` CLIs to reclaim database space on long-lived pads with heavy edit history (issue #6194). `--keep N` retains the last N revisions; `--dry-run` previews per-pad rev counts before writing. Per-pad failures don't stop the bulk run. 
+ - `bin/compactStalePads` (issue #7642) targets only pads not edited in the last `--older-than N` days, so hot pads in active timeslider use are left alone. Same `--keep` / `--dry-run` shape as `bin/compactAllPads`. Targeting is deliberately a CLI concern — the `compactPad` API surface stays unchanged. - **New packaging targets.** - Etherpad is now published as a **Snap** package. - **Debian (.deb)** packages are built via nfpm with a systemd unit, and a signed apt repository is published to `etherpad.org/apt`. diff --git a/bin/compactStalePads.ts b/bin/compactStalePads.ts new file mode 100644 index 00000000000..7e54edfd01e --- /dev/null +++ b/bin/compactStalePads.ts @@ -0,0 +1,307 @@ +'use strict'; + +/* + * Compact every pad on the instance that has not been edited recently. + * + * Usage: + * node bin/compactStalePads.js --older-than 90 # collapse history on pads not edited in 90 days + * node bin/compactStalePads.js --older-than 90 --keep 50 # keep last 50 revisions + * node bin/compactStalePads.js --older-than 90 --dry-run # list, don't write + * + * Composes `listAllPads` → `getLastEdited` → `compactPad`. Same shape as + * `bin/compactAllPads` (per-pad error tolerance, dry-run, tally), but + * filters by edit-recency before touching anything. Targeting which pads + * to compact is deliberately a CLI concern and not a `compactPad` API + * param — staleness changes from one run to the next, the compaction + * primitive does not. + * + * Destructive — `getEtherpad`-export anything you can't afford to lose + * before running. + * + * Issue #7642: long-lived instances accumulate cold pads whose history + * nobody is navigating any more. Hot pads should be left alone; this + * tool is the brick for reclaiming space on the cold tail. 
+ */ +import path from 'node:path'; +import fs from 'node:fs'; +import process from 'node:process'; + +export type CompactStaleOpts = { + olderThanDays: number; + keepRevisions: number | null; + dryRun: boolean; +}; + +// Minimal interface mirroring the API endpoints the script needs. Tests +// substitute their own implementation that goes through supertest+JWT +// instead of fetch+APIKEY, so the loop logic is exercised against a real +// running server without dragging in apikey-file or fetch setup. +export type CompactStaleApi = { + listAllPads(): Promise; + getLastEdited(padId: string): Promise; + getRevisionsCount(padId: string): Promise; + compactPad(padId: string, keepRevisions: number | null): Promise; +}; + +export type CompactStaleReport = { + total: number; + stale: number; + ok: number; + failed: number; + skippedFresh: number; + totalRevsBefore: number; + totalRevsAfter: number; +}; + +export type CompactStaleLogger = { + info(msg: string): void; + error(msg: string): void; +}; + +const defaultLogger: CompactStaleLogger = { + info: (m) => console.log(m), + error: (m) => console.error(m), +}; + +const DAY_MS = 24 * 60 * 60 * 1000; + +// Pure-ish core: compose listAllPads → getLastEdited → compactPad with +// the same per-pad error tolerance + dry-run + tally as compactAllPads. +// `now` is injected so tests can pin the wall clock. +export const runCompactStale = async ( + api: CompactStaleApi, opts: CompactStaleOpts, + logger: CompactStaleLogger = defaultLogger, + now: () => number = Date.now, +): Promise => { + const cutoff = now() - opts.olderThanDays * DAY_MS; + + let padIds: string[]; + try { + padIds = await api.listAllPads(); + } catch (e: any) { + logger.error(`listAllPads failed: ${e.message ?? 
e}`); + return { + total: 0, stale: 0, ok: 0, failed: 1, skippedFresh: 0, + totalRevsBefore: 0, totalRevsAfter: 0, + }; + } + + if (padIds.length === 0) { + logger.info('No pads on this instance.'); + return { + total: 0, stale: 0, ok: 0, failed: 0, skippedFresh: 0, + totalRevsBefore: 0, totalRevsAfter: 0, + }; + } + + const strategy = opts.keepRevisions == null + ? 'collapse all history' + : `keep last ${opts.keepRevisions} revisions`; + logger.info( + `Found ${padIds.length} pad(s). Filter: not edited in ` + + `${opts.olderThanDays} day(s). Strategy: ${strategy}` + + `${opts.dryRun ? ' (dry run — no writes)' : ''}.`); + + const report: CompactStaleReport = { + total: padIds.length, stale: 0, ok: 0, failed: 0, skippedFresh: 0, + totalRevsBefore: 0, totalRevsAfter: 0, + }; + + // First pass: figure out which pads are actually stale. A getLastEdited + // failure on a pad is counted as a failure (we can't decide), but does + // not stop the run. + const stalePads: string[] = []; + for (const padId of padIds) { + let lastEdited: number; + try { + lastEdited = await api.getLastEdited(padId); + } catch (e: any) { + logger.error(`${padId}: getLastEdited failed: ${e.message ?? e}`); + report.failed++; + continue; + } + if (lastEdited > cutoff) { + report.skippedFresh++; + continue; + } + stalePads.push(padId); + } + report.stale = stalePads.length; + + if (stalePads.length === 0) { + logger.info( + `No stale pads (${report.skippedFresh} fresh, ${report.failed} unreadable).`); + return report; + } + + logger.info( + `${stalePads.length} stale pad(s) to process ` + + `(${report.skippedFresh} fresh skipped).`); + + for (let i = 0; i < stalePads.length; i++) { + const padId = stalePads[i]; + const idx = `[${i + 1}/${stalePads.length}]`; + + let before: number; + try { + before = await api.getRevisionsCount(padId); + } catch (e: any) { + logger.error(`${idx} ${padId}: getRevisionsCount failed: ${e.message ?? 
e}`); + report.failed++; + continue; + } + + if (opts.dryRun) { + logger.info(`${idx} ${padId}: ${before + 1} revision(s) — would compact`); + report.totalRevsBefore += before + 1; + continue; + } + + try { + await api.compactPad(padId, opts.keepRevisions); + } catch (e: any) { + logger.error(`${idx} ${padId}: compactPad failed: ${e.message ?? e}`); + report.failed++; + continue; + } + + let after: number | undefined; + try { after = await api.getRevisionsCount(padId); } + catch { /* main op already succeeded; post-count is informational */ } + + if (after != null) { + logger.info(`${idx} ${padId}: ${before + 1} → ${after + 1} revision(s)`); + report.totalRevsBefore += before + 1; + report.totalRevsAfter += after + 1; + } else { + logger.info(`${idx} ${padId}: compacted (post-count unavailable)`); + } + report.ok++; + } + + if (opts.dryRun) { + logger.info(''); + logger.info( + `Dry run complete. ${stalePads.length} stale pad(s), ` + + `${report.totalRevsBefore} total revision(s) — re-run ` + + 'without --dry-run to compact.'); + } else { + logger.info(''); + logger.info( + `Done. ${report.ok} pad(s) compacted, ${report.failed} failed, ` + + `${report.skippedFresh} fresh skipped. 
` + + `Revisions: ${report.totalRevsBefore} → ${report.totalRevsAfter} ` + + `(reclaimed ${report.totalRevsBefore - report.totalRevsAfter}).`); + } + + return report; +}; + +export const parseArgs = (argv: string[]): CompactStaleOpts | null => { + const opts: CompactStaleOpts = { + olderThanDays: NaN, keepRevisions: null, dryRun: false, + }; + for (let i = 0; i < argv.length; i++) { + const a = argv[i]; + if (a === '--dry-run') { + opts.dryRun = true; + } else if (a === '--older-than') { + const v = argv[++i]; + const n = Number(v); + if (!Number.isInteger(n) || n < 0) { + console.error(`--older-than expects a non-negative integer; got ${v}`); + return null; + } + opts.olderThanDays = n; + } else if (a === '--keep') { + const v = argv[++i]; + const n = Number(v); + if (!Number.isInteger(n) || n < 0) { + console.error(`--keep expects a non-negative integer; got ${v}`); + return null; + } + opts.keepRevisions = n; + } else { + return null; + } + } + if (!Number.isFinite(opts.olderThanDays)) { + console.error('--older-than is required'); + return null; + } + return opts; +}; + +const usage = () => { + console.error('Usage:'); + console.error(' node bin/compactStalePads.js --older-than '); + console.error(' node bin/compactStalePads.js --older-than --keep '); + console.error(' node bin/compactStalePads.js --older-than --dry-run'); + process.exit(2); +}; + +const isMain = require.main === module; +if (isMain) { + process.on('unhandledRejection', (err) => { throw err; }); + + const settings = require('ep_etherpad-lite/tests/container/loadSettings').loadSettings(); + const baseURL = `${settings.ssl ? 
'https' : 'http'}://${settings.ip}:${settings.port}`; + + const apiGet = async (p: string): Promise => { + const r = await fetch(baseURL + p); + if (!r.ok) throw new Error(`HTTP ${r.status} ${r.statusText}`); + return r.json(); + }; + const apiPost = async (p: string): Promise => { + const r = await fetch(baseURL + p, {method: 'POST'}); + if (!r.ok) throw new Error(`HTTP ${r.status} ${r.statusText}`); + return r.json(); + }; + + const opts = parseArgs(process.argv.slice(2)); + if (!opts) usage(); + + const apikey = fs.readFileSync( + path.join(__dirname, '../APIKEY.txt'), {encoding: 'utf-8'}).trim(); + + // Bind the abstract API to fetch + APIKEY auth for the CLI shell. + const cliApi: CompactStaleApi = { + async listAllPads() { + const apiInfo = await apiGet('/api/'); + const apiVersion: string | undefined = apiInfo.currentVersion; + if (!apiVersion) throw new Error('No version set in API'); + (cliApi as any)._apiVersion = apiVersion; + const r = await apiGet(`/api/${apiVersion}/listAllPads?apikey=${apikey}`); + if (r.code !== 0) throw new Error(JSON.stringify(r)); + return r.data.padIDs ?? 
[]; + }, + async getLastEdited(padId: string) { + const v = (cliApi as any)._apiVersion; + const r = await apiGet( + `/api/${v}/getLastEdited?apikey=${apikey}` + + `&padID=${encodeURIComponent(padId)}`); + if (r.code !== 0) throw new Error(JSON.stringify(r)); + return r.data.lastEdited; + }, + async getRevisionsCount(padId: string) { + const v = (cliApi as any)._apiVersion; + const r = await apiGet( + `/api/${v}/getRevisionsCount?apikey=${apikey}` + + `&padID=${encodeURIComponent(padId)}`); + if (r.code !== 0) throw new Error(JSON.stringify(r)); + return r.data.revisions; + }, + async compactPad(padId: string, keepRevisions: number | null) { + const v = (cliApi as any)._apiVersion; + const params = new URLSearchParams({apikey, padID: padId}); + if (keepRevisions != null) params.set('keepRevisions', String(keepRevisions)); + const r = await apiPost(`/api/${v}/compactPad?${params.toString()}`); + if (r.code !== 0) throw new Error(JSON.stringify(r)); + }, + }; + + (async () => { + const report = await runCompactStale(cliApi, opts!); + if (report.failed > 0) process.exit(1); + })(); +} diff --git a/src/tests/backend/specs/compactPad.ts b/src/tests/backend/specs/compactPad.ts index 96886d1ad23..c98b75f6fd7 100644 --- a/src/tests/backend/specs/compactPad.ts +++ b/src/tests/backend/specs/compactPad.ts @@ -344,4 +344,269 @@ describe(__filename, function () { assert.ok(reB.getHeadRevisionNumber() <= 1); }); }); + + // Coverage for the staleness-gated bulk loop in + // bin/compactStalePads.ts (issue #7642). Same pattern as the + // compactAllPads tests above: stub api + `now` injection so we don't + // need real wall-clock drift, plus one end-to-end run through the + // real /api/1.3.1/getLastEdited + compactPad endpoints to prove the + // CLI's adapter shape doesn't lie. 
+ describe('runCompactStale (bin/compactStalePads loop)', function () { + // eslint-disable-next-line @typescript-eslint/no-var-requires + const {runCompactStale, parseArgs} = + require('../../../../bin/compactStalePads'); + + const silent = {info: () => {}, error: () => {}}; + const NOW = 1_700_000_000_000; + const day = 24 * 60 * 60 * 1000; + const fixedNow = () => NOW; + + type StubFails = { + list?: boolean; + lastEdited?: Set; + count?: Set; + compact?: Set; + }; + const makeApi = ( + pads: Array<{id: string, ageDays: number}>, fails: StubFails = {}, + ) => { + const counts = new Map(); + const ages = new Map(); + pads.forEach((p) => { + counts.set(p.id, 5); + ages.set(p.id, NOW - p.ageDays * day); + }); + return { + async listAllPads() { + if (fails.list) throw new Error('boom'); + return pads.map((p) => p.id); + }, + async getLastEdited(padId: string) { + if (fails.lastEdited?.has(padId)) throw new Error('lastEdited-boom'); + const t = ages.get(padId); + if (t == null) throw new Error('unknown pad'); + return t; + }, + async getRevisionsCount(padId: string) { + if (fails.count?.has(padId)) throw new Error('count-boom'); + const c = counts.get(padId); + if (c == null) throw new Error('unknown pad'); + return c; + }, + async compactPad(padId: string, keepRevisions: number | null) { + if (fails.compact?.has(padId)) throw new Error('compact-boom'); + counts.set(padId, + keepRevisions == null ? 
0 : Math.min(counts.get(padId)!, keepRevisions)); + }, + }; + }; + + it('parses --older-than / --keep / --dry-run', function () { + assert.deepStrictEqual(parseArgs(['--older-than', '90']), + {olderThanDays: 90, keepRevisions: null, dryRun: false}); + assert.deepStrictEqual(parseArgs(['--older-than', '30', '--keep', '50']), + {olderThanDays: 30, keepRevisions: 50, dryRun: false}); + assert.deepStrictEqual( + parseArgs(['--older-than', '7', '--keep', '10', '--dry-run']), + {olderThanDays: 7, keepRevisions: 10, dryRun: true}); + }); + + it('rejects missing / invalid --older-than and unknown args', function () { + assert.strictEqual(parseArgs([]), null); + assert.strictEqual(parseArgs(['--keep', '10']), null); + assert.strictEqual(parseArgs(['--older-than', 'abc']), null); + assert.strictEqual(parseArgs(['--older-than', '-1']), null); + assert.strictEqual(parseArgs(['--older-than', '7', '--unknown']), null); + }); + + it('only compacts pads older than the cutoff', async function () { + const compacted: string[] = []; + const api = { + async listAllPads() { return ['fresh', 'stale-a', 'stale-b']; }, + async getLastEdited(padId: string) { + if (padId === 'fresh') return NOW - 5 * day; + return NOW - 120 * day; + }, + async getRevisionsCount() { return 3; }, + async compactPad(padId: string) { compacted.push(padId); }, + }; + const report = await runCompactStale(api, + {olderThanDays: 90, keepRevisions: null, dryRun: false}, + silent, fixedNow); + assert.strictEqual(report.total, 3); + assert.strictEqual(report.stale, 2); + assert.strictEqual(report.skippedFresh, 1); + assert.strictEqual(report.ok, 2); + assert.deepStrictEqual(compacted.sort(), ['stale-a', 'stale-b']); + }); + + it('honours --keep N for stale pads', async function () { + const seen: Array<[string, number | null]> = []; + const api = { + async listAllPads() { return ['p1', 'p2']; }, + async getLastEdited() { return NOW - 200 * day; }, + async getRevisionsCount() { return 5; }, + async compactPad(padId: 
string, k: number | null) { + seen.push([padId, k]); + }, + }; + const report = await runCompactStale(api, + {olderThanDays: 90, keepRevisions: 3, dryRun: false}, + silent, fixedNow); + assert.strictEqual(report.ok, 2); + assert.deepStrictEqual(seen, [['p1', 3], ['p2', 3]]); + }); + + it('--dry-run does not call compactPad on stale pads', async function () { + let compactCalls = 0; + const api = { + async listAllPads() { return ['old-1', 'old-2', 'fresh']; }, + async getLastEdited(padId: string) { + return padId === 'fresh' ? NOW - 1 * day : NOW - 365 * day; + }, + async getRevisionsCount() { return 4; }, + async compactPad() { compactCalls++; }, + }; + const report = await runCompactStale(api, + {olderThanDays: 90, keepRevisions: null, dryRun: true}, + silent, fixedNow); + assert.strictEqual(compactCalls, 0); + assert.strictEqual(report.stale, 2); + assert.strictEqual(report.skippedFresh, 1); + assert.strictEqual(report.totalRevsBefore, 10); // 2 stale × (4+1) + assert.strictEqual(report.totalRevsAfter, 0); + }); + + it('keeps going when one stale pad fails to compact', async function () { + const api = makeApi( + [{id: 'ok-1', ageDays: 100}, {id: 'broken', ageDays: 200}, + {id: 'ok-2', ageDays: 365}], + {compact: new Set(['broken'])}); + const report = await runCompactStale(api, + {olderThanDays: 90, keepRevisions: null, dryRun: false}, + silent, fixedNow); + assert.strictEqual(report.stale, 3); + assert.strictEqual(report.ok, 2); + assert.strictEqual(report.failed, 1); + }); + + it('counts a getLastEdited failure as a failure but keeps going', + async function () { + const api = makeApi( + [{id: 'a', ageDays: 100}, {id: 'unreadable', ageDays: 0}, + {id: 'b', ageDays: 200}], + {lastEdited: new Set(['unreadable'])}); + const report = await runCompactStale(api, + {olderThanDays: 90, keepRevisions: null, dryRun: false}, + silent, fixedNow); + assert.strictEqual(report.total, 3); + assert.strictEqual(report.stale, 2); + assert.strictEqual(report.ok, 2); + 
assert.strictEqual(report.failed, 1); + }); + + it('reports listAllPads failure without iterating', async function () { + const api = makeApi([{id: 'a', ageDays: 100}], {list: true}); + const report = await runCompactStale(api, + {olderThanDays: 90, keepRevisions: null, dryRun: false}, + silent, fixedNow); + assert.strictEqual(report.total, 0); + assert.strictEqual(report.failed, 1); + }); + + it('handles an empty instance', async function () { + const api = makeApi([]); + const report = await runCompactStale(api, + {olderThanDays: 90, keepRevisions: null, dryRun: false}, + silent, fixedNow); + assert.strictEqual(report.total, 0); + assert.strictEqual(report.stale, 0); + assert.strictEqual(report.ok, 0); + assert.strictEqual(report.failed, 0); + }); + + it('handles an instance where every pad is fresh', async function () { + const api = makeApi( + [{id: 'a', ageDays: 1}, {id: 'b', ageDays: 5}]); + const report = await runCompactStale(api, + {olderThanDays: 90, keepRevisions: null, dryRun: false}, + silent, fixedNow); + assert.strictEqual(report.stale, 0); + assert.strictEqual(report.skippedFresh, 2); + assert.strictEqual(report.ok, 0); + }); + + it('--older-than 0 treats every pad as stale', async function () { + const api = makeApi( + [{id: 'a', ageDays: 0}, {id: 'b', ageDays: 0}]); + const report = await runCompactStale(api, + {olderThanDays: 0, keepRevisions: null, dryRun: false}, + silent, fixedNow); + assert.strictEqual(report.stale, 2); + assert.strictEqual(report.ok, 2); + }); + + // Plumbs the loop through the real /api/1.3.1/getLastEdited + + // compactPad endpoints so we know the CLI's adapter shape doesn't + // lie about its contract. Two pads, both old (the test instance + // wall-clock is "now"), with --older-than 0 to force both stale. 
+ it('end-to-end against the real HTTP handler', async function () { + const padA = common.randomString(); + const padB = common.randomString(); + const padObjA = await padManager.getPad(padA); + const padObjB = await padManager.getPad(padB); + for (let i = 0; i < 4; i++) await padObjA.appendText(`a-${i}\n`); + for (let i = 0; i < 4; i++) await padObjB.appendText(`b-${i}\n`); + const beforeA = padObjA.getHeadRevisionNumber(); + const beforeB = padObjB.getHeadRevisionNumber(); + assert.ok(beforeA >= 4 && beforeB >= 4); + + const allowed = new Set([padA, padB]); + const httpApi = { + // Scope to just the pads this test created — the test DB is + // shared across describes. + async listAllPads() { return [padA, padB]; }, + async getLastEdited(padId: string) { + const r = await agent.get( + `/api/1.3.1/getLastEdited?padID=${padId}`) + .set('authorization', await generateJWTToken()) + .expect(200); + if (r.body.code !== 0) throw new Error(JSON.stringify(r.body)); + return r.body.data.lastEdited; + }, + async getRevisionsCount(padId: string) { + const r = await agent.get( + `/api/1.3.1/getRevisionsCount?padID=${padId}`) + .set('authorization', await generateJWTToken()) + .expect(200); + if (r.body.code !== 0) throw new Error(JSON.stringify(r.body)); + return r.body.data.revisions; + }, + async compactPad(padId: string, keepRevisions: number | null) { + assert.ok(allowed.has(padId)); + const url = keepRevisions == null + ? `/api/1.3.1/compactPad?padID=${padId}` + : `/api/1.3.1/compactPad?padID=${padId}&keepRevisions=${keepRevisions}`; + const r = await agent.get(url) + .set('authorization', await generateJWTToken()) + .expect(200); + if (r.body.code !== 0) throw new Error(JSON.stringify(r.body)); + }, + }; + + // --older-than 0 → cutoff == now → both freshly-edited test pads + // were last edited at or before the cutoff (<= cutoff), so both are considered stale. 
+ const report = await runCompactStale(httpApi, + {olderThanDays: 0, keepRevisions: null, dryRun: false}, silent); + assert.strictEqual(report.total, 2); + assert.strictEqual(report.stale, 2); + assert.strictEqual(report.ok, 2); + assert.strictEqual(report.failed, 0); + + const reA = await padManager.getPad(padA); + const reB = await padManager.getPad(padB); + assert.ok(reA.getHeadRevisionNumber() <= 1); + assert.ok(reB.getHeadRevisionNumber() <= 1); + }); + }); }); From d19dc0ecd059639ab2b06f55a20fc78184f7d10a Mon Sep 17 00:00:00 2001 From: John McLear Date: Sat, 9 May 2026 13:09:56 +0100 Subject: [PATCH 2/2] fix(7642): TOCTOU recheck before compaction + admin CLI docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Qodo flagged two real issues: 1. Race window between staleness selection and compaction. On a long bulk run a pad could become active between first-pass filtering and compactPad, which would then kick those sessions. Added a getLastEdited recheck right before each compact call; if the pad is now fresh it's reclassified as skippedFresh rather than failed (the user did the right thing — edited it — and we bow out). 2. doc/cli.md had nothing on pad compaction at all (gap predates this PR; #6194 landed without doc updates). Added a Pad compaction section covering all three CLIs — compactPad, compactAllPads, compactStalePads — so the toolset is discoverable as a unit. Tests cover both the recheck-skip path and a recheck-failure path. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- bin/compactStalePads.ts | 21 ++++++++++++ doc/cli.md | 39 +++++++++++++++++++++ src/tests/backend/specs/compactPad.ts | 49 +++++++++++++++++++++++++++ 3 files changed, 109 insertions(+) diff --git a/bin/compactStalePads.ts b/bin/compactStalePads.ts index 7e54edfd01e..df52cac3a35 100644 --- a/bin/compactStalePads.ts +++ b/bin/compactStalePads.ts @@ -157,6 +157,27 @@ export const runCompactStale = async ( continue; } + // Re-check staleness right before compacting. Without this the + // first-pass selection is a TOCTOU window: on a long bulk run a + // pad can become active between selection and compaction, and + // compactPad would then kick those sessions. Re-checking here + // shrinks the window to one round-trip and treats the pad as + // freshened (skipped, not failed). + let lastEditedNow: number; + try { + lastEditedNow = await api.getLastEdited(padId); + } catch (e: any) { + logger.error(`${idx} ${padId}: getLastEdited recheck failed: ${e.message ?? e}`); + report.failed++; + continue; + } + if (lastEditedNow > cutoff) { + logger.info(`${idx} ${padId}: edited during run — skipping (now fresh)`); + report.skippedFresh++; + report.stale--; + continue; + } + try { await api.compactPad(padId, opts.keepRevisions); } catch (e: any) { diff --git a/doc/cli.md b/doc/cli.md index 59f2c3ed298..aa46bb65a17 100644 --- a/doc/cli.md +++ b/doc/cli.md @@ -27,3 +27,42 @@ In this example we migrate from the old dirty db to the new rustydb engine. So w After that we need to move the data from dirty to rustydb. Therefore, we call `pnpm run --filter bin migrateDB --file1 test1.json --file2 test2.json` with these two files in our root directories. After some time the data should be copied over to the new database. + +## Pad compaction + +Long-lived pads with heavy edit history accumulate revisions in the database. 
Three CLIs reclaim that space, in increasing scope: + +| Tool | Targets | When to use | +| --- | --- | --- | +| `bin/compactPad.js <padID>` | one pad | you already know which pad has bloated history | +| `bin/compactAllPads.js` | every pad | bulk reclaim across the whole instance | +| `bin/compactStalePads.js --older-than N` | pads not edited in N days | reclaim the cold tail without touching pads still in active use | + +All three are gated on `cleanup.enabled = true` in `settings.json` and are **destructive**: history is collapsed (or trimmed). Use `getEtherpad` to export anything you can't afford to lose before running them. + +Common flags: + +- `--keep N` — retain the last N revisions instead of collapsing all history. +- `--dry-run` — list pads and revision counts without writing. + +### Examples + +```` +# Compact a specific pad, collapsing all history. +node bin/compactPad.js my-pad + +# Keep only the last 50 revisions of one pad. +node bin/compactPad.js my-pad --keep 50 + +# Compact every pad on the instance (per-pad failures don't stop the run). +node bin/compactAllPads.js +node bin/compactAllPads.js --dry-run + +# Compact only pads not edited in the last 90 days, keeping the last 50 revisions. +node bin/compactStalePads.js --older-than 90 --keep 50 +node bin/compactStalePads.js --older-than 90 --dry-run +```` + +`bin/compactStalePads.js` is the right tool for periodic operator runs on long-lived instances — hot pads that users are still navigating in timeslider stay untouched, and only the cold tail is rewritten. Per-pad failures (including a `getLastEdited` fault) are counted but do not abort the bulk run; the exit code reflects whether anything failed. + +See the `compactPad` HTTP API in `doc/api/http_api.md` for the same primitive over the wire (issues #6194, #7642). 
diff --git a/src/tests/backend/specs/compactPad.ts b/src/tests/backend/specs/compactPad.ts index c98b75f6fd7..426103f1289 100644 --- a/src/tests/backend/specs/compactPad.ts +++ b/src/tests/backend/specs/compactPad.ts @@ -536,6 +536,55 @@ describe(__filename, function () { assert.strictEqual(report.ok, 0); }); + it('skips a pad that gets edited between selection and compaction', + async function () { + // Two getLastEdited calls per pad: the first-pass selection, + // and the right-before-compact recheck. We answer "old" the + // first time and "fresh" the second to simulate an edit + // landing during the run. + const calls: Record = {p1: 0, p2: 0}; + let compactCalls = 0; + const api = { + async listAllPads() { return ['p1', 'p2']; }, + async getLastEdited(padId: string) { + calls[padId]++; + if (padId === 'p1' && calls.p1 === 2) return NOW - 1 * day; + return NOW - 200 * day; + }, + async getRevisionsCount() { return 4; }, + async compactPad() { compactCalls++; }, + }; + const report = await runCompactStale(api, + {olderThanDays: 90, keepRevisions: null, dryRun: false}, + silent, fixedNow); + assert.strictEqual(report.total, 2); + assert.strictEqual(report.stale, 1, 'p1 reclassified to fresh'); + assert.strictEqual(report.skippedFresh, 1); + assert.strictEqual(report.ok, 1); + assert.strictEqual(compactCalls, 1); + }); + + it('counts a getLastEdited recheck failure as a failure', async function () { + let compactCalls = 0; + const callCount: Record = {p1: 0}; + const api = { + async listAllPads() { return ['p1']; }, + async getLastEdited(padId: string) { + callCount[padId]++; + if (callCount[padId] === 2) throw new Error('recheck-boom'); + return NOW - 200 * day; + }, + async getRevisionsCount() { return 5; }, + async compactPad() { compactCalls++; }, + }; + const report = await runCompactStale(api, + {olderThanDays: 90, keepRevisions: null, dryRun: false}, + silent, fixedNow); + assert.strictEqual(report.failed, 1); + assert.strictEqual(report.ok, 0); + 
assert.strictEqual(compactCalls, 0); + }); + it('--older-than 0 treats every pad as stale', async function () { const api = makeApi( [{id: 'a', ageDays: 0}, {id: 'b', ageDays: 0}]);