diff --git a/packages/agent/src/index.ts b/packages/agent/src/index.ts index 0bd90cf..22e858b 100644 --- a/packages/agent/src/index.ts +++ b/packages/agent/src/index.ts @@ -102,6 +102,53 @@ export { toolRequiresApproval, updateState, } from './lib/conversation-state.js'; +export { HooksManager } from './lib/hooks-manager.js'; +// Hooks system +export { matchesTool } from './lib/hooks-matchers.js'; +export { resolveHooks } from './lib/hooks-resolve.js'; +export { + BUILT_IN_HOOK_NAMES, + BUILT_IN_HOOKS, + PermissionRequestPayloadSchema, + PermissionRequestResultSchema, + PostToolUseFailurePayloadSchema, + PostToolUsePayloadSchema, + PreToolUsePayloadSchema, + PreToolUseResultSchema, + SessionEndPayloadSchema, + SessionStartPayloadSchema, + StopPayloadSchema, + StopResultSchema, + UserPromptSubmitPayloadSchema, + UserPromptSubmitResultSchema, +} from './lib/hooks-schemas.js'; +export type { + AsyncOutput, + BuiltInHookDefinitions, + EmitResult, + HookDefinition, + HookEntry, + HookHandler, + HookRegistry, + HookReturn, + HooksManagerOptions, + InlineHookConfig, + LifecycleHookContext, + PermissionRequestPayload, + PermissionRequestResult, + PostToolUseFailurePayload, + PostToolUsePayload, + PreToolUsePayload, + PreToolUseResult, + SessionEndPayload, + SessionStartPayload, + StopPayload, + StopResult, + ToolMatcher, + UserPromptSubmitPayload, + UserPromptSubmitResult, +} from './lib/hooks-types.js'; +export { HookName } from './lib/hooks-types.js'; export type { GetResponseOptions } from './lib/model-result.js'; export { ModelResult } from './lib/model-result.js'; // Next turn params helpers diff --git a/packages/agent/src/inner-loop/call-model.ts b/packages/agent/src/inner-loop/call-model.ts index 8e933b6..802af88 100644 --- a/packages/agent/src/inner-loop/call-model.ts +++ b/packages/agent/src/inner-loop/call-model.ts @@ -2,6 +2,7 @@ import type { OpenRouterCore } from '@openrouter/sdk/core'; import type { RequestOptions } from '@openrouter/sdk/lib/sdks'; import type { $ZodObject, $ZodShape, infer as zodInfer } from 'zod/v4/core'; import type { CallModelInput } from '../lib/async-params.js'; +import { resolveHooks } from '../lib/hooks-resolve.js'; import type { GetResponseOptions } from '../lib/model-result.js'; import { ModelResult } from '../lib/model-result.js'; import { convertToolsToAPIFormat } from '../lib/tool-executor.js'; @@ -91,6 +92,7 @@ export function callModel< sharedContextSchema, onTurnStart, onTurnEnd, + hooks, ...apiRequest } = request; @@ -152,5 +154,8 @@ export function callModel< ...(onTurnEnd !== undefined && { onTurnEnd, }), + ...(hooks !== undefined && { + hooks: resolveHooks(hooks), + }), } as GetResponseOptions); } diff --git a/packages/agent/src/lib/async-params.ts b/packages/agent/src/lib/async-params.ts index 8856346..0e4314a 100644 --- a/packages/agent/src/lib/async-params.ts +++ b/packages/agent/src/lib/async-params.ts @@ -1,5 +1,7 @@ import type * as models from '@openrouter/sdk/models'; import type { OpenResponsesResult } from '@openrouter/sdk/models'; +import type { HooksManager } from './hooks-manager.js'; +import type { InlineHookConfig } from './hooks-types.js'; import type { Item } from './item-types.js'; import type { ContextInput } from './tool-context.js'; import type { @@ -80,6 +82,8 @@ type BaseCallModelInput< * Receives the turn context and the completed response for that turn */ onTurnEnd?: (context: TurnContext, response: OpenResponsesResult) => void | Promise; + /** Hook system for lifecycle events. Accepts inline config or a HooksManager instance. */ + hooks?: InlineHookConfig | HooksManager; }; /** @@ -180,6 +184,7 @@ export async function resolveAsyncFunctions { + return typeof value === 'object' && value !== null && !Array.isArray(value); +} + +/** + * Execute a chain of hook handlers sequentially. + * + * Supports: + * - ToolMatcher and filter-based skipping (matcher fails closed: a handler with + * a matcher and no `options.toolName` is skipped) + * - Sync results validated against `options.resultSchema` and collected into `results` + * - Async fire-and-forget via a returned {@link AsyncOutput} -- the handler's + * `work` promise is pushed to `pending` without being awaited; the manager is + * responsible for draining/timing out that work + * - Per-hook mutation piping (driven by {@link MUTATION_FIELD_MAP}) + * - Short-circuit on block/reject fields (non-empty string or `true`) + * - Cooperative abort via `context.signal`: the chain checks + * `signal.aborted` between handlers and bails out when set so + * `abortInflight()` has a deterministic effect on the chain itself (not just + * on handlers that happen to consult the signal). + * + * Payload isolation: the initial payload is shallow-cloned before entering the + * chain so mutation piping (via {@link MUTATION_FIELD_MAP}) doesn't mutate the + * caller's payload at the top level. This is a top-level-only guarantee: + * handlers that directly mutate a nested object (e.g. + * `payload.toolInput.foo = 'bar'`) still reach the caller's original nested + * reference. Handlers MUST return mutations via the documented result fields + * (e.g. `mutatedInput`) rather than mutating nested payload fields in place. + */ +export async function executeHandlerChain( + entries: ReadonlyArray>, + initialPayload: P, + context: LifecycleHookContext, + options: ExecuteChainOptions, +): Promise> { + const results: R[] = []; + const pending: Promise[] = []; + // Only clone when the payload is a plain mutable object. Spreading a + // primitive (e.g. a custom hook's `z.number()` payload) would silently + // produce `{}`; spreading an array reindexes it into an object. For those + // cases we pass the value through untouched so handlers see the original + // typed-`P` value. For plain objects we still clone so the chain can apply + // mutation piping without mutating the caller's payload at the top level. + // Nested fields are shared with the caller; see the function-level docstring + // for the invariant handlers are expected to respect. + let currentPayload: P = isPlainMutableObject(initialPayload) + ? ({ + ...initialPayload, + } as P) + : initialPayload; + let blocked = false; + + const blockField = BLOCK_FIELDS[options.hookName]; + const canBlock = BLOCK_HOOKS.has(options.hookName); + const mutationMap = MUTATION_FIELD_MAP[options.hookName]; + + for (let i = 0; i < entries.length; i++) { + // Cooperative abort: bail out of the chain when abortInflight() has fired. + // Checked before each handler so synchronous chains stop promptly instead + // of running to completion with only advisory notice to each handler. + if (context.signal.aborted) { + break; + } + + const entry = entries[i]; + if (!entry) { + continue; + } + + // Matcher check for tool-scoped hooks. Matchers fail closed: if a matcher + // is registered and no toolName is available for this emit, we skip the + // handler rather than invoking it globally. + if (entry.matcher !== undefined) { + if (options.toolName === undefined) { + continue; + } + if (!matchesTool(entry.matcher, options.toolName)) { + continue; + } + } + + // Filter check + if (entry.filter && !entry.filter(currentPayload)) { + continue; + } + + try { + const returnValue = await entry.handler(currentPayload, context); + + // Async fire-and-forget: the handler has returned a signal describing + // detached work. Track the (optional) work promise for drain/timeout. + if (isAsyncOutput(returnValue)) { + const asyncOutput: AsyncOutput = returnValue; + const trackedWork = trackAsyncWork(asyncOutput, options.hookName); + if (trackedWork !== undefined) { + pending.push(trackedWork); + } + continue; + } + + // Void / undefined / null -- side-effect only, continue + if (returnValue === undefined || returnValue === null) { + continue; + } + + // Validate the result against the schema if one is supplied. A failure + // here is treated like any other handler error: propagated in strict + // mode, logged otherwise. On success, use the parsed output (which may + // differ from the input for schemas with .transform() / .default() / + // .catch() / .coerce) so downstream callers see transformed values. + let result: R; + if (options.resultSchema) { + const validation = safeParse(options.resultSchema, returnValue); + if (!validation.success) { + const err = new Error( + `[HooksManager] Handler ${i} for hook "${options.hookName}" returned an invalid result: ${validation.error.message}`, + ); + if (options.throwOnHandlerError) { + throw err; + } + console.warn(err.message); + continue; + } + result = validation.data as R; + } else { + result = returnValue as R; + } + results.push(result); + + // Apply mutation piping -- only hooks listed in MUTATION_FIELD_MAP participate. + if (mutationMap) { + currentPayload = applyMutations(currentPayload, result, mutationMap); + } + + // Short-circuit on block + if (canBlock && blockField && isBlockTriggered(result, blockField)) { + blocked = true; + break; + } + } catch (error) { + if (options.throwOnHandlerError) { + throw error; + } + console.warn(`[HooksManager] Handler ${i} for hook "${options.hookName}" threw:`, error); + } + } + + return { + results, + pending, + finalPayload: currentPayload, + blocked, + }; +} + +/** + * Given an {@link AsyncOutput} signal, return a Promise that resolves + * when the handler's detached `work` settles OR the timeout fires -- whichever + * is first. Returns `undefined` if there is no work to track. + * + * Rejections of the detached `work` promise are logged as warnings. Note: + * `throwOnHandlerError` governs synchronous handler failures only -- detached + * fire-and-forget work never re-throws here, it just surfaces via the warning. + */ +function trackAsyncWork(output: AsyncOutput, hookName: string): Promise | undefined { + if (output.work === undefined) { + return undefined; + } + const timeout = output.asyncTimeout ?? DEFAULT_ASYNC_TIMEOUT; + + return new Promise((resolve) => { + let settled = false; + const finish = () => { + if (settled) { + return; + } + settled = true; + clearTimeout(timeoutId); + resolve(); + }; + + const timeoutId = setTimeout(finish, timeout); + // In Node.js, Timeout objects expose `.unref()` to remove the reference + // the timer holds on the event loop. Without this, a leaked hook whose + // `work` never settles keeps the process alive for the full + // DEFAULT_ASYNC_TIMEOUT window after the main workload has finished. + // Guarded for browsers / other environments that return a plain number + // from setTimeout and don't expose the method. + if (isUnrefable(timeoutId)) { + timeoutId.unref(); + } + + output.work?.then(finish, (error: unknown) => { + console.warn(`[HooksManager] Async work for hook "${hookName}" rejected:`, error); + finish(); + }); + }); +} + +/** + * Typeguard for the Node.js Timeout object that carries a `.unref()` method. + * Browsers and other environments return a number from setTimeout, which has + * no such method. + */ +function isUnrefable(handle: unknown): handle is { + unref: () => void; +} { + if (typeof handle !== 'object' || handle === null || !('unref' in handle)) { + return false; + } + const candidate: { + unref: unknown; + } = handle; + return typeof candidate.unref === 'function'; +} + +/** + * Apply mutation fields from a result onto the current payload. + */ +function applyMutations(payload: P, result: R, mutationMap: Record): P { + if (typeof result !== 'object' || result === null) { + return payload; + } + + let mutated = payload; + for (const [resultField, payloadField] of Object.entries(mutationMap)) { + if (resultField in result) { + const value = (result as Record)[resultField]; + if (value !== undefined) { + mutated = { + ...mutated, + [payloadField]: value, + }; + } + } + } + return mutated; +} + +/** + * Check if a result triggers a short-circuit block. + * + * A block fires when the field is `=== true` or a non-empty string. Empty + * strings are treated as "no block reason supplied" -- they do NOT trigger a + * short-circuit, which keeps emit consistent with callers that look up the + * first block reason with a truthy check. + */ +function isBlockTriggered(result: R, blockField: string): boolean { + if (typeof result !== 'object' || result === null) { + return false; + } + const value = (result as Record)[blockField]; + if (value === true) { + return true; + } + return typeof value === 'string' && value.length > 0; +} diff --git a/packages/agent/src/lib/hooks-manager.ts b/packages/agent/src/lib/hooks-manager.ts new file mode 100644 index 0000000..1c8169e --- /dev/null +++ b/packages/agent/src/lib/hooks-manager.ts @@ -0,0 +1,315 @@ +import type { $ZodType, infer as zodInfer } from 'zod/v4/core'; +import { safeParse } from 'zod/v4/core'; +import { executeHandlerChain } from './hooks-emit.js'; +import { BUILT_IN_HOOK_NAMES, BUILT_IN_HOOKS, VOID_RESULT_HOOKS } from './hooks-schemas.js'; +import type { + BuiltInHookDefinitions, + EmitResult, + HookEntry, + HookHandler, + HookRegistry, + HooksManagerOptions, + LifecycleHookContext, + ToolMatcher, +} from './hooks-types.js'; + +//#region Types + +type AllHooks = BuiltInHookDefinitions & { + [K in keyof Custom]: { + payload: zodInfer; + result: zodInfer; + }; +}; + +type PayloadOf, K extends keyof H> = H[K]['payload']; +type ResultOf, K extends keyof H> = H[K]['result']; + +interface EntryRegistration { + readonly handler: HookHandler; + readonly matcher?: ToolMatcher; + readonly filter?: (payload: P) => boolean; +} + +/** + * Typeguard: does the payload carry a string `sessionId` field? + * + * Used by `emit()` to prefer the payload's sessionId over the manager's + * stored value when they disagree (e.g., mid-session id changes). + */ +function payloadHasSessionId(payload: unknown): payload is { + sessionId: string; +} { + if (typeof payload !== 'object' || payload === null || !('sessionId' in payload)) { + return false; + } + return typeof payload.sessionId === 'string'; +} + +//#endregion + +/** + * Module-private key used by `resolveHooks` to call the internal + * untyped-registration path without exposing it as a public method on the + * class. See {@link getInternalRegistrar} below. + */ +const INTERNAL_REGISTRAR_KEY: unique symbol = Symbol('HooksManager.internalRegistrar'); + +type InternalRegistrar = (hookName: string, entry: HookEntry) => () => void; + +/** + * Typed, extensible hook system for agent lifecycle events. + * + * Supports both built-in hooks (PreToolUse, PostToolUse, etc.) and + * user-defined custom hooks with full type safety. + */ +export class HooksManager> { + private readonly entries = new Map[]>(); + private readonly pendingAsync = new Set>(); + private readonly customHooks: HookRegistry; + private readonly inflightControllers = new Set(); + private readonly throwOnHandlerError: boolean; + private sessionId = ''; + + constructor(customHooks?: Custom, options?: HooksManagerOptions) { + this.throwOnHandlerError = options?.throwOnHandlerError ?? false; + + // Validate no collisions between custom and built-in hook names. + if (customHooks) { + for (const name of Object.keys(customHooks)) { + if (BUILT_IN_HOOK_NAMES.has(name)) { + throw new Error( + `Custom hook name "${name}" collides with a built-in hook. Choose a different name.`, + ); + } + } + this.customHooks = { + ...customHooks, + }; + } else { + this.customHooks = {}; + } + } + + /** + * Set the session ID used in LifecycleHookContext for all handler invocations. + */ + setSessionId(sessionId: string): void { + this.sessionId = sessionId; + } + + /** + * Register a handler for a hook. Returns an unsubscribe function. + */ + on & string>( + hookName: K, + entry: EntryRegistration, K>, ResultOf, K>>, + ): () => void { + return this._register(hookName, entry as HookEntry); + } + + /** + * Remove a specific handler function from a hook. + * Returns true if found and removed, false otherwise. + */ + off & string>( + hookName: K, + handler: HookHandler, K>, ResultOf, K>>, + ): boolean { + const list = this.entries.get(hookName); + if (!list) { + return false; + } + + const idx = list.findIndex((e) => e.handler === handler); + if (idx === -1) { + return false; + } + + list.splice(idx, 1); + return true; + } + + /** + * Remove all handlers for a specific hook, or all handlers if no name given. + */ + removeAll & string>(hookName?: K): void { + if (hookName) { + this.entries.delete(hookName); + } else { + this.entries.clear(); + } + } + + /** + * Validate the payload (and each handler's result) against the registered + * Zod schemas, invoke matching handlers, and return results. + * + * Payload validation failure is treated according to `throwOnHandlerError`: + * strict mode re-throws, default mode logs a warning and returns an empty + * result without invoking any handlers. + */ + async emit & string>( + hookName: K, + payload: PayloadOf, K>, + emitContext?: { + toolName?: string; + }, + ): Promise, K>, PayloadOf, K>>> { + // Snapshot the entries list so mutations (on()/off()/unsubscribe) triggered + // by a handler during the chain can't shift indices mid-iteration and + // silently skip the next handler, or splice in new handlers that were + // registered after this emit began. + const list = (this.entries.get(hookName) ?? []).slice(); + + const definition = this._definitionFor(hookName); + if (definition) { + const parsed = safeParse(definition.payload, payload); + if (!parsed.success) { + const err = new Error( + `[HooksManager] Invalid payload for hook "${hookName}": ${parsed.error.message}`, + ); + if (this.throwOnHandlerError) { + throw err; + } + console.warn(err.message); + return { + results: [], + pending: [], + finalPayload: payload, + blocked: false, + }; + } + } + + const controller = new AbortController(); + this.inflightControllers.add(controller); + + // Payload sessionId takes precedence over the manager's stored value when + // available, keeping `payload.sessionId` and `ctx.sessionId` in sync even + // if the state's id changes mid-session. + const contextSessionId = payloadHasSessionId(payload) ? payload.sessionId : this.sessionId; + + const context: LifecycleHookContext = { + signal: controller.signal, + hookName, + sessionId: contextSessionId, + }; + + try { + const resultSchema = + definition && !VOID_RESULT_HOOKS.has(hookName) ? definition.result : undefined; + + const result = await executeHandlerChain( + list as ReadonlyArray< + HookEntry, K>, ResultOf, K>> + >, + payload, + context, + { + hookName, + throwOnHandlerError: this.throwOnHandlerError, + toolName: emitContext?.toolName, + resultSchema, + }, + ); + + // Track pending async work for drain(); each promise self-removes once + // it settles so the set can't grow unbounded across many emits. + for (const p of result.pending) { + this.pendingAsync.add(p); + p.finally(() => { + this.pendingAsync.delete(p); + }); + } + + return result; + } finally { + this.inflightControllers.delete(controller); + } + } + + /** + * Await all in-flight async handler work. Used for graceful shutdown. + */ + async drain(): Promise { + while (this.pendingAsync.size > 0) { + const snapshot = Array.from(this.pendingAsync); + await Promise.allSettled(snapshot); + } + } + + /** + * Abort every in-flight `emit()` by firing their context `signal`s. Handlers + * that kick off async work should observe `context.signal` to honor this. + * + * Does not remove pending async work from `pendingAsync` — callers that want + * to wait for handlers to wind down should still call `drain()` afterward. + */ + abortInflight(reason?: unknown): void { + for (const controller of this.inflightControllers) { + controller.abort(reason); + } + } + + /** + * Check if any handlers are registered for a given hook. + */ + hasHandlers(hookName: string): boolean { + const list = this.entries.get(hookName); + return list !== undefined && list.length > 0; + } + + /** + * Internal registration path used by `resolveHooks` to translate an + * InlineHookConfig object into this manager. User code should use `on()`. + */ + [INTERNAL_REGISTRAR_KEY](hookName: string, entry: HookEntry): () => void { + return this._register(hookName, entry); + } + + private _register(hookName: string, entry: HookEntry): () => void { + const list = this.entries.get(hookName) ?? []; + list.push(entry); + this.entries.set(hookName, list); + + return () => { + const current = this.entries.get(hookName); + if (!current) { + return; + } + const idx = current.indexOf(entry); + if (idx !== -1) { + current.splice(idx, 1); + } + }; + } + + private _definitionFor(hookName: string): + | { + payload: $ZodType; + result: $ZodType; + } + | undefined { + const builtIn = BUILT_IN_HOOKS[hookName]; + if (builtIn) { + return builtIn; + } + const custom = this.customHooks[hookName]; + if (custom) { + return custom; + } + return undefined; + } +} + +/** + * Internal: return the private registrar of a HooksManager instance. This + * lets `resolveHooks` register arbitrary hook names (including unknown ones + * from an inline config) without exposing a public bypass of the typed `on()`. + * + * NOT PART OF THE PUBLIC API. + */ +export function getInternalRegistrar(manager: HooksManager): InternalRegistrar { + return manager[INTERNAL_REGISTRAR_KEY].bind(manager); +} diff --git a/packages/agent/src/lib/hooks-matchers.ts b/packages/agent/src/lib/hooks-matchers.ts new file mode 100644 index 0000000..f1a196a --- /dev/null +++ b/packages/agent/src/lib/hooks-matchers.ts @@ -0,0 +1,26 @@ +import type { ToolMatcher } from './hooks-types.js'; + +/** + * Evaluate a ToolMatcher against a tool name. + * + * - `undefined` -> wildcard, matches all tools + * - `string` -> exact match + * - `RegExp` -> `.test(toolName)` + * - `function` -> arbitrary predicate + */ +export function matchesTool(matcher: ToolMatcher | undefined, toolName: string): boolean { + if (matcher === undefined) { + return true; + } + if (typeof matcher === 'string') { + return matcher === toolName; + } + if (matcher instanceof RegExp) { + // RegExps with the /g or /y flag advance `lastIndex` on every `.test()`, + // so successive emits with the same tool name alternate true/false. + // Reset so the matcher behaves statelessly regardless of flag. + matcher.lastIndex = 0; + return matcher.test(toolName); + } + return matcher(toolName); +} diff --git a/packages/agent/src/lib/hooks-resolve.ts b/packages/agent/src/lib/hooks-resolve.ts new file mode 100644 index 0000000..5fb2d9b --- /dev/null +++ b/packages/agent/src/lib/hooks-resolve.ts @@ -0,0 +1,49 @@ +import { getInternalRegistrar, HooksManager } from './hooks-manager.js'; +import { BUILT_IN_HOOK_NAMES } from './hooks-schemas.js'; +import type { HookEntry, InlineHookConfig } from './hooks-types.js'; + +/** + * Normalize a hooks option into a HooksManager instance. + * + * - `undefined` -> `undefined` (no hooks) + * - `HooksManager` -> passthrough + * - Plain object (InlineHookConfig) -> construct HooksManager, register all entries + * + * The inline config surface is typed to only accept built-in hook names, but + * a generic config coerced through `as` can smuggle typos past the compiler + * (e.g. `PretoolUse` vs `PreToolUse`) which would silently never fire. We log + * a warning for any non-built-in key and skip registration for it. Custom + * hooks must be registered through a `HooksManager` instance via `on()`. + */ +export function resolveHooks( + hooks: InlineHookConfig | HooksManager | undefined, +): HooksManager | undefined { + if (!hooks) { + return undefined; + } + + if (hooks instanceof HooksManager) { + return hooks; + } + + // Inline config -> HooksManager. We register through the internal registrar + // so we don't have to constrain `resolveHooks` to the typed `on()` surface + // and so user code can't reach this path. + const manager = new HooksManager(); + const register = getInternalRegistrar(manager); + for (const [hookName, entries] of Object.entries(hooks)) { + if (!entries || !Array.isArray(entries)) { + continue; + } + if (!BUILT_IN_HOOK_NAMES.has(hookName)) { + console.warn( + `[resolveHooks] Ignoring inline hook entry for unknown hook name "${hookName}". Inline config only supports built-in hooks; register custom hooks via a HooksManager instance.`, + ); + continue; + } + for (const entry of entries) { + register(hookName, entry as HookEntry); + } + } + return manager; +} diff --git a/packages/agent/src/lib/hooks-schemas.ts b/packages/agent/src/lib/hooks-schemas.ts new file mode 100644 index 0000000..c7fb3a8 --- /dev/null +++ b/packages/agent/src/lib/hooks-schemas.ts @@ -0,0 +1,159 @@ +import * as z4 from 'zod/v4'; +import type { HookDefinition } from './hooks-types.js'; + +//#region Payload Schemas + +export const PreToolUsePayloadSchema = z4.object({ + toolName: z4.string(), + toolInput: z4.record(z4.string(), z4.unknown()), + sessionId: z4.string(), +}); + +export const PostToolUsePayloadSchema = z4.object({ + toolName: z4.string(), + toolInput: z4.record(z4.string(), z4.unknown()), + toolOutput: z4.unknown(), + durationMs: z4.number(), + sessionId: z4.string(), +}); + +export const PostToolUseFailurePayloadSchema = z4.object({ + toolName: z4.string(), + toolInput: z4.record(z4.string(), z4.unknown()), + error: z4.unknown(), + sessionId: z4.string(), +}); + +export const StopPayloadSchema = z4.object({ + reason: z4.enum([ + 'max_turns', + ]), + sessionId: z4.string(), +}); + +export const PermissionRequestPayloadSchema = z4.object({ + toolName: z4.string(), + toolInput: z4.record(z4.string(), z4.unknown()), + riskLevel: z4.enum([ + 'low', + 'medium', + 'high', + ]), + sessionId: z4.string(), +}); + +export const UserPromptSubmitPayloadSchema = z4.object({ + prompt: z4.string(), + sessionId: z4.string(), +}); + +export const SessionStartPayloadSchema = z4.object({ + sessionId: z4.string(), + config: z4.record(z4.string(), z4.unknown()).optional(), +}); + +export const SessionEndPayloadSchema = z4.object({ + sessionId: z4.string(), + reason: z4.enum([ + 'user', + 'error', + 'max_turns', + 'complete', + ]), +}); + +//#endregion + +//#region Result Schemas + +export const PreToolUseResultSchema = z4.object({ + mutatedInput: z4.record(z4.string(), z4.unknown()).optional(), + block: z4 + .union([ + z4.boolean(), + z4.string(), + ]) + .optional(), +}); + +export const StopResultSchema = z4.object({ + forceResume: z4.boolean().optional(), + appendPrompt: z4.string().optional(), +}); + +export const PermissionRequestResultSchema = z4.object({ + decision: z4.enum([ + 'allow', + 'deny', + 'ask_user', + ]), + reason: z4.string().optional(), +}); + +export const UserPromptSubmitResultSchema = z4.object({ + mutatedPrompt: z4.string().optional(), + reject: z4 + .union([ + z4.boolean(), + z4.string(), + ]) + .optional(), +}); + +const VoidResultSchema = z4.void(); + +//#endregion + +//#region Built-in Hook Registry + +export const BUILT_IN_HOOKS: Record = { + PreToolUse: { + payload: PreToolUsePayloadSchema, + result: PreToolUseResultSchema, + }, + PostToolUse: { + payload: PostToolUsePayloadSchema, + result: VoidResultSchema, + }, + PostToolUseFailure: { + payload: PostToolUseFailurePayloadSchema, + result: VoidResultSchema, + }, + UserPromptSubmit: { + payload: UserPromptSubmitPayloadSchema, + result: UserPromptSubmitResultSchema, + }, + Stop: { + payload: StopPayloadSchema, + result: StopResultSchema, + }, + PermissionRequest: { + payload: PermissionRequestPayloadSchema, + result: PermissionRequestResultSchema, + }, + SessionStart: { + payload: SessionStartPayloadSchema, + result: VoidResultSchema, + }, + SessionEnd: { + payload: SessionEndPayloadSchema, + result: VoidResultSchema, + }, +}; + +export const BUILT_IN_HOOK_NAMES = new Set(Object.keys(BUILT_IN_HOOKS)); + +/** + * Set of built-in hook names whose result schema is `z4.void()`. These hooks + * have no meaningful result object, so the emit pipeline skips result + * validation for them (allowing handlers to return arbitrary values that are + * then collected as opaque results without complaint). + */ +export const VOID_RESULT_HOOKS = new Set([ + 'PostToolUse', + 'PostToolUseFailure', + 'SessionStart', + 'SessionEnd', +]); + +//#endregion diff --git a/packages/agent/src/lib/hooks-types.ts b/packages/agent/src/lib/hooks-types.ts new file mode 100644 index 0000000..ace25c3 --- /dev/null +++ b/packages/agent/src/lib/hooks-types.ts @@ -0,0 +1,335 @@ +import type { $ZodType } from 'zod/v4/core'; + +//#region Hook Names + +export const HookName = { + PreToolUse: 'PreToolUse', + PostToolUse: 'PostToolUse', + PostToolUseFailure: 'PostToolUseFailure', + UserPromptSubmit: 'UserPromptSubmit', + Stop: 'Stop', + PermissionRequest: 'PermissionRequest', + SessionStart: 'SessionStart', + SessionEnd: 'SessionEnd', +} as const; + +export type HookName = (typeof HookName)[keyof typeof HookName]; + +//#endregion + +//#region Core Types + +/** + * A hook definition is a pair of Zod schemas: one for the payload and one for the result. + */ +export interface HookDefinition { + readonly payload: $ZodType; + readonly result: $ZodType; +} + +/** + * A registry maps hook names to their definitions. + */ +export type HookRegistry = Record; + +/** + * Context provided to every lifecycle-hook handler invocation. + * + * - `signal` is aborted if the manager's `abortInflight()` is called while the + * emit is still running. Handlers that kick off background work via + * {@link AsyncOutput} should observe the signal for cancellation. + */ +export interface LifecycleHookContext { + readonly signal: AbortSignal; + readonly hookName: string; + readonly sessionId: string; +} + +/** + * Returned by a handler to signal fire-and-forget mode. + * + * The chain proceeds immediately without waiting for completion. Any background + * work the handler has kicked off should be attached as `work` so the manager + * can track it for `drain()` and enforce the `asyncTimeout`. + * + * The handler is expected to construct and return this object synchronously (or + * resolve to it quickly). The `work` promise is the detached work to track. + */ +export interface AsyncOutput { + readonly async: true; + /** + * Background work the manager should track for `drain()` and time out after + * `asyncTimeout` ms. Omit if there is no work to track. + */ + readonly work?: Promise; + /** Milliseconds before the async handler is aborted. Default: 30000 */ + readonly asyncTimeout?: number; +} + +const DEFAULT_ASYNC_TIMEOUT = 30_000; + +export { DEFAULT_ASYNC_TIMEOUT }; + +/** + * A handler may return a sync result, an async signal, or void. + */ +// biome-ignore lint/suspicious/noConfusingVoidType: void here allows handlers to have no return +export type HookReturn = R | AsyncOutput | void; + +/** + * A hook handler receives the validated payload and context. + */ +export type HookHandler = ( + payload: P, + context: LifecycleHookContext, +) => HookReturn | Promise>; + +/** + * Matcher for tool-scoped hooks. Filters handler invocation by tool name. + */ +export type ToolMatcher = string | RegExp | ((toolName: string) => boolean); + +/** + * An entry registered for a specific hook. + */ +export interface HookEntry { + readonly handler: HookHandler; + readonly matcher?: ToolMatcher; + readonly filter?: (payload: P) => boolean; +} + +/** + * Result of emitting a hook through the handler chain. + */ +export interface EmitResult { + /** Sync results from handlers that returned a hook-specific value. */ + readonly results: R[]; + /** Handles to detached async handler work. */ + readonly pending: Promise[]; + /** The payload after all mutation piping has been applied. */ + readonly finalPayload: P; + /** True if any handler triggered a block/reject short-circuit. */ + readonly blocked: boolean; +} + +//#endregion + +//#region Options + +export interface HooksManagerOptions { + /** + * If true, a throwing handler or a schema-validation failure stops the chain + * and propagates the error. If false (default), the error is logged as a + * warning and execution continues. + */ + readonly throwOnHandlerError?: boolean; +} + +//#endregion + +//#region Payload Types + +export interface PreToolUsePayload { + readonly toolName: string; + readonly toolInput: Record; + readonly sessionId: string; +} + +export interface PostToolUsePayload { + readonly toolName: string; + readonly toolInput: Record; + readonly toolOutput: unknown; + readonly durationMs: number; + readonly sessionId: string; +} + +export interface PostToolUseFailurePayload { + readonly toolName: string; + readonly toolInput: Record; + readonly error: unknown; + readonly sessionId: string; +} + +export interface StopPayload { + readonly reason: 'max_turns'; + readonly sessionId: string; +} + +export interface PermissionRequestPayload { + readonly toolName: string; + readonly toolInput: Record; + readonly riskLevel: 'low' | 'medium' | 'high'; + readonly sessionId: string; +} + +export interface UserPromptSubmitPayload { + readonly prompt: string; + readonly sessionId: string; +} + +export interface SessionStartPayload { + readonly sessionId: string; + readonly config: Record | undefined; +} + +export interface SessionEndPayload { + readonly sessionId: string; + readonly reason: 'user' | 'error' | 'max_turns' | 'complete'; +} + +//#endregion + +//#region Result Types + +export interface PreToolUseResult { + readonly mutatedInput?: Record; + readonly block?: boolean | string; +} + +export interface StopResult { + readonly forceResume?: boolean; + readonly appendPrompt?: string; +} + +export interface PermissionRequestResult { + readonly decision: 'allow' | 'deny' | 'ask_user'; + readonly reason?: string; +} + +export interface UserPromptSubmitResult { + readonly mutatedPrompt?: string; + readonly reject?: boolean | string; +} + +//#endregion + +//#region Built-in Hook Registry (type-level) + +export interface BuiltInHookDefinitions { + PreToolUse: { + payload: PreToolUsePayload; + result: PreToolUseResult; + }; + PostToolUse: { + payload: PostToolUsePayload; + // biome-ignore lint/suspicious/noConfusingVoidType: void signals no meaningful result + result: void; + }; + PostToolUseFailure: { + payload: PostToolUseFailurePayload; + // biome-ignore lint/suspicious/noConfusingVoidType: void signals no meaningful result + result: void; + }; + UserPromptSubmit: { + payload: UserPromptSubmitPayload; + result: UserPromptSubmitResult; + }; + Stop: { + payload: StopPayload; + result: StopResult; + }; + PermissionRequest: { + payload: PermissionRequestPayload; + result: PermissionRequestResult; + }; + SessionStart: { + payload: SessionStartPayload; + // biome-ignore lint/suspicious/noConfusingVoidType: void signals no meaningful result + result: void; + }; + SessionEnd: { + payload: SessionEndPayload; + // biome-ignore lint/suspicious/noConfusingVoidType: void signals no meaningful result + result: void; + }; +} + +//#endregion + +//#region Inline Config + +/** + * Inline hook config passed directly to callModel. + * Only supports built-in hooks. For custom hooks, use a HooksManager instance. + */ +export type InlineHookConfig = { + [K in keyof BuiltInHookDefinitions]?: HookEntry< + BuiltInHookDefinitions[K]['payload'], + BuiltInHookDefinitions[K]['result'] extends void ? void : BuiltInHookDefinitions[K]['result'] + >[]; +}; + +//#endregion + +//#region Helper Types + +/** + * Keys recognized on an {@link AsyncOutput} signal. Used by {@link isAsyncOutput} + * to distinguish a true fire-and-forget marker from a result object that + * happens to carry an `async` field. + */ +const ASYNC_OUTPUT_KEYS = new Set([ + 'async', + 'work', + 'asyncTimeout', +]); + +/** + * Checks if a value is an AsyncOutput signal. + * + * Requires `async === true` AND no foreign keys. A handler that accidentally + * returns `{ async: true, mutatedInput: {...} }` is treated as a result (so + * the mutation is not silently discarded), not as fire-and-forget. + */ +export function isAsyncOutput(value: unknown): value is AsyncOutput { + if (typeof value !== 'object' || value === null || !('async' in value)) { + return false; + } + const candidate = value as { + async: unknown; + }; + if (candidate.async !== true) { + return false; + } + for (const key of Object.keys(value)) { + if (!ASYNC_OUTPUT_KEYS.has(key)) { + return false; + } + } + return true; +} + +/** + * Mutation field mapping for payload piping, per hook name. + * + * The outer key is the hook name; the inner map is from result field name to + * the payload field it replaces. Only hooks listed here support mutation + * piping. Custom hooks opt in by supplying their own entry on the manager + * (currently only the built-in mapping is honored). + */ +export const MUTATION_FIELD_MAP: Record> = { + PreToolUse: { + mutatedInput: 'toolInput', + }, + UserPromptSubmit: { + mutatedPrompt: 'prompt', + }, +}; + +/** + * Hook names that support short-circuit blocking. + */ +export const BLOCK_HOOKS = new Set([ + 'PreToolUse', + 'UserPromptSubmit', +]); + +/** + * Result fields that trigger short-circuit. + */ +export const BLOCK_FIELDS: Record = { + PreToolUse: 'block', + UserPromptSubmit: 'reject', +}; + +//#endregion diff --git a/packages/agent/src/lib/model-result.ts b/packages/agent/src/lib/model-result.ts index c187f72..8be201c 100644 --- a/packages/agent/src/lib/model-result.ts +++ b/packages/agent/src/lib/model-result.ts @@ -16,6 +16,7 @@ import { unsentResultsToAPIFormat, updateState, } from './conversation-state.js'; +import type { HooksManager } from './hooks-manager.js'; import { applyNextTurnParamsToRequest, executeNextTurnParamsFunctions, @@ -113,6 +114,110 @@ function isEventStream(value: unknown): value is EventStream= 0; i--) { + if (isUserStringMessage(arr[i])) { + return i; + } + } + return -1; +} + +/** + * Extract a user-facing prompt string from an input (string or message array), + * and return an applier that writes a mutated prompt back into the same shape. + * + * For structured inputs we look at the LAST user message with a string + * content — this is the most common shape emitted by the SDK's own helpers + * (`normalizeInputToArray`) and matches what a handler would reasonably + * expect to mutate. + * + * Returns `{ prompt: undefined }` when no usable prompt can be extracted; the + * caller should skip the hook in that case. + */ +function extractPromptAndApplier(input: models.InputsUnion): { + prompt: string | undefined; + applyMutated: (mutated: string, original: models.InputsUnion | undefined) => models.InputsUnion; +} { + if (typeof input === 'string') { + return { + prompt: input, + applyMutated: (mutated) => mutated, + }; + } + + if (Array.isArray(input)) { + const targetIndex = findLatestUserStringIndex(input); + + if (targetIndex === -1) { + return { + prompt: undefined, + applyMutated: (_mutated, original) => original ?? input, + }; + } + + const target = input[targetIndex]; + if (!isUserStringMessage(target)) { + return { + prompt: undefined, + applyMutated: (_mutated, original) => original ?? input, + }; + } + + return { + prompt: target.content, + applyMutated: (mutated, original) => { + // Re-derive the target index from the effective base array so an + // arbitrary `original` shape lands the mutation in the correct slot + // rather than the closed-over index from the initial extraction. + const base = Array.isArray(original) ? original : input; + const idx = findLatestUserStringIndex(base); + if (idx === -1) { + return base; + } + const out = [ + ...base, + ]; + const existing = out[idx]; + if (isUserStringMessage(existing)) { + out[idx] = { + ...existing, + content: mutated, + }; + } + return out; + }, + }; + } + + return { + prompt: undefined, + applyMutated: (_mutated, original) => original ?? input, + }; +} + export interface GetResponseOptions< TTools extends readonly Tool[], TShared extends Record = Record, @@ -145,6 +250,8 @@ export interface GetResponseOptions< onTurnStart?: (context: TurnContext) => void | Promise; /** Callback invoked at the end of each tool execution turn */ onTurnEnd?: (context: TurnContext, response: models.OpenResponsesResult) => void | Promise; + /** Hook system for lifecycle events */ + hooks?: HooksManager; } /** @@ -229,8 +336,17 @@ export class ModelResult< // Context store for typed tool context (persists across turns) private contextStore: ToolContextStore | null = null; + // Hook system + private readonly hooksManager: HooksManager | undefined; + // Tracks whether SessionStart has already been emitted, so SessionEnd can be + // guarded to fire only when a matching SessionStart actually succeeded. + // Without this, an exception in initStream before SessionStart would lead to + // a dangling SessionEnd (breaking audit-log / resource-pair contracts). + private sessionStartEmitted = false; + constructor(options: GetResponseOptions) { this.options = options; + this.hooksManager = options.hooks; // Runtime validation: approval decisions require state const hasApprovalDecisions = @@ -552,6 +668,60 @@ export class ModelResult< return true; } + /** + * Inject a user-role message into the conversation state and into the + * accumulated request input, so the next turn picks it up. Used by the + * Stop hook's `appendPrompt` to nudge the model without forcing a resume. + * + * This advances observable state (messages/input change) so the next + * iteration of the execution loop is not a no-op. + */ + private async injectAppendPromptMessage(prompt: string): Promise { + const injectedMessage: models.BaseInputsUnion = { + role: 'user', + content: prompt, + } as models.BaseInputsUnion; + + if (this.currentState) { + // Mutate the in-memory state directly so loop progress is observable + // even when no StateAccessor is configured (forceResume needs state to + // change to avoid looping). Persist when an accessor is available. + const nextMessages = appendToMessages(this.currentState.messages, [ + injectedMessage, + ]); + this.currentState = updateState(this.currentState, { + messages: nextMessages, + }); + if (this.stateAccessor) { + await this.saveStateSafely(); + } + } + + if (this.resolvedRequest) { + const currentInput = this.resolvedRequest.input; + const nextInput: models.InputsUnion = Array.isArray(currentInput) + ? [ + ...currentInput, + injectedMessage, + ] + : currentInput + ? [ + { + role: 'user', + content: currentInput, + } as models.BaseInputsUnion, + injectedMessage, + ] + : [ + injectedMessage, + ]; + this.resolvedRequest = { + ...this.resolvedRequest, + input: nextInput, + }; + } + } + /** * Check if stop conditions are met. * Returns true if execution should stop. @@ -617,6 +787,316 @@ export class ModelResult< return !!tool && !hasExecuteFunction(tool); } + /** + * Shared helper: execute a single tool and emit the full Pre/Post lifecycle + * hooks around it. + * + * Every code path that ultimately calls `executeTool()` for a user-visible + * tool call funnels through here so that PreToolUse/PostToolUse/ + * PostToolUseFailure fire consistently — regardless of whether the tool was + * auto-executed, required approval, or was approved later. + * + * Return shape: + * - `parse_error`: `toolCall.arguments` was a raw JSON string the model + * failed to produce valid JSON for. The caller should use the prebuilt + * FunctionCallOutputItem and not execute the tool. No hooks fire. + * - `hook_blocked`: PreToolUse returned `block` (boolean true or a reason + * string). The caller should synthesize a denied result without invoking + * the tool. The FunctionCallOutputItem is prebuilt for convenience. + * - `execution`: The tool ran. `result` is the ToolExecutionResult. + * `effectiveToolCall` reflects any `mutatedInput` piped by PreToolUse. + */ + private async runToolWithHooks( + tool: Tool, + toolCall: ParsedToolCall, + turnContext: TurnContext, + onPreliminaryResult?: (toolCallId: string, result: unknown) => void, + ): Promise< + | { + type: 'parse_error'; + toolCall: ParsedToolCall; + errorMessage: string; + output: models.FunctionCallOutputItem; + } + | { + type: 'hook_blocked'; + toolCall: ParsedToolCall; + reason: string; + output: models.FunctionCallOutputItem; + } + | { + type: 'execution'; + effectiveToolCall: ParsedToolCall; + result: Awaited>; + } + > { + // Reject raw-string arguments before any hook fires. When the model + // produces invalid JSON, the parser leaves `toolCall.arguments` as the + // raw string; handing that to PreToolUse would either fail payload + // validation (silent no-op in non-strict mode) or deliver a malformed + // `toolInput` to handlers. Fail closed here so every execution path + // (auto-approve, manual approval, approved-on-resume) gets a consistent + // synthetic error without running the tool or firing hooks. + const rawArgs: unknown = toolCall.arguments; + if (typeof rawArgs === 'string') { + const errorMessage = + `Failed to parse tool call arguments for "${toolCall.name}": The model provided invalid JSON. ` + + `Raw arguments received: "${rawArgs}". ` + + 'Please provide valid JSON arguments for this tool call.'; + return { + type: 'parse_error', + toolCall, + errorMessage, + output: { + type: 'function_call_output' as const, + id: `output_${toolCall.id}`, + callId: toolCall.id, + output: JSON.stringify({ + error: errorMessage, + }), + }, + }; + } + + let effectiveToolCall = toolCall; + + // Emit PreToolUse hook -- can block or mutate input. + if (this.hooksManager) { + // Sentinel so downstream can tell "handler returned a replacement" apart + // from "handler didn't touch toolInput": a `??` fallback to `{}` when + // `toolCall.arguments` is null/undefined would otherwise defeat a + // reference-equality check against the original arguments, producing + // `{}` for tools that legitimately distinguish "no args" from "empty + // args". `mutationApplied` is the authoritative signal. + const originalToolInput = (toolCall.arguments ?? {}) as Record; + const preResult = await this.hooksManager.emit( + 'PreToolUse', + { + toolName: toolCall.name, + toolInput: originalToolInput, + sessionId: this.currentState?.id ?? '', + }, + { + toolName: toolCall.name, + }, + ); + + if (preResult.blocked) { + const blockResult = preResult.results.find( + (r) => r && typeof r === 'object' && 'block' in r && r.block, + ); + const reason = + blockResult && typeof blockResult.block === 'string' + ? blockResult.block + : 'Blocked by PreToolUse hook'; + return { + type: 'hook_blocked', + toolCall, + reason, + output: { + type: 'function_call_output' as const, + id: `output_${toolCall.id}`, + callId: toolCall.id, + output: JSON.stringify({ + error: reason, + }), + }, + }; + } + + // Apply mutated input when a handler actually piped a replacement. + // `mutationApplied` is derived from an explicit `mutatedInput` result + // in the handler chain; see executeHandlerChain / applyMutations. + const finalInput = preResult.finalPayload.toolInput; + const mutationApplied = finalInput !== originalToolInput; + if (mutationApplied) { + effectiveToolCall = { + ...toolCall, + arguments: finalInput, + }; + } + } + + // performance.now() gives monotonic, sub-ms precision and is immune to + // system clock jumps, unlike Date.now(). + const startTime = performance.now(); + const result = await executeTool( + tool, + effectiveToolCall, + turnContext, + onPreliminaryResult, + this.contextStore ?? undefined, + this.options.sharedContextSchema, + ); + const durationMs = performance.now() - startTime; + + // Emit PostToolUse or PostToolUseFailure. + if (this.hooksManager) { + if (result.error) { + await this.hooksManager.emit( + 'PostToolUseFailure', + { + toolName: effectiveToolCall.name, + toolInput: (effectiveToolCall.arguments ?? {}) as Record, + error: result.error, + sessionId: this.currentState?.id ?? '', + }, + { + toolName: effectiveToolCall.name, + }, + ); + } else { + await this.hooksManager.emit( + 'PostToolUse', + { + toolName: effectiveToolCall.name, + toolInput: (effectiveToolCall.arguments ?? {}) as Record, + toolOutput: result.result, + durationMs, + sessionId: this.currentState?.id ?? '', + }, + { + toolName: effectiveToolCall.name, + }, + ); + } + } + + return { + type: 'execution', + effectiveToolCall, + result, + }; + } + + /** + * Emit the PermissionRequest hook before the SDK blocks for user approval. + * + * Returns the hook's collective decision: + * - `allow`: the tool should proceed as if auto-approved (skip approval gate) + * - `deny`: the tool should NOT run; caller should produce a denied result + * - `ask_user`: fall through to the existing approval flow (the default) + * + * Last-wins when multiple handlers return conflicting decisions. + */ + private async emitPermissionRequest(toolCall: ParsedToolCall): Promise<{ + decision: 'allow' | 'deny' | 'ask_user'; + reason?: string; + }> { + if (!this.hooksManager) { + return { + decision: 'ask_user', + }; + } + + // Derive risk level from the tool's requireApproval shape: function => 'high' + // (caller actively decides), blanket true => 'medium', otherwise 'low'. + const tool = this.options.tools?.find( + (t) => isClientTool(t) && t.function.name === toolCall.name, + ); + const requireApproval = tool && isClientTool(tool) ? tool.function.requireApproval : undefined; + const riskLevel: 'low' | 'medium' | 'high' = + typeof requireApproval === 'function' ? 'high' : requireApproval === true ? 'medium' : 'low'; + + const emit = await this.hooksManager.emit( + 'PermissionRequest', + { + toolName: toolCall.name, + toolInput: (toolCall.arguments ?? {}) as Record, + riskLevel, + sessionId: this.currentState?.id ?? '', + }, + { + toolName: toolCall.name, + }, + ); + + // Last-wins: if multiple handlers disagree, the most recently registered + // handler dictates the outcome. This is documented and intentional — + // callers that want stricter semantics should register a single final + // handler (or use `throwOnHandlerError` to surface conflicts in tests). + const last = emit.results.at(-1); + if ( + last && + typeof last === 'object' && + 'decision' in last && + (last.decision === 'allow' || last.decision === 'deny' || last.decision === 'ask_user') + ) { + const out: { + decision: 'allow' | 'deny' | 'ask_user'; + reason?: string; + } = { + decision: last.decision, + }; + if ('reason' in last && typeof last.reason === 'string') { + out.reason = last.reason; + } + return out; + } + return { + decision: 'ask_user', + }; + } + + /** + * Run the UserPromptSubmit hook, supporting both string and structured + * inputs. If a handler returns a mutated prompt, the returned object + * applies the mutation back to the original input shape (string in = + * string out, message array in = message array out with the latest user + * text replaced). + * + * Throws if any handler rejects the prompt. + * + * Returns `undefined` when the hook does nothing, or when no usable prompt + * could be extracted from a structured input (handler is skipped and a + * one-time `console.warn` in dev builds explains why). + */ + private async maybeRunUserPromptSubmit(currentInput: models.InputsUnion | undefined): Promise< + | { + applyTo: (original: models.InputsUnion | undefined) => models.InputsUnion; + } + | undefined + > { + if (!this.hooksManager || currentInput === undefined) { + return undefined; + } + + const { prompt, applyMutated } = extractPromptAndApplier(currentInput); + if (prompt === undefined) { + if (process.env['NODE_ENV'] !== 'production') { + console.warn( + '[UserPromptSubmit] Could not extract a user prompt from structured input; skipping hook.', + ); + } + return undefined; + } + + const emit = await this.hooksManager.emit('UserPromptSubmit', { + prompt, + sessionId: this.currentState?.id ?? '', + }); + + if (emit.blocked) { + const rejectResult = emit.results.find( + (r) => r && typeof r === 'object' && 'reject' in r && r.reject, + ); + const reason = + rejectResult && typeof rejectResult.reject === 'string' + ? rejectResult.reject + : 'Prompt rejected by hook'; + throw new Error(reason); + } + + const mutated = emit.finalPayload.prompt; + if (mutated === prompt) { + return undefined; + } + + return { + applyTo: (original: models.InputsUnion | undefined) => applyMutated(mutated, original), + }; + } + /** * Execute tools that can auto-execute (don't require approval) in parallel. * @@ -634,15 +1114,28 @@ export class ModelResult< return null; } - const result = await executeTool( + // Route through runToolWithHooks so PreToolUse/PostToolUse fire even on + // the auto-approve path. `runToolWithHooks` also fails closed on raw + // JSON-parse failures so hooks never see a malformed payload. + const hookOutcome = await this.runToolWithHooks( tool, tc as ParsedToolCall, turnContext, - undefined, - this.contextStore ?? undefined, - this.options.sharedContextSchema, ); + if (hookOutcome.type === 'parse_error') { + this.broadcastToolResult(tc.id, { + error: hookOutcome.errorMessage, + } as InferToolOutputsUnion); + return createRejectedResult(tc.id, String(tc.name), hookOutcome.errorMessage); + } + + if (hookOutcome.type === 'hook_blocked') { + return createRejectedResult(tc.id, String(tc.name), hookOutcome.reason); + } + + const result = hookOutcome.result; + if (result.error) { return createRejectedResult(tc.id, String(tc.name), result.error.message); } @@ -700,37 +1193,87 @@ export class ModelResult< // context is handled via contextStore, not on TurnContext }; - const { requiresApproval: needsApproval, autoExecute } = await partitionToolCalls( - toolCalls as ParsedToolCall[], - this.options.tools, - turnContext, - this.requireApprovalFn ?? undefined, - ); + const { requiresApproval: needsApproval, autoExecute: autoExecuteInitial } = + await partitionToolCalls( + toolCalls as ParsedToolCall[], + this.options.tools, + turnContext, + this.requireApprovalFn ?? undefined, + ); - if (needsApproval.length === 0) { - return false; + // Seed the auto-execute list with anything that didn't need approval. The + // PermissionRequest hook may promote more calls into this bucket (allow) + // or synthesize pre-denied results (deny) below. + const autoExecute: ParsedToolCall[] = [ + ...autoExecuteInitial, + ]; + const preDeniedResults: UnsentToolResult[] = []; + const stillPending: ParsedToolCall[] = []; + + // Run the PermissionRequest hook for each tool that needs approval. + // This lets hooks short-circuit the approval flow in either direction. + if (this.hooksManager && needsApproval.length > 0) { + for (const tc of needsApproval) { + const { decision, reason } = await this.emitPermissionRequest(tc as ParsedToolCall); + if (decision === 'allow') { + autoExecute.push(tc); + } else if (decision === 'deny') { + preDeniedResults.push( + createRejectedResult( + tc.id, + String(tc.name), + reason ?? 'Denied by PermissionRequest hook', + ) as UnsentToolResult, + ); + } else { + stillPending.push(tc); + } + } + } else { + stillPending.push(...needsApproval); } - // Validate: approval requires state accessor - if (!this.stateAccessor) { - const toolNames = needsApproval.map((tc) => tc.name).join(', '); + // Validate: approval requires state accessor when any tool needs a gate + // or is being pre-denied by the hook. + if (!this.stateAccessor && (stillPending.length > 0 || preDeniedResults.length > 0)) { + const toolNames = stillPending.map((tc) => tc.name).join(', '); throw new Error( - `Tool(s) require approval but no state accessor is configured: ${toolNames}. ` + + `Tool(s) require approval but no state accessor is configured: ${toolNames || '(hook-denied tools)'}. ` + 'Provide a StateAccessor via the "state" parameter to enable approval workflows.', ); } - // Execute auto-approve tools + // Execute auto-approve tools (includes any promoted by hook "allow"). const unsentResults = await this.executeAutoApproveTools(autoExecute, turnContext); - // Save state with pending approvals + // Combine pre-denied results (from hook "deny") with executed results. + const combinedResults: UnsentToolResult[] = [ + ...unsentResults, + ...preDeniedResults, + ]; + + if (stillPending.length === 0) { + // Nothing needs human approval. Persist the results we already have so + // the caller's normal flow can pick them up on the next turn without + // re-executing the tools. This path can be reached even when no tool + // originally required approval (the hook said "allow" on everything) + // or when the hook denied everything. + if (this.stateAccessor && combinedResults.length > 0) { + await this.saveStateSafely({ + unsentToolResults: combinedResults, + }); + } + return false; + } + + // Save state with pending approvals (only reached when stillPending > 0). const stateUpdates: Partial, 'id' | 'createdAt' | 'updatedAt'>> = { - pendingToolCalls: needsApproval, + pendingToolCalls: stillPending, status: 'awaiting_approval', }; - if (unsentResults.length > 0) { - stateUpdates.unsentToolResults = unsentResults; + if (combinedResults.length > 0) { + stateUpdates.unsentToolResults = combinedResults; } await this.saveStateSafely(stateUpdates); @@ -758,33 +1301,6 @@ export class ModelResult< return null; } - // Check if arguments failed to parse (remained as string instead of object) - const args: unknown = toolCall.arguments; - if (typeof args === 'string') { - const rawArgs = args; - const errorMessage = - `Failed to parse tool call arguments for "${toolCall.name}": The model provided invalid JSON. ` + - `Raw arguments received: "${rawArgs}". ` + - 'Please provide valid JSON arguments for this tool call.'; - - this.broadcastToolResult(toolCall.id, { - error: errorMessage, - } as InferToolOutputsUnion); - - return { - type: 'parse_error' as const, - toolCall, - output: { - type: 'function_call_output' as const, - id: `output_${toolCall.id}`, - callId: toolCall.id, - output: JSON.stringify({ - error: errorMessage, - }), - }, - }; - } - const preliminaryResultsForCall: InferToolEventsUnion[] = []; const hasBroadcaster = this.toolEventBroadcaster || this.turnBroadcaster; @@ -796,20 +1312,31 @@ export class ModelResult< } : undefined; - const result = await executeTool( + // Run the tool through the full Pre/Post lifecycle hooks. The helper + // fails closed on a JSON-parse failure in toolCall.arguments so hooks + // never see a malformed payload; the caller below handles that case + // via the shared `parse_error` / `hook_blocked` branch. + const executed = await this.runToolWithHooks( tool, toolCall, turnContext, onPreliminaryResult, - this.contextStore ?? undefined, - this.options.sharedContextSchema, ); + if (executed.type === 'parse_error') { + this.broadcastToolResult(toolCall.id, { + error: executed.errorMessage, + } as InferToolOutputsUnion); + return executed; + } + if (executed.type === 'hook_blocked') { + return executed; + } return { type: 'execution' as const, - toolCall, + toolCall: executed.effectiveToolCall, tool, - result, + result: executed.result, preliminaryResultsForCall, }; }); @@ -828,6 +1355,7 @@ export class ModelResult< const errorMessage = settled.reason instanceof Error ? settled.reason.message : String(settled.reason); + // `runToolWithHooks` is the single point of emission for PostToolUseFailure. this.broadcastToolResult(originalToolCall.id, { error: errorMessage, } as InferToolOutputsUnion); @@ -854,7 +1382,7 @@ export class ModelResult< continue; } - if (value.type === 'parse_error') { + if (value.type === 'parse_error' || value.type === 'hook_blocked') { toolResults.push(value.output); this.turnBroadcaster?.push({ type: 'tool.call_output' as const, @@ -1204,6 +1732,48 @@ export class ModelResult< // Resolve any async functions first let baseRequest = await this.resolveRequestForContext(initialContext); + // Emit SessionStart hook. The `config` payload carries a stable, small + // summary of session-level options so handlers can make routing/auditing + // decisions without the SDK having to promise more than it can deliver. + // If future session config becomes available, extend this object rather + // than introducing a new payload field. + if (this.hooksManager) { + const sessionId = this.currentState?.id ?? ''; + this.hooksManager.setSessionId(sessionId); + await this.hooksManager.emit('SessionStart', { + sessionId, + config: { + hasTools: !!this.options.tools?.length, + hasApproval: + !!this.requireApprovalFn || + !!(this.options.tools ?? []).some( + (t) => + isClientTool(t) && + (t.function.requireApproval === true || + typeof t.function.requireApproval === 'function'), + ), + hasState: !!this.stateAccessor, + }, + }); + this.sessionStartEmitted = true; + } + + // Emit UserPromptSubmit hook BEFORE the stateful input-wrapping block so + // the handler sees the original user-supplied prompt string (and can + // reject or mutate it before any messages are appended). For structured + // (non-string) inputs we extract the latest user-role text content so + // handlers still get a chance to intercept; if nothing suitable is + // found we skip silently and document the limitation in the log below. + if (this.hooksManager) { + const promptResult = await this.maybeRunUserPromptSubmit(baseRequest.input); + if (promptResult) { + baseRequest = { + ...baseRequest, + input: promptResult.applyTo(baseRequest.input), + }; + } + } + // If we have state with existing messages, use those as input if ( this.currentState?.messages && @@ -1289,7 +1859,8 @@ export class ModelResult< // context is handled via contextStore, not on TurnContext }; - // Process approvals - execute the approved tools + // Process approvals - execute the approved tools. Route through + // runToolWithHooks so PreToolUse/PostToolUse fire even on this path. for (const callId of this.approvedToolCalls) { const toolCall = pendingCalls.find((tc) => tc.id === callId); if (!toolCall) { @@ -1307,15 +1878,28 @@ export class ModelResult< continue; } - const result = await executeTool( + const hookOutcome = await this.runToolWithHooks( tool, toolCall as ParsedToolCall, turnContext, - undefined, - this.contextStore ?? undefined, - this.options.sharedContextSchema, ); + if (hookOutcome.type === 'parse_error') { + this.broadcastToolResult(callId, { + error: hookOutcome.errorMessage, + } as InferToolOutputsUnion); + unsentResults.push( + createRejectedResult(callId, String(toolCall.name), hookOutcome.errorMessage), + ); + continue; + } + + if (hookOutcome.type === 'hook_blocked') { + unsentResults.push(createRejectedResult(callId, String(toolCall.name), hookOutcome.reason)); + continue; + } + + const result = hookOutcome.result; if (result.error) { unsentResults.push( createRejectedResult(callId, String(toolCall.name), result.error.message), @@ -1457,140 +2041,238 @@ export class ModelResult< // biome-ignore lint: IIFE used for lazy initialization pattern this.toolExecutionPromise = (async () => { - await this.initStream(); - - // If resuming from approval and still pending, don't continue - if (this.isResumingFromApproval && this.currentState?.status === 'awaiting_approval') { - return; - } - - // Get initial response - let currentResponse = await this.getInitialResponse(); - - // Save initial response to state - await this.saveResponseToState(currentResponse); - - // Check if tools should be executed - const hasToolCalls = currentResponse.output.some( - (item) => hasTypeProperty(item) && item.type === 'function_call', - ); - - if (!this.options.tools?.length || !hasToolCalls) { - this.finalResponse = currentResponse; - await this.markStateComplete(); - return; - } + // SessionEnd/drain must fire on every exit path (success, early return, + // approval pause, interruption, and exceptions), so wrap the body in + // try/catch/finally and track the session-end reason as we go. + // Approval pauses keep reason='complete' because the run hasn't failed — + // it's simply paused awaiting user decisions. + let sessionEndReason: 'user' | 'error' | 'max_turns' | 'complete' = 'complete'; + try { + await this.initStream(); + + // If resuming from approval and still pending, don't continue + if (this.isResumingFromApproval && this.currentState?.status === 'awaiting_approval') { + return; + } - // Extract and check tool calls - const toolCalls = extractToolCallsFromResponse(currentResponse); + // Get initial response + let currentResponse = await this.getInitialResponse(); - // Check for approval requirements - if (await this.handleApprovalCheck(toolCalls, 0, currentResponse)) { - return; // Paused for approval - } - - if (!this.hasExecutableToolCalls(toolCalls)) { - this.finalResponse = currentResponse; - await this.markStateComplete(); - return; - } + // Save initial response to state + await this.saveResponseToState(currentResponse); - // Main execution loop - let currentRound = 0; + // Check if tools should be executed + const hasToolCalls = currentResponse.output.some( + (item) => hasTypeProperty(item) && item.type === 'function_call', + ); - while (true) { - // Check for external interruption - if (await this.checkForInterruption(currentResponse)) { + if (!this.options.tools?.length || !hasToolCalls) { + this.finalResponse = currentResponse; + await this.markStateComplete(); return; } - // Check stop conditions - if (await this.shouldStopExecution()) { - break; - } + // Extract and check tool calls + const toolCalls = extractToolCallsFromResponse(currentResponse); - const currentToolCalls = extractToolCallsFromResponse(currentResponse); - if (currentToolCalls.length === 0) { - break; + // Check for approval requirements + if (await this.handleApprovalCheck(toolCalls, 0, currentResponse)) { + return; // Paused for approval } - // Check for approval requirements - if (await this.handleApprovalCheck(currentToolCalls, currentRound + 1, currentResponse)) { + if (!this.hasExecutableToolCalls(toolCalls)) { + this.finalResponse = currentResponse; + await this.markStateComplete(); return; } - if (!this.hasExecutableToolCalls(currentToolCalls)) { - break; - } + // Main execution loop + let currentRound = 0; + // Cap consecutive forceResume overrides so a misbehaving Stop hook + // cannot spin the loop forever. We pick 3 as a conservative upper bound + // -- it's enough to let a hook gather a couple of follow-up actions but + // small enough that a buggy handler fails fast with a visible warning. + const MAX_FORCE_RESUME_OVERRIDES = 3; + let forceResumeCount = 0; + + while (true) { + // Check for external interruption + if (await this.checkForInterruption(currentResponse)) { + sessionEndReason = 'user'; + return; + } - // Build turn context - const turnNumber = currentRound + 1; - const turnContext: TurnContext = { - numberOfTurns: turnNumber, - }; + // Check stop conditions + if (await this.shouldStopExecution()) { + // Emit Stop hook -- can force resume or inject prompt. + // shouldStopExecution() is driven by stopWhen conditions (default + // stepCountIs), so 'max_turns' is the semantically accurate reason. + if (this.hooksManager) { + const stopResult = await this.hooksManager.emit('Stop', { + reason: 'max_turns' as const, + sessionId: this.currentState?.id ?? '', + }); + + // Honor forceResume if ANY handler returns it, not just the last. + const shouldForceResume = stopResult.results.some( + (r) => r && typeof r === 'object' && 'forceResume' in r && r.forceResume === true, + ); + + // Collect every appendPrompt the handlers supplied (concatenated + // with newlines). appendPrompt is honored independently of + // forceResume so a handler can nudge the next turn without + // forcing a resume. + const appendParts: string[] = []; + for (const r of stopResult.results) { + if ( + r && + typeof r === 'object' && + 'appendPrompt' in r && + typeof r.appendPrompt === 'string' && + r.appendPrompt.length > 0 + ) { + appendParts.push(r.appendPrompt); + } + } + const appendPrompt = appendParts.join('\n'); - await this.options.onTurnStart?.(turnContext); + if (appendPrompt) { + await this.injectAppendPromptMessage(appendPrompt); + } - // Resolve async functions for this turn - await this.resolveAsyncFunctionsForTurn(turnContext); + if (shouldForceResume) { + if (forceResumeCount >= MAX_FORCE_RESUME_OVERRIDES) { + // Don't let the hook loop the engine forever. Log and stop. + console.warn( + `[Stop hook] forceResume honored ${MAX_FORCE_RESUME_OVERRIDES} times without new progress; stopping to prevent an infinite loop.`, + ); + sessionEndReason = 'max_turns'; + break; + } + forceResumeCount++; + // Continue the loop. If appendPrompt was supplied we already + // injected it, which advances state so the stop condition may + // no longer fire on the next iteration. + continue; + } + } + // Stop condition fired and the hook (if any) did not force resume + // -- this is a max_turns-style exit, not a natural completion. + sessionEndReason = 'max_turns'; + break; + } - // Execute tools - const toolResults = await this.executeToolRound(currentToolCalls, turnContext); + const currentToolCalls = extractToolCallsFromResponse(currentResponse); + if (currentToolCalls.length === 0) { + break; + } - // Server-tool output items are already-executed results in the - // response; collect them so toolResults presents a unified list. - const serverToolItems: ToolResultItem[] = []; - for (const item of currentResponse.output) { - if (!hasTypeProperty(item)) { - continue; + // Check for approval requirements + if (await this.handleApprovalCheck(currentToolCalls, currentRound + 1, currentResponse)) { + return; } - if ( - item.type === 'message' || - item.type === 'reasoning' || - item.type === 'function_call' - ) { - continue; + + if (!this.hasExecutableToolCalls(currentToolCalls)) { + break; } - // Everything else is a server-tool output item (web_search_call, - // image_generation_call, file_search_call, or generic - // OutputServerToolItem covering openrouter:datetime and any new - // SDK server tool types). - if (isServerToolResultItem(item)) { - serverToolItems.push(item); + + // Build turn context + const turnNumber = currentRound + 1; + const turnContext: TurnContext = { + numberOfTurns: turnNumber, + }; + + await this.options.onTurnStart?.(turnContext); + + // Resolve async functions for this turn + await this.resolveAsyncFunctionsForTurn(turnContext); + + // Execute tools + const toolResults = await this.executeToolRound(currentToolCalls, turnContext); + + // A tool round with observable progress resets the consecutive + // forceResume counter so a legitimate override earlier in the run + // does not count against a later, independent one. + if (toolResults.length > 0) { + forceResumeCount = 0; } - } - // Track execution round - this.allToolExecutionRounds.push({ - round: currentRound, - toolCalls: currentToolCalls, - response: currentResponse, - toolResults: [ - ...toolResults, - ...serverToolItems, - ], - }); + // Server-tool output items are already-executed results in the + // response; collect them so toolResults presents a unified list. + const serverToolItems: ToolResultItem[] = []; + for (const item of currentResponse.output) { + if (!hasTypeProperty(item)) { + continue; + } + if ( + item.type === 'message' || + item.type === 'reasoning' || + item.type === 'function_call' + ) { + continue; + } + // Everything else is a server-tool output item (web_search_call, + // image_generation_call, file_search_call, or generic + // OutputServerToolItem covering openrouter:datetime and any new + // SDK server tool types). + if (isServerToolResultItem(item)) { + serverToolItems.push(item); + } + } + + // Track execution round + this.allToolExecutionRounds.push({ + round: currentRound, + toolCalls: currentToolCalls, + response: currentResponse, + toolResults: [ + ...toolResults, + ...serverToolItems, + ], + }); - // Save tool results to state - await this.saveToolResultsToState(toolResults); + // Save tool results to state + await this.saveToolResultsToState(toolResults); - // Apply nextTurnParams - await this.applyNextTurnParams(currentToolCalls); + // Apply nextTurnParams + await this.applyNextTurnParams(currentToolCalls); - currentResponse = await this.makeFollowupRequest(currentResponse, toolResults, turnNumber); + currentResponse = await this.makeFollowupRequest( + currentResponse, + toolResults, + turnNumber, + ); + // A fresh response replaces the prior one -- that's new progress, + // so reset consecutive forceResume counting. + forceResumeCount = 0; - await this.options.onTurnEnd?.(turnContext, currentResponse); + await this.options.onTurnEnd?.(turnContext, currentResponse); - // Save new response to state - await this.saveResponseToState(currentResponse); + // Save new response to state + await this.saveResponseToState(currentResponse); - currentRound++; - } + currentRound++; + } - // Validate and finalize - this.validateFinalResponse(currentResponse); - this.finalResponse = currentResponse; - await this.markStateComplete(); + // Validate and finalize + this.validateFinalResponse(currentResponse); + this.finalResponse = currentResponse; + await this.markStateComplete(); + } catch (error) { + sessionEndReason = 'error'; + throw error; + } finally { + // Only emit SessionEnd if SessionStart actually succeeded. Otherwise + // initStream threw before emit, and a dangling SessionEnd would break + // handlers that pair Start/End (audit logs, acquired resources). + if (this.hooksManager && this.sessionStartEmitted) { + await this.hooksManager.emit('SessionEnd', { + sessionId: this.currentState?.id ?? '', + reason: sessionEndReason, + }); + await this.hooksManager.drain(); + } + } })(); return this.toolExecutionPromise; diff --git a/packages/agent/tests/unit/hooks-emit-adversarial.test.ts b/packages/agent/tests/unit/hooks-emit-adversarial.test.ts new file mode 100644 index 0000000..c65578c --- /dev/null +++ b/packages/agent/tests/unit/hooks-emit-adversarial.test.ts @@ -0,0 +1,717 @@ +import { describe, expect, it, vi } from 'vitest'; +import { executeHandlerChain } from '../../src/lib/hooks-emit.js'; +import type { HookEntry, LifecycleHookContext } from '../../src/lib/hooks-types.js'; + +function makeContext(hookName = 'TestHook'): LifecycleHookContext { + return { + signal: new AbortController().signal, + hookName, + sessionId: 'test-session', + }; +} + +describe('executeHandlerChain (adversarial)', () => { + describe('empty and degenerate inputs', () => { + it('returns clean result for empty entries array', async () => { + const result = await executeHandlerChain( + [], + { + v: 1, + }, + makeContext(), + { + hookName: 'Test', + throwOnHandlerError: false, + }, + ); + + expect(result.results).toEqual([]); + expect(result.pending).toEqual([]); + expect(result.blocked).toBe(false); + expect(result.finalPayload).toEqual({ + v: 1, + }); + }); + + it('skips sparse array holes (undefined entries)', async () => { + // biome-ignore lint/suspicious/noSparseArray: intentional sparse array to test hole handling + const entries = [ + , + , + { + handler: vi.fn(), + }, + ] as unknown as HookEntry[]; + + await executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: false, + }); + + expect(entries[2]!.handler).toHaveBeenCalledOnce(); + }); + }); + + describe('handler return value edge cases', () => { + it('treats null return as void (no result collected)', async () => { + const entries: HookEntry< + unknown, + { + v: number; + } + >[] = [ + { + handler: () => + null as unknown as { + v: number; + }, + }, + ]; + + const result = await executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: false, + }); + + expect(result.results).toEqual([]); + }); + + it('collects non-object primitive results without mutation crash', async () => { + const entries: HookEntry< + { + toolInput: Record; + }, + number + >[] = [ + { + handler: () => 42, + }, + { + handler: () => 99, + }, + ]; + + const result = await executeHandlerChain( + entries, + { + toolInput: { + a: 1, + }, + }, + makeContext(), + { + hookName: 'PreToolUse', + throwOnHandlerError: false, + }, + ); + + // applyMutations should be a no-op for primitives + expect(result.results).toEqual([ + 42, + 99, + ]); + expect(result.finalPayload.toolInput).toEqual({ + a: 1, + }); + }); + + it('does not trigger block on non-blocking hooks even if result has block field', async () => { + const entries: HookEntry< + unknown, + { + block: boolean; + } + >[] = [ + { + handler: () => ({ + block: true, + }), + }, + ]; + + const result = await executeHandlerChain(entries, {}, makeContext(), { + hookName: 'PostToolUse', // Not a blocking hook + throwOnHandlerError: false, + }); + + expect(result.blocked).toBe(false); + expect(result.results).toEqual([ + { + block: true, + }, + ]); + }); + + it('block: false does not short-circuit PreToolUse', async () => { + const second = vi.fn(() => ({ + block: false, + })); + const entries: HookEntry< + { + toolInput: Record; + }, + { + block: boolean; + } + >[] = [ + { + handler: () => ({ + block: false, + }), + }, + { + handler: second, + }, + ]; + + const result = await executeHandlerChain( + entries, + { + toolInput: {}, + }, + makeContext(), + { + hookName: 'PreToolUse', + throwOnHandlerError: false, + }, + ); + + expect(result.blocked).toBe(false); + expect(second).toHaveBeenCalled(); + }); + + it('block: 0 (falsy number) does not short-circuit', async () => { + const second = vi.fn(); + const entries: HookEntry< + { + toolInput: Record; + }, + { + block: number; + } + >[] = [ + { + handler: () => ({ + block: 0, + }), + }, + { + handler: second, + }, + ]; + + const result = await executeHandlerChain( + entries, + { + toolInput: {}, + }, + makeContext(), + { + hookName: 'PreToolUse', + throwOnHandlerError: false, + }, + ); + + // 0 is neither `true` nor a string, so should not block + expect(result.blocked).toBe(false); + expect(second).toHaveBeenCalled(); + }); + + it('block: "" (empty string) does NOT short-circuit (fails closed without a reason)', async () => { + const second = vi.fn(); + const entries: HookEntry< + { + toolInput: Record; + }, + { + block: string; + } + >[] = [ + { + handler: () => ({ + block: '', + }), + }, + { + handler: second, + }, + ]; + + const result = await executeHandlerChain( + entries, + { + toolInput: {}, + }, + makeContext(), + { + hookName: 'PreToolUse', + throwOnHandlerError: false, + }, + ); + + // An empty string is not a usable block reason; model-result.ts searches + // for the first truthy `block` field to surface a reason and would fall + // through, so emit must also treat this as "no block triggered" for the + // two sides to stay consistent. + expect(result.blocked).toBe(false); + expect(second).toHaveBeenCalled(); + }); + }); + + describe('mutation piping edge cases', () => { + it('mutatedInput: undefined does not overwrite existing toolInput', async () => { + const entries: HookEntry< + { + toolInput: Record; + }, + { + mutatedInput: undefined; + } + >[] = [ + { + handler: () => ({ + mutatedInput: undefined, + }), + }, + ]; + + const result = await executeHandlerChain( + entries, + { + toolInput: { + original: true, + }, + }, + makeContext(), + { + hookName: 'PreToolUse', + throwOnHandlerError: false, + }, + ); + + expect(result.finalPayload.toolInput).toEqual({ + original: true, + }); + }); + + it('mutatedPrompt pipes correctly through UserPromptSubmit', async () => { + const entries: HookEntry< + { + prompt: string; + }, + { + mutatedPrompt: string; + } + >[] = [ + { + handler: () => ({ + mutatedPrompt: 'rewritten-1', + }), + }, + { + handler: (p) => ({ + mutatedPrompt: `${p.prompt}+appended`, + }), + }, + ]; + + const result = await executeHandlerChain( + entries, + { + prompt: 'original', + }, + makeContext(), + { + hookName: 'UserPromptSubmit', + throwOnHandlerError: false, + }, + ); + + // First handler rewrites prompt to 'rewritten-1', second sees 'rewritten-1' as p.prompt + expect(result.finalPayload.prompt).toBe('rewritten-1+appended'); + }); + + it('result with __proto__ key does not cause prototype pollution', async () => { + const entries: HookEntry< + { + toolInput: Record; + }, + Record + >[] = [ + { + handler: () => { + const obj = Object.create(null); + obj['__proto__'] = { + polluted: true, + }; + return obj; + }, + }, + ]; + + const result = await executeHandlerChain( + entries, + { + toolInput: {}, + }, + makeContext(), + { + hookName: 'PreToolUse', + throwOnHandlerError: false, + }, + ); + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + expect(({} as Record)['polluted'] as unknown).toBeUndefined(); + expect(result.results.length).toBe(1); + }); + }); + + describe('filter edge cases', () => { + it('throwing filter is caught in non-strict mode', async () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + const entries: HookEntry[] = [ + { + filter: () => { + throw new Error('filter boom'); + }, + handler: vi.fn(), + }, + ]; + + // The filter throw happens inside the try block since filter is called + // before handler. Let's verify current behavior. + // Looking at the code: filter is called OUTSIDE the try block! + // This means a throwing filter will propagate. + await expect( + executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: false, + }), + ).rejects.toThrow('filter boom'); + + warnSpy.mockRestore(); + }); + + it('throwing filter in strict mode propagates error', async () => { + const entries: HookEntry[] = [ + { + filter: () => { + throw new Error('filter fail'); + }, + handler: vi.fn(), + }, + ]; + + await expect( + executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: true, + }), + ).rejects.toThrow('filter fail'); + }); + }); + + describe('async output edge cases', () => { + it('{ async: true, block: true, work: ... } is treated as a block result, not async', async () => { + // isAsyncOutput requires `async: true` AND no foreign keys. A `block` + // field is foreign to AsyncOutput, so the value is treated as a result + // (so the block/reject/mutation piping isn't silently discarded). + const entries: HookEntry< + { + toolInput: Record; + }, + { + async: true; + block: true; + work: Promise; + } + >[] = [ + { + handler: () => ({ + async: true as const, + block: true, + work: Promise.resolve(), + }), + }, + ]; + + const result = await executeHandlerChain( + entries, + { + toolInput: {}, + }, + makeContext(), + { + hookName: 'PreToolUse', + throwOnHandlerError: false, + }, + ); + + expect(result.pending.length).toBe(0); + expect(result.blocked).toBe(true); + expect(result.results).toHaveLength(1); + }); + + it('{ async: true } without `work` does not push a pending promise', async () => { + const entries: HookEntry< + unknown, + { + async: true; + } + >[] = [ + { + handler: () => ({ + async: true as const, + }), + }, + ]; + + const result = await executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: false, + }); + + // No `work` => nothing to track; pending stays empty. + expect(result.pending.length).toBe(0); + expect(result.results).toEqual([]); + }); + + it('{ async: "true" } (string) is NOT treated as async', async () => { + const entries: HookEntry< + unknown, + { + async: string; + } + >[] = [ + { + handler: () => ({ + async: 'true', + }), + }, + ]; + + const result = await executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: false, + }); + + // isAsyncOutput requires async === true (boolean), not "true" (string) + expect(result.pending.length).toBe(0); + expect(result.results).toEqual([ + { + async: 'true', + }, + ]); + }); + }); + + describe('error handling edge cases', () => { + it('handler returning a rejected promise is caught in non-strict mode', async () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + const secondHandler = vi.fn(); + + const entries: HookEntry[] = [ + { + handler: () => Promise.reject(new Error('async boom')), + }, + { + handler: secondHandler, + }, + ]; + + await executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: false, + }); + + expect(warnSpy).toHaveBeenCalled(); + expect(secondHandler).toHaveBeenCalled(); + warnSpy.mockRestore(); + }); + + it('non-Error thrown values are caught in non-strict mode', async () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + + const entries: HookEntry[] = [ + { + handler: () => { + throw 'string error'; + }, + }, + ]; + + await executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: false, + }); + + expect(warnSpy).toHaveBeenCalled(); + warnSpy.mockRestore(); + }); + + it('non-Error thrown values propagate in strict mode', async () => { + const entries: HookEntry[] = [ + { + handler: () => { + throw 'string error'; + }, + }, + ]; + + await expect( + executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: true, + }), + ).rejects.toBe('string error'); + }); + }); + + describe('matcher + filter interaction', () => { + it('matcher is checked before filter — mismatched matcher skips filter call', async () => { + const filterFn = vi.fn(() => true); + const entries: HookEntry[] = [ + { + matcher: 'Bash', + filter: filterFn, + handler: vi.fn(), + }, + ]; + + await executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: false, + toolName: 'ReadFile', + }); + + expect(filterFn).not.toHaveBeenCalled(); + }); + + it('matcher without toolName in options skips the handler (fails closed)', async () => { + const handler = vi.fn(); + const entries: HookEntry[] = [ + { + matcher: 'Bash', + handler, + }, + ]; + + await executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: false, + // no toolName + }); + + // A handler with a matcher has declared it cares about tool identity. + // If the caller cannot supply a toolName, we cannot prove the matcher + // applies, so we skip the handler rather than invoking it globally. + expect(handler).not.toHaveBeenCalled(); + }); + }); + + describe('abort signal propagation', () => { + it('bails out of the chain when context.signal aborts mid-chain', async () => { + const controller = new AbortController(); + const firstHandler = vi.fn(async () => { + controller.abort(); + return undefined; + }); + const secondHandler = vi.fn(); + const thirdHandler = vi.fn(); + + const entries: HookEntry[] = [ + { + handler: firstHandler, + }, + { + handler: secondHandler, + }, + { + handler: thirdHandler, + }, + ]; + + await executeHandlerChain( + entries, + {}, + { + signal: controller.signal, + hookName: 'Test', + sessionId: 's', + }, + { + hookName: 'Test', + throwOnHandlerError: false, + }, + ); + + expect(firstHandler).toHaveBeenCalledOnce(); + expect(secondHandler).not.toHaveBeenCalled(); + expect(thirdHandler).not.toHaveBeenCalled(); + }); + + it('skips every handler when signal is already aborted before the chain', async () => { + const controller = new AbortController(); + controller.abort(); + const handler = vi.fn(); + + const entries: HookEntry[] = [ + { + handler, + }, + ]; + + await executeHandlerChain( + entries, + {}, + { + signal: controller.signal, + hookName: 'Test', + sessionId: 's', + }, + { + hookName: 'Test', + throwOnHandlerError: false, + }, + ); + + expect(handler).not.toHaveBeenCalled(); + }); + }); + + describe('large chain stress', () => { + it('handles 1000 handlers without stack overflow', async () => { + const entries: HookEntry< + { + count: number; + }, + void + >[] = Array.from( + { + length: 1000, + }, + () => ({ + handler: vi.fn(), + }), + ); + + const result = await executeHandlerChain( + entries, + { + count: 0, + }, + makeContext(), + { + hookName: 'Test', + throwOnHandlerError: false, + }, + ); + + expect(result.results).toEqual([]); + for (const entry of entries) { + expect(entry.handler).toHaveBeenCalledOnce(); + } + }); + }); +}); diff --git a/packages/agent/tests/unit/hooks-emit.test.ts b/packages/agent/tests/unit/hooks-emit.test.ts new file mode 100644 index 0000000..022358e --- /dev/null +++ b/packages/agent/tests/unit/hooks-emit.test.ts @@ -0,0 +1,597 @@ +import { describe, expect, it, vi } from 'vitest'; +import * as z4 from 'zod/v4'; +import { executeHandlerChain } from '../../src/lib/hooks-emit.js'; +import type { HookEntry, LifecycleHookContext } from '../../src/lib/hooks-types.js'; + +function makeContext(hookName = 'TestHook'): LifecycleHookContext { + return { + signal: new AbortController().signal, + hookName, + sessionId: 'test-session', + }; +} + +describe('executeHandlerChain', () => { + it('executes handlers in registration order', async () => { + const order: number[] = []; + const entries: HookEntry< + { + value: number; + }, + void + >[] = [ + { + handler: () => { + order.push(1); + }, + }, + { + handler: () => { + order.push(2); + }, + }, + { + handler: () => { + order.push(3); + }, + }, + ]; + + await executeHandlerChain( + entries, + { + value: 0, + }, + makeContext(), + { + hookName: 'Test', + throwOnHandlerError: false, + }, + ); + + expect(order).toEqual([ + 1, + 2, + 3, + ]); + }); + + it('skips handlers when matcher does not match', async () => { + const called = vi.fn(); + const entries: HookEntry< + { + toolName: string; + }, + void + >[] = [ + { + matcher: 'Bash', + handler: called, + }, + ]; + + await executeHandlerChain( + entries, + { + toolName: 'ReadFile', + }, + makeContext(), + { + hookName: 'Test', + throwOnHandlerError: false, + toolName: 'ReadFile', + }, + ); + + expect(called).not.toHaveBeenCalled(); + }); + + it('invokes handler when matcher matches', async () => { + const called = vi.fn(); + const entries: HookEntry< + { + toolName: string; + }, + void + >[] = [ + { + matcher: 'Bash', + handler: called, + }, + ]; + + await executeHandlerChain( + entries, + { + toolName: 'Bash', + }, + makeContext(), + { + hookName: 'Test', + throwOnHandlerError: false, + toolName: 'Bash', + }, + ); + + expect(called).toHaveBeenCalledOnce(); + }); + + it('skips handlers when filter returns false', async () => { + const called = vi.fn(); + const entries: HookEntry< + { + value: number; + }, + void + >[] = [ + { + filter: (p) => p.value > 5, + handler: called, + }, + ]; + + await executeHandlerChain( + entries, + { + value: 3, + }, + makeContext(), + { + hookName: 'Test', + throwOnHandlerError: false, + }, + ); + + expect(called).not.toHaveBeenCalled(); + }); + + it('collects sync results', async () => { + const entries: HookEntry< + { + v: number; + }, + { + doubled: number; + } + >[] = [ + { + handler: (p) => ({ + doubled: p.v * 2, + }), + }, + { + handler: (p) => ({ + doubled: p.v * 3, + }), + }, + ]; + + const result = await executeHandlerChain( + entries, + { + v: 5, + }, + makeContext(), + { + hookName: 'Test', + throwOnHandlerError: false, + }, + ); + + expect(result.results).toEqual([ + { + doubled: 10, + }, + { + doubled: 15, + }, + ]); + }); + + it('applies mutation piping for mutatedInput', async () => { + const entries: HookEntry< + { + toolInput: Record; + }, + { + mutatedInput: Record; + } + >[] = [ + { + handler: ({ toolInput }) => ({ + mutatedInput: { + ...toolInput, + added: true, + }, + }), + }, + { + handler: ({ toolInput }) => ({ + mutatedInput: { + ...toolInput, + second: true, + }, + }), + }, + ]; + + const result = await executeHandlerChain( + entries, + { + toolInput: { + original: true, + }, + }, + makeContext(), + { + hookName: 'PreToolUse', + throwOnHandlerError: false, + }, + ); + + expect(result.finalPayload.toolInput).toEqual({ + original: true, + added: true, + second: true, + }); + }); + + it('short-circuits on block for PreToolUse', async () => { + const secondHandler = vi.fn(); + const entries: HookEntry< + { + toolInput: Record; + }, + { + block?: boolean | string; + } + >[] = [ + { + handler: () => ({ + block: 'dangerous', + }), + }, + { + handler: secondHandler, + }, + ]; + + const result = await executeHandlerChain( + entries, + { + toolInput: {}, + }, + makeContext(), + { + hookName: 'PreToolUse', + throwOnHandlerError: false, + }, + ); + + expect(result.blocked).toBe(true); + expect(secondHandler).not.toHaveBeenCalled(); + }); + + it('short-circuits on reject for UserPromptSubmit', async () => { + const secondHandler = vi.fn(); + const entries: HookEntry< + { + prompt: string; + }, + { + reject?: boolean | string; + } + >[] = [ + { + handler: () => ({ + reject: 'not allowed', + }), + }, + { + handler: secondHandler, + }, + ]; + + const result = await executeHandlerChain( + entries, + { + prompt: 'test', + }, + makeContext(), + { + hookName: 'UserPromptSubmit', + throwOnHandlerError: false, + }, + ); + + expect(result.blocked).toBe(true); + expect(secondHandler).not.toHaveBeenCalled(); + }); + + it('handles async output by tracking pending promises when work is supplied', async () => { + let resolveWork: (() => void) | undefined; + const work = new Promise((resolve) => { + resolveWork = resolve; + }); + + const entries: HookEntry[] = [ + { + handler: () => ({ + async: true as const, + work, + }), + }, + ]; + + const result = await executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: false, + }); + + expect(result.pending.length).toBe(1); + expect(result.results.length).toBe(0); + + // Let the detached work settle so the tracked promise resolves. + resolveWork?.(); + await Promise.allSettled(result.pending); + }); + + it('logs a warning when async work rejects (default mode)', async () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + + const work = Promise.reject(new Error('detached boom')); + const entries: HookEntry[] = [ + { + handler: () => ({ + async: true as const, + work, + }), + }, + ]; + + const result = await executeHandlerChain(entries, {}, makeContext(), { + hookName: 'MyHook', + throwOnHandlerError: false, + }); + + expect(result.pending.length).toBe(1); + + // Drain detached work; this should not reject the tracked promise. + await Promise.allSettled(result.pending); + + expect(warnSpy).toHaveBeenCalledTimes(1); + const [message, err] = warnSpy.mock.calls[0] ?? []; + expect(message).toContain('MyHook'); + expect(message).toContain('rejected'); + expect(err).toBeInstanceOf(Error); + if (err instanceof Error) { + expect(err.message).toBe('detached boom'); + } + + warnSpy.mockRestore(); + }); + + it('logs (does not throw) when async work rejects in strict mode', async () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + + const work = Promise.reject(new Error('strict detached boom')); + const entries: HookEntry[] = [ + { + handler: () => ({ + async: true as const, + work, + }), + }, + ]; + + // Strict mode governs sync handler failures only; fire-and-forget rejection + // must still just log, never throw. + const result = await executeHandlerChain(entries, {}, makeContext(), { + hookName: 'StrictHook', + throwOnHandlerError: true, + }); + + await Promise.allSettled(result.pending); + + expect(warnSpy).toHaveBeenCalledTimes(1); + const [message] = warnSpy.mock.calls[0] ?? []; + expect(message).toContain('StrictHook'); + + warnSpy.mockRestore(); + }); + + it('logs and continues on handler error in default mode', async () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + const secondHandler = vi.fn(); + + const entries: HookEntry[] = [ + { + handler: () => { + throw new Error('boom'); + }, + }, + { + handler: secondHandler, + }, + ]; + + await executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: false, + }); + + expect(warnSpy).toHaveBeenCalled(); + expect(secondHandler).toHaveBeenCalled(); + warnSpy.mockRestore(); + }); + + it('throws in strict mode on handler error', async () => { + const entries: HookEntry[] = [ + { + handler: () => { + throw new Error('boom'); + }, + }, + ]; + + await expect( + executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: true, + }), + ).rejects.toThrow('boom'); + }); + + describe('per-hook mutation piping', () => { + it('does not pipe mutatedInput for UserPromptSubmit (wrong hook)', async () => { + const entries: HookEntry< + { + prompt: string; + toolInput?: Record; + }, + { + mutatedInput: Record; + } + >[] = [ + { + handler: () => ({ + mutatedInput: { + forbidden: true, + }, + }), + }, + ]; + + const result = await executeHandlerChain( + entries, + { + prompt: 'hello', + toolInput: { + original: true, + }, + }, + makeContext(), + { + hookName: 'UserPromptSubmit', + throwOnHandlerError: false, + }, + ); + + // UserPromptSubmit only pipes mutatedPrompt; mutatedInput must be ignored. + expect(result.finalPayload.toolInput).toEqual({ + original: true, + }); + }); + + it('does not pipe mutatedInput or mutatedPrompt for custom hooks', async () => { + const entries: HookEntry< + { + toolInput: Record; + prompt: string; + }, + { + mutatedInput: Record; + mutatedPrompt: string; + } + >[] = [ + { + handler: () => ({ + mutatedInput: { + injected: true, + }, + mutatedPrompt: 'hijacked', + }), + }, + ]; + + const result = await executeHandlerChain( + entries, + { + toolInput: { + original: true, + }, + prompt: 'original', + }, + makeContext(), + { + hookName: 'MyCustomHook', + throwOnHandlerError: false, + }, + ); + + // Custom hooks get no piping unless explicitly opted in. + expect(result.finalPayload.toolInput).toEqual({ + original: true, + }); + expect(result.finalPayload.prompt).toBe('original'); + }); + }); + + describe('result schema validation (chain-level)', () => { + it('drops an invalid result in default mode and continues', async () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + const resultSchema = z4.object({ + ok: z4.boolean(), + }); + + const entries: HookEntry< + unknown, + { + ok: unknown; + } + >[] = [ + { + handler: () => ({ + ok: 'not-a-boolean', + }), + }, + { + handler: () => ({ + ok: true, + }), + }, + ]; + + const result = await executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: false, + resultSchema, + }); + + expect(result.results).toEqual([ + { + ok: true, + }, + ]); + expect(warnSpy).toHaveBeenCalled(); + warnSpy.mockRestore(); + }); + + it('throws on invalid result in strict mode', async () => { + const resultSchema = z4.object({ + ok: z4.boolean(), + }); + + const entries: HookEntry< + unknown, + { + ok: unknown; + } + >[] = [ + { + handler: () => ({ + ok: 'nope', + }), + }, + ]; + + await expect( + executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: true, + resultSchema, + }), + ).rejects.toThrow('invalid result'); + }); + }); +}); diff --git a/packages/agent/tests/unit/hooks-manager-adversarial.test.ts b/packages/agent/tests/unit/hooks-manager-adversarial.test.ts new file mode 100644 index 0000000..6832ca2 --- /dev/null +++ b/packages/agent/tests/unit/hooks-manager-adversarial.test.ts @@ -0,0 +1,380 @@ +import { describe, expect, it, vi } from 'vitest'; +import * as z4 from 'zod/v4'; +import { HooksManager } from '../../src/lib/hooks-manager.js'; + +describe('HooksManager (adversarial)', () => { + describe('unsubscribe edge cases', () => { + it('calling unsubscribe twice does not throw or corrupt state', () => { + const manager = new HooksManager(); + const unsub = manager.on('PostToolUse', { + handler: vi.fn(), + }); + + unsub(); + unsub(); // second call should be harmless + + expect(manager.hasHandlers('PostToolUse')).toBe(false); + }); + + it('unsubscribe from one handler does not affect others', () => { + const manager = new HooksManager(); + const h1 = vi.fn(); + const h2 = vi.fn(); + + const unsub1 = manager.on('PostToolUse', { + handler: h1, + }); + manager.on('PostToolUse', { + handler: h2, + }); + + unsub1(); + expect(manager.hasHandlers('PostToolUse')).toBe(true); + }); + }); + + describe('off() edge cases', () => { + it('off with identical-looking but different function reference returns false', () => { + const manager = new HooksManager(); + const h1 = () => {}; + const h2 = () => {}; + + manager.on('PostToolUse', { + handler: h1, + }); + const removed = manager.off('PostToolUse', h2); + + expect(removed).toBe(false); + expect(manager.hasHandlers('PostToolUse')).toBe(true); + }); + + it('off for a hook name that was never registered returns false', () => { + const manager = new HooksManager(); + expect(manager.off('SessionEnd', vi.fn())).toBe(false); + }); + }); + + describe('re-entrant emit', () => { + it('handler that registers a new handler during emit does not affect current chain', async () => { + const manager = new HooksManager(); + const lateHandler = vi.fn(); + + manager.on('PostToolUse', { + handler: () => { + // Register a new handler mid-emit + manager.on('PostToolUse', { + handler: lateHandler, + }); + }, + }); + + await manager.emit('PostToolUse', { + toolName: 'Bash', + toolInput: {}, + toolOutput: 'ok', + durationMs: 100, + sessionId: 'test', + }); + + // emit() snapshots the entries list before iterating the chain, so + // handlers registered mid-emit are invisible to the in-flight chain and + // will only fire on subsequent emits. This protects against the + // symmetric removal case as well (off()/unsubscribe mid-chain can no + // longer shift indices and skip the next handler). + expect(lateHandler).not.toHaveBeenCalled(); + }); + + it('handler that calls emit recursively does not deadlock', async () => { + const manager = new HooksManager(); + let depth = 0; + + manager.on('SessionStart', { + handler: async () => { + depth++; + if (depth < 3) { + await manager.emit('SessionStart', { + sessionId: `depth-${depth}`, + config: undefined, + }); + } + }, + }); + + await manager.emit('SessionStart', { + sessionId: 'root', + config: undefined, + }); + + expect(depth).toBe(3); + }); + }); + + describe('custom hook validation', () => { + it('throws for each built-in hook name used as custom', () => { + const builtInNames = [ + 'PreToolUse', + 'PostToolUse', + 'PostToolUseFailure', + 'UserPromptSubmit', + 'Stop', + 'PermissionRequest', + 'SessionStart', + 'SessionEnd', + ]; + + for (const name of builtInNames) { + expect( + () => + new HooksManager({ + [name]: { + payload: z4.object({}), + result: z4.void(), + }, + }), + ).toThrow('collides with a built-in hook'); + } + }); + + it('allows custom hook with empty string name', () => { + const manager = new HooksManager({ + '': { + payload: z4.object({}), + result: z4.void(), + }, + }); + expect(manager).toBeInstanceOf(HooksManager); + }); + }); + + describe('emit edge cases', () => { + it('emit for a hook with no registered handlers returns clean result', async () => { + const manager = new HooksManager(); + const result = await manager.emit('PreToolUse', { + toolName: 'Test', + toolInput: {}, + sessionId: 's1', + }); + + expect(result.results).toEqual([]); + expect(result.blocked).toBe(false); + expect(result.pending).toEqual([]); + }); + + it('emit for an unregistered custom hook name returns clean result', async () => { + const manager = new HooksManager(); + // Emitting a never-registered hook name + const result = await manager.emit('NonExistentHook' as 'PostToolUse', { + toolName: 'X', + toolInput: {}, + toolOutput: null, + durationMs: 0, + sessionId: 's', + }); + + expect(result.results).toEqual([]); + }); + + it('handler that throws in strict mode propagates through emit', async () => { + const manager = new HooksManager(undefined, { + throwOnHandlerError: true, + }); + + manager.on('PostToolUse', { + handler: () => { + throw new Error('handler explosion'); + }, + }); + + await expect( + manager.emit('PostToolUse', { + toolName: 'T', + toolInput: {}, + toolOutput: null, + durationMs: 0, + sessionId: 's', + }), + ).rejects.toThrow('handler explosion'); + }); + + it('handler that throws in default mode does not fail emit', async () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + const manager = new HooksManager(); + + manager.on('PostToolUse', { + handler: () => { + throw new Error('soft failure'); + }, + }); + + const result = await manager.emit('PostToolUse', { + toolName: 'T', + toolInput: {}, + toolOutput: null, + durationMs: 0, + sessionId: 's', + }); + + expect(result.results).toEqual([]); + expect(warnSpy).toHaveBeenCalled(); + warnSpy.mockRestore(); + }); + }); + + describe('PreToolUse block through manager', () => { + it('block with string reason short-circuits and reports blocked', async () => { + const manager = new HooksManager(); + const second = vi.fn(); + + manager.on('PreToolUse', { + handler: () => ({ + block: 'dangerous tool', + }), + }); + manager.on('PreToolUse', { + handler: second, + }); + + const result = await manager.emit( + 'PreToolUse', + { + toolName: 'rm', + toolInput: { + path: '/', + }, + sessionId: 's', + }, + { + toolName: 'rm', + }, + ); + + expect(result.blocked).toBe(true); + expect(second).not.toHaveBeenCalled(); + expect(result.results).toEqual([ + { + block: 'dangerous tool', + }, + ]); + }); + }); + + describe('drain edge cases', () => { + it('drain can be called multiple times safely', async () => { + const manager = new HooksManager(); + await manager.drain(); + await manager.drain(); + // No error + }); + + it('drain after handlers have been cleared still resolves', async () => { + const manager = new HooksManager(); + manager.on('PostToolUse', { + handler: () => ({ + async: true as const, + work: Promise.resolve(), + }), + }); + + await manager.emit('PostToolUse', { + toolName: 'T', + toolInput: {}, + toolOutput: null, + durationMs: 0, + sessionId: 's', + }); + + manager.removeAll(); + await manager.drain(); // pending async should still drain + }); + }); + + describe('removeAll during usage', () => { + it('removeAll clears all hooks even if called from within a handler', async () => { + const manager = new HooksManager(); + + manager.on('SessionStart', { + handler: () => { + manager.removeAll(); + }, + }); + + await manager.emit('SessionStart', { + sessionId: 's', + config: undefined, + }); + + expect(manager.hasHandlers('SessionStart')).toBe(false); + }); + }); + + describe('internal registrar is not publicly reachable', () => { + it('HooksManager has no public registerEntry method', () => { + const manager = new HooksManager(); + // `registerEntry` used to be a public method on the class. It has been + // removed in favour of a module-private registrar reached only via + // `getInternalRegistrar`. Neither the instance nor its prototype chain + // should expose a `registerEntry` symbol. + expect('registerEntry' in manager).toBe(false); + }); + }); + + describe('setSessionId', () => { + it('changing sessionId mid-session is reflected in subsequent emits', async () => { + const manager = new HooksManager(); + const sessionIds: string[] = []; + + manager.on('PostToolUse', { + handler: (_p, ctx) => { + sessionIds.push(ctx.sessionId); + }, + }); + + manager.setSessionId('session-1'); + await manager.emit('PostToolUse', { + toolName: 'T', + toolInput: {}, + toolOutput: null, + durationMs: 0, + sessionId: 'session-1', + }); + + manager.setSessionId('session-2'); + await manager.emit('PostToolUse', { + toolName: 'T', + toolInput: {}, + toolOutput: null, + durationMs: 0, + sessionId: 'session-2', + }); + + expect(sessionIds).toEqual([ + 'session-1', + 'session-2', + ]); + }); + + it('payload sessionId overrides the manager sessionId when they disagree', async () => { + const manager = new HooksManager(); + const sessionIds: string[] = []; + + manager.on('PostToolUse', { + handler: (_p, ctx) => { + sessionIds.push(ctx.sessionId); + }, + }); + + manager.setSessionId('stale-manager-id'); + await manager.emit('PostToolUse', { + toolName: 'T', + toolInput: {}, + toolOutput: null, + durationMs: 0, + sessionId: 'fresh-payload-id', + }); + + expect(sessionIds).toEqual([ + 'fresh-payload-id', + ]); + }); + }); +}); diff --git a/packages/agent/tests/unit/hooks-manager.test.ts b/packages/agent/tests/unit/hooks-manager.test.ts new file mode 100644 index 0000000..e1a6d45 --- /dev/null +++ b/packages/agent/tests/unit/hooks-manager.test.ts @@ -0,0 +1,520 @@ +import { describe, expect, it, vi } from 'vitest'; +import * as z4 from 'zod/v4'; +import { HooksManager } from '../../src/lib/hooks-manager.js'; + +describe('HooksManager', () => { + describe('constructor', () => { + it('creates with no arguments', () => { + const manager = new HooksManager(); + expect(manager).toBeInstanceOf(HooksManager); + }); + + it('throws on custom hook name collision with built-in', () => { + expect( + () => + new HooksManager({ + PreToolUse: { + payload: z4.object({ + custom: z4.string(), + }), + result: z4.void(), + }, + }), + ).toThrow('collides with a built-in hook'); + }); + + it('accepts custom hooks with unique names', () => { + const manager = new HooksManager({ + MyCustomHook: { + payload: z4.object({ + data: z4.string(), + }), + result: z4.void(), + }, + }); + expect(manager).toBeInstanceOf(HooksManager); + }); + }); + + describe('on / off / removeAll', () => { + it('registers a handler and returns unsubscribe function', async () => { + const manager = new HooksManager(); + const handler = vi.fn(); + + const unsub = manager.on('PostToolUse', { + handler, + }); + expect(manager.hasHandlers('PostToolUse')).toBe(true); + + unsub(); + expect(manager.hasHandlers('PostToolUse')).toBe(false); + }); + + it('removes a handler by reference with off()', async () => { + const manager = new HooksManager(); + const handler = vi.fn(); + + manager.on('PostToolUse', { + handler, + }); + expect(manager.hasHandlers('PostToolUse')).toBe(true); + + const removed = manager.off('PostToolUse', handler); + expect(removed).toBe(true); + expect(manager.hasHandlers('PostToolUse')).toBe(false); + }); + + it('returns false when off() cannot find the handler', () => { + const manager = new HooksManager(); + const removed = manager.off('PostToolUse', vi.fn()); + expect(removed).toBe(false); + }); + + it('removes all handlers for a specific hook', () => { + const manager = new HooksManager(); + manager.on('PostToolUse', { + handler: vi.fn(), + }); + manager.on('PostToolUse', { + handler: vi.fn(), + }); + manager.on('PreToolUse', { + handler: vi.fn(), + }); + + manager.removeAll('PostToolUse'); + expect(manager.hasHandlers('PostToolUse')).toBe(false); + expect(manager.hasHandlers('PreToolUse')).toBe(true); + }); + + it('removes all handlers for all hooks', () => { + const manager = new HooksManager(); + manager.on('PostToolUse', { + handler: vi.fn(), + }); + manager.on('PreToolUse', { + handler: vi.fn(), + }); + + manager.removeAll(); + expect(manager.hasHandlers('PostToolUse')).toBe(false); + expect(manager.hasHandlers('PreToolUse')).toBe(false); + }); + }); + + describe('emit', () => { + it('invokes registered handlers with payload', async () => { + const manager = new HooksManager(); + const handler = vi.fn(); + + manager.on('PostToolUse', { + handler, + }); + await manager.emit('PostToolUse', { + toolName: 'Bash', + toolInput: {}, + toolOutput: 'ok', + durationMs: 100, + sessionId: 'test', + }); + + expect(handler).toHaveBeenCalledOnce(); + expect(handler).toHaveBeenCalledWith( + expect.objectContaining({ + toolName: 'Bash', + }), + expect.objectContaining({ + hookName: 'PostToolUse', + }), + ); + }); + + it('returns empty results when no handlers registered', async () => { + const manager = new HooksManager(); + const result = await manager.emit('PostToolUse', { + toolName: 'Bash', + toolInput: {}, + toolOutput: 'ok', + durationMs: 100, + sessionId: 'test', + }); + + expect(result.results).toEqual([]); + expect(result.blocked).toBe(false); + }); + + it('passes toolName to emit context for matcher filtering', async () => { + const manager = new HooksManager(); + const bashHandler = vi.fn(); + const readHandler = vi.fn(); + + manager.on('PreToolUse', { + matcher: 'Bash', + handler: bashHandler, + }); + manager.on('PreToolUse', { + matcher: 'ReadFile', + handler: readHandler, + }); + + await manager.emit( + 'PreToolUse', + { + toolName: 'Bash', + toolInput: {}, + sessionId: 'test', + }, + { + toolName: 'Bash', + }, + ); + + expect(bashHandler).toHaveBeenCalledOnce(); + expect(readHandler).not.toHaveBeenCalled(); + }); + + it('supports custom hooks', async () => { + const manager = new HooksManager({ + AgentThinking: { + payload: z4.object({ + thought: z4.string(), + }), + result: z4.void(), + }, + }); + + const handler = vi.fn(); + manager.on('AgentThinking', { + handler, + }); + + await manager.emit('AgentThinking', { + thought: 'hmm', + }); + + expect(handler).toHaveBeenCalledWith( + expect.objectContaining({ + thought: 'hmm', + }), + expect.any(Object), + ); + }); + }); + + describe('drain', () => { + it('resolves when no pending async handlers', async () => { + const manager = new HooksManager(); + await expect(manager.drain()).resolves.toBeUndefined(); + }); + + it('waits for pending async handler work', async () => { + const manager = new HooksManager(); + let resolveWork: (() => void) | undefined; + const work = new Promise((resolve) => { + resolveWork = resolve; + }); + let drainResolved = false; + + manager.on('PostToolUse', { + handler: () => ({ + async: true as const, + work, + }), + }); + + await manager.emit('PostToolUse', { + toolName: 'Bash', + toolInput: {}, + toolOutput: 'ok', + durationMs: 100, + sessionId: 'test', + }); + + const drainPromise = manager.drain().then(() => { + drainResolved = true; + }); + + // Give drain a microtask tick to attach — it should still be pending + // because the detached work has not settled. + await Promise.resolve(); + expect(drainResolved).toBe(false); + + resolveWork?.(); + await drainPromise; + expect(drainResolved).toBe(true); + }); + }); + + describe('setSessionId', () => { + it('sets session ID used in hook context', async () => { + const manager = new HooksManager(); + manager.setSessionId('my-session'); + + let receivedSessionId = ''; + manager.on('PostToolUse', { + handler: (_payload, context) => { + receivedSessionId = context.sessionId; + }, + }); + + await manager.emit('PostToolUse', { + toolName: 'Bash', + toolInput: {}, + toolOutput: 'ok', + durationMs: 100, + sessionId: 'my-session', + }); + + expect(receivedSessionId).toBe('my-session'); + }); + + it('payload sessionId overrides manager sessionId in hook context', async () => { + const manager = new HooksManager(); + manager.setSessionId('stale-session'); + + let receivedSessionId = ''; + manager.on('PostToolUse', { + handler: (_payload, context) => { + receivedSessionId = context.sessionId; + }, + }); + + await manager.emit('PostToolUse', { + toolName: 'Bash', + toolInput: {}, + toolOutput: 'ok', + durationMs: 100, + sessionId: 'fresh-session', + }); + + expect(receivedSessionId).toBe('fresh-session'); + }); + + it('falls back to manager sessionId for custom hooks without sessionId in payload', async () => { + const manager = new HooksManager({ + AgentThinking: { + payload: z4.object({ + thought: z4.string(), + }), + result: z4.void(), + }, + }); + manager.setSessionId('manager-session'); + + let receivedSessionId = ''; + manager.on('AgentThinking', { + handler: (_payload, context) => { + receivedSessionId = context.sessionId; + }, + }); + + await manager.emit('AgentThinking', { + thought: 'hmm', + }); + + expect(receivedSessionId).toBe('manager-session'); + }); + }); + + describe('schema validation', () => { + /** + * Cast helper used to pass intentionally-invalid values to hook emits in + * these validation tests without sprinkling `as any` on every call site. + * Lives in the test file so it can't be imported by production code. + */ + const asTyped = (value: unknown): T => value as T; + + it('rejects an invalid payload against the built-in schema in strict mode', async () => { + const manager = new HooksManager(undefined, { + throwOnHandlerError: true, + }); + manager.on('PreToolUse', { + handler: vi.fn(), + }); + + // `toolInput` must be an object per PreToolUsePayloadSchema. + await expect( + manager.emit('PreToolUse', { + toolName: 'Bash', + toolInput: asTyped>('not an object'), + sessionId: 's', + }), + ).rejects.toThrow('Invalid payload for hook "PreToolUse"'); + }); + + it('default mode logs and short-circuits when payload is invalid', async () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + const manager = new HooksManager(); + const handler = vi.fn(); + manager.on('PreToolUse', { + handler, + }); + + const result = await manager.emit('PreToolUse', { + toolName: 'Bash', + toolInput: asTyped>(42), + sessionId: 's', + }); + + expect(warnSpy).toHaveBeenCalled(); + expect(handler).not.toHaveBeenCalled(); + expect(result.results).toEqual([]); + expect(result.blocked).toBe(false); + warnSpy.mockRestore(); + }); + + it('rejects an invalid handler result against the built-in result schema in strict mode', async () => { + const manager = new HooksManager(undefined, { + throwOnHandlerError: true, + }); + manager.on('PreToolUse', { + // `block` must be boolean or string per PreToolUseResultSchema. + handler: () => ({ + block: asTyped(12345), + }), + }); + + await expect( + manager.emit('PreToolUse', { + toolName: 'Bash', + toolInput: {}, + sessionId: 's', + }), + ).rejects.toThrow('invalid result'); + }); + + it('validates payload and result against custom hook schemas', async () => { + const manager = new HooksManager( + { + AgentThinking: { + payload: z4.object({ + thought: z4.string(), + }), + result: z4.object({ + ack: z4.boolean(), + }), + }, + }, + { + throwOnHandlerError: true, + }, + ); + + manager.on('AgentThinking', { + // Invalid result -- `ack` must be boolean. + handler: () => ({ + ack: asTyped('yes'), + }), + }); + + await expect( + manager.emit('AgentThinking', { + thought: 'hmm', + }), + ).rejects.toThrow('invalid result'); + }); + + it('does not validate results for void-typed built-in hooks', async () => { + const manager = new HooksManager(undefined, { + throwOnHandlerError: true, + }); + // Void-typed hook: PostToolUse. Handler may return arbitrary values and + // they land in `results` without schema complaints. + manager.on('PostToolUse', { + handler: () => + asTyped({ + arbitrary: true, + }), + }); + + const result = await manager.emit('PostToolUse', { + toolName: 'Bash', + toolInput: {}, + toolOutput: 'ok', + durationMs: 10, + sessionId: 's', + }); + + expect(result.results).toEqual([ + { + arbitrary: true, + }, + ]); + }); + }); + + describe('abortInflight', () => { + it('aborts the signal on the LifecycleHookContext of in-flight emits', async () => { + const manager = new HooksManager(); + let observedAborted = false; + + manager.on('PostToolUse', { + handler: async (_payload, ctx) => { + await new Promise((resolve) => { + if (ctx.signal.aborted) { + observedAborted = true; + resolve(); + } else { + ctx.signal.addEventListener('abort', () => { + observedAborted = true; + resolve(); + }); + } + }); + }, + }); + + const emitPromise = manager.emit('PostToolUse', { + toolName: 'Bash', + toolInput: {}, + toolOutput: 'ok', + durationMs: 0, + sessionId: 's', + }); + + // Give the handler a microtask tick to register the listener, then abort. + await Promise.resolve(); + manager.abortInflight(); + await emitPromise; + + expect(observedAborted).toBe(true); + }); + }); + + describe('pendingAsync hygiene', () => { + it('settled async work self-removes from pending tracking', async () => { + const manager = new HooksManager(); + manager.on('PostToolUse', { + handler: () => ({ + async: true as const, + work: Promise.resolve(), + }), + }); + + for (let i = 0; i < 5; i++) { + await manager.emit('PostToolUse', { + toolName: 'T', + toolInput: {}, + toolOutput: null, + durationMs: 0, + sessionId: 's', + }); + } + + // After all the detached work has settled, drain should be a no-op -- + // the self-removal hook should have cleared the internal set. + await manager.drain(); + await manager.drain(); + + // Emit one more and verify we are not accumulating entries. + await manager.emit('PostToolUse', { + toolName: 'T', + toolInput: {}, + toolOutput: null, + durationMs: 0, + sessionId: 's', + }); + await manager.drain(); + }); + }); +}); diff --git a/packages/agent/tests/unit/hooks-matchers-adversarial.test.ts b/packages/agent/tests/unit/hooks-matchers-adversarial.test.ts new file mode 100644 index 0000000..d8f8c8d --- /dev/null +++ b/packages/agent/tests/unit/hooks-matchers-adversarial.test.ts @@ -0,0 +1,89 @@ +import { describe, expect, it } from 'vitest'; +import { matchesTool } from '../../src/lib/hooks-matchers.js'; + +describe('matchesTool (adversarial)', () => { + describe('empty string edge cases', () => { + it('empty string matcher matches empty string toolName', () => { + expect(matchesTool('', '')).toBe(true); + }); + + it('empty string matcher does NOT match non-empty toolName', () => { + expect(matchesTool('', 'Bash')).toBe(false); + }); + + it('non-empty matcher does NOT match empty toolName', () => { + expect(matchesTool('Bash', '')).toBe(false); + }); + }); + + describe('RegExp stateful behavior', () => { + it('RegExp with global flag is made idempotent by resetting lastIndex', () => { + const globalRegex = /Bash/g; + + // matchesTool resets lastIndex before each .test(), so /g and /y flags + // no longer cause alternating true/false across successive emits. + expect(matchesTool(globalRegex, 'Bash')).toBe(true); + expect(matchesTool(globalRegex, 'Bash')).toBe(true); + expect(matchesTool(globalRegex, 'Bash')).toBe(true); + }); + + it('RegExp with sticky flag is idempotent', () => { + const stickyRegex = /Bash/y; + expect(matchesTool(stickyRegex, 'Bash')).toBe(true); + expect(matchesTool(stickyRegex, 'Bash')).toBe(true); + }); + + it('RegExp without global flag is idempotent', () => { + const regex = /Bash/; + expect(matchesTool(regex, 'Bash')).toBe(true); + expect(matchesTool(regex, 'Bash')).toBe(true); + expect(matchesTool(regex, 'Bash')).toBe(true); + }); + + it('RegExp with case-insensitive flag', () => { + expect(matchesTool(/bash/i, 'Bash')).toBe(true); + expect(matchesTool(/bash/i, 'BASH')).toBe(true); + }); + + it('RegExp matching partial tool name (no anchoring)', () => { + // /Read/ matches "ReadFile" — this is regex default behavior, not bug + expect(matchesTool(/Read/, 'ReadFile')).toBe(true); + expect(matchesTool(/Read/, 'OnlyRead')).toBe(true); + }); + }); + + describe('function matcher edge cases', () => { + it('function matcher throwing propagates the error', () => { + const throwingMatcher = () => { + throw new Error('matcher boom'); + }; + expect(() => matchesTool(throwingMatcher, 'Bash')).toThrow('matcher boom'); + }); + + it('function matcher returning truthy non-boolean is NOT coerced to true', () => { + // BUG: matchesTool returns raw value from function, not boolean-coerced. + // Callers that do strict === true checks will behave differently than truthiness checks. + const truthyMatcher = () => 1 as unknown as boolean; + expect(matchesTool(truthyMatcher, 'Bash')).toBe(1); + }); + + it('function matcher returning 0 (falsy) is NOT coerced to false', () => { + // BUG: returns 0 instead of false — truthiness check works but strict equality fails + const falsyMatcher = () => 0 as unknown as boolean; + expect(matchesTool(falsyMatcher, 'Bash')).toBe(0); + }); + + it('function matcher returning null is NOT coerced to false', () => { + // BUG: returns null instead of false — truthiness check works but strict equality fails + const nullMatcher = () => null as unknown as boolean; + expect(matchesTool(nullMatcher, 'Bash')).toBe(null); + }); + }); + + describe('special characters in string matcher', () => { + it('string matcher with regex-special chars does exact match only', () => { + expect(matchesTool('Read.*File', 'Read.*File')).toBe(true); + expect(matchesTool('Read.*File', 'ReadAnyFile')).toBe(false); + }); + }); +}); diff --git a/packages/agent/tests/unit/hooks-matchers.test.ts b/packages/agent/tests/unit/hooks-matchers.test.ts new file mode 100644 index 0000000..841f283 --- /dev/null +++ b/packages/agent/tests/unit/hooks-matchers.test.ts @@ -0,0 +1,27 @@ +import { describe, expect, it } from 'vitest'; +import { matchesTool } from '../../src/lib/hooks-matchers.js'; + +describe('matchesTool', () => { + it('matches all tools when matcher is undefined', () => { + expect(matchesTool(undefined, 'Bash')).toBe(true); + expect(matchesTool(undefined, 'ReadFile')).toBe(true); + }); + + it('matches exact string', () => { + expect(matchesTool('Bash', 'Bash')).toBe(true); + expect(matchesTool('Bash', 'ReadFile')).toBe(false); + expect(matchesTool('Bash', 'bash')).toBe(false); + }); + + it('matches RegExp', () => { + expect(matchesTool(/^(Read|Write)File$/, 'ReadFile')).toBe(true); + expect(matchesTool(/^(Read|Write)File$/, 'WriteFile')).toBe(true); + expect(matchesTool(/^(Read|Write)File$/, 'DeleteFile')).toBe(false); + }); + + it('matches function predicate', () => { + const matcher = (name: string) => name.startsWith('File'); + expect(matchesTool(matcher, 'FileRead')).toBe(true); + expect(matchesTool(matcher, 'Bash')).toBe(false); + }); +}); diff --git a/packages/agent/tests/unit/hooks-resolve-adversarial.test.ts b/packages/agent/tests/unit/hooks-resolve-adversarial.test.ts new file mode 100644 index 0000000..56308de --- /dev/null +++ b/packages/agent/tests/unit/hooks-resolve-adversarial.test.ts @@ -0,0 +1,136 @@ +import { describe, expect, it, vi } from 'vitest'; +import { HooksManager } from '../../src/lib/hooks-manager.js'; +import { resolveHooks } from '../../src/lib/hooks-resolve.js'; +import type { InlineHookConfig } from '../../src/lib/hooks-types.js'; + +describe('resolveHooks (adversarial)', () => { + describe('falsy inputs', () => { + it('returns undefined for null', () => { + expect(resolveHooks(null as unknown as undefined)).toBeUndefined(); + }); + + it('returns undefined for false', () => { + expect(resolveHooks(false as unknown as undefined)).toBeUndefined(); + }); + + it('returns undefined for 0', () => { + expect(resolveHooks(0 as unknown as undefined)).toBeUndefined(); + }); + + it('returns undefined for empty string', () => { + expect(resolveHooks('' as unknown as undefined)).toBeUndefined(); + }); + }); + + describe('empty config', () => { + it('empty object returns a HooksManager with no handlers', () => { + const result = resolveHooks({}); + expect(result).toBeInstanceOf(HooksManager); + expect(result!.hasHandlers('PreToolUse')).toBe(false); + }); + + it('config with empty arrays returns HooksManager with no handlers', () => { + const config: InlineHookConfig = { + PreToolUse: [], + PostToolUse: [], + }; + + const result = resolveHooks(config); + expect(result).toBeInstanceOf(HooksManager); + expect(result!.hasHandlers('PreToolUse')).toBe(false); + expect(result!.hasHandlers('PostToolUse')).toBe(false); + }); + }); + + describe('malformed config values', () => { + it('non-array value for a hook key is skipped', () => { + const config = { + PreToolUse: 'not-an-array', + } as unknown as InlineHookConfig; + + const result = resolveHooks(config); + expect(result).toBeInstanceOf(HooksManager); + expect(result!.hasHandlers('PreToolUse')).toBe(false); + }); + + it('null value for a hook key is skipped', () => { + const config = { + PreToolUse: null, + } as unknown as InlineHookConfig; + + const result = resolveHooks(config); + expect(result).toBeInstanceOf(HooksManager); + expect(result!.hasHandlers('PreToolUse')).toBe(false); + }); + + it('number value for a hook key is skipped', () => { + const config = { + PreToolUse: 42, + } as unknown as InlineHookConfig; + + const result = resolveHooks(config); + expect(result).toBeInstanceOf(HooksManager); + expect(result!.hasHandlers('PreToolUse')).toBe(false); + }); + }); + + describe('prototype pollution resistance', () => { + it('__proto__ key in config does not pollute Object prototype', () => { + const config = JSON.parse('{"__proto__": [{"handler": null}], "PreToolUse": []}'); + + // This should not crash and should not pollute Object.prototype + const result = resolveHooks(config); + expect(result).toBeInstanceOf(HooksManager); + expect(({} as Record)['handler']).toBeUndefined(); + }); + + it('constructor key in config does not crash', () => { + const config = { + constructor: [ + { + handler: vi.fn(), + }, + ], + } as unknown as InlineHookConfig; + + const warn = vi.spyOn(console, 'warn').mockImplementation(() => {}); + const result = resolveHooks(config); + expect(result).toBeInstanceOf(HooksManager); + // 'constructor' is not a built-in hook name — it is warned and skipped. + expect(result!.hasHandlers('constructor')).toBe(false); + expect(warn).toHaveBeenCalled(); + warn.mockRestore(); + }); + }); + + describe('HooksManager passthrough', () => { + it('returns the exact same instance, not a copy', () => { + const manager = new HooksManager(); + manager.on('PreToolUse', { + handler: vi.fn(), + }); + + const result = resolveHooks(manager); + expect(result).toBe(manager); + }); + }); + + describe('non-standard hook names in inline config', () => { + it('warns and skips non-built-in hook names (custom hooks require HooksManager)', () => { + const config = { + CustomHook: [ + { + handler: vi.fn(), + }, + ], + } as unknown as InlineHookConfig; + + const warn = vi.spyOn(console, 'warn').mockImplementation(() => {}); + const result = resolveHooks(config); + expect(result).toBeInstanceOf(HooksManager); + expect(result!.hasHandlers('CustomHook')).toBe(false); + expect(warn).toHaveBeenCalledWith(expect.stringContaining('CustomHook')); + warn.mockRestore(); + }); + }); +}); diff --git a/packages/agent/tests/unit/hooks-resolve.test.ts b/packages/agent/tests/unit/hooks-resolve.test.ts new file mode 100644 index 0000000..6cd7554 --- /dev/null +++ b/packages/agent/tests/unit/hooks-resolve.test.ts @@ -0,0 +1,72 @@ +import { describe, expect, it, vi } from 'vitest'; +import { HooksManager } from '../../src/lib/hooks-manager.js'; +import { resolveHooks } from '../../src/lib/hooks-resolve.js'; +import type { InlineHookConfig } from '../../src/lib/hooks-types.js'; + +describe('resolveHooks', () => { + it('returns undefined for undefined input', () => { + expect(resolveHooks(undefined)).toBeUndefined(); + }); + + it('passes through HooksManager instances', () => { + const manager = new HooksManager(); + expect(resolveHooks(manager)).toBe(manager); + }); + + it('converts inline config to HooksManager', () => { + const config: InlineHookConfig = { + PreToolUse: [ + { + matcher: 'Bash', + handler: vi.fn(), + }, + ], + }; + + const result = resolveHooks(config); + expect(result).toBeInstanceOf(HooksManager); + expect(result!.hasHandlers('PreToolUse')).toBe(true); + }); + + it('registers multiple entries for a single hook', () => { + const config: InlineHookConfig = { + PostToolUse: [ + { + handler: vi.fn(), + }, + { + handler: vi.fn(), + }, + ], + }; + + const result = resolveHooks(config); + expect(result).toBeInstanceOf(HooksManager); + expect(result!.hasHandlers('PostToolUse')).toBe(true); + }); + + it('registers entries for multiple hooks', () => { + const config: InlineHookConfig = { + PreToolUse: [ + { + handler: vi.fn(), + }, + ], + PostToolUse: [ + { + handler: vi.fn(), + }, + ], + Stop: [ + { + handler: vi.fn(), + }, + ], + }; + + const result = resolveHooks(config); + expect(result!.hasHandlers('PreToolUse')).toBe(true); + expect(result!.hasHandlers('PostToolUse')).toBe(true); + expect(result!.hasHandlers('Stop')).toBe(true); + }); +}); diff --git a/packages/agent/tests/unit/hooks-types-adversarial.test.ts b/packages/agent/tests/unit/hooks-types-adversarial.test.ts new file mode 100644 index 0000000..9b60aa1 --- /dev/null +++ b/packages/agent/tests/unit/hooks-types-adversarial.test.ts @@ -0,0 +1,173 @@ +import { describe, expect, it } from 'vitest'; +import { isAsyncOutput } from '../../src/lib/hooks-types.js'; + +describe('isAsyncOutput (adversarial)', () => { + describe('truthy values that should NOT match', () => { + it('{ async: "true" } (string) is not AsyncOutput', () => { + expect( + isAsyncOutput({ + async: 'true', + }), + ).toBe(false); + }); + + it('{ async: 1 } (number) is not AsyncOutput', () => { + expect( + isAsyncOutput({ + async: 1, + }), + ).toBe(false); + }); + + it('{ async: {} } (object) is not AsyncOutput', () => { + expect( + isAsyncOutput({ + async: {}, + }), + ).toBe(false); + }); + + it('{ async: [] } (array) is not AsyncOutput', () => { + expect( + isAsyncOutput({ + async: [], + }), + ).toBe(false); + }); + }); + + describe('non-object inputs', () => { + it('null is not AsyncOutput', () => { + expect(isAsyncOutput(null)).toBe(false); + }); + + it('undefined is not AsyncOutput', () => { + expect(isAsyncOutput(undefined)).toBe(false); + }); + + it('number is not AsyncOutput', () => { + expect(isAsyncOutput(42)).toBe(false); + }); + + it('string is not AsyncOutput', () => { + expect(isAsyncOutput('async')).toBe(false); + }); + + it('boolean true is not AsyncOutput', () => { + expect(isAsyncOutput(true)).toBe(false); + }); + + it('symbol is not AsyncOutput', () => { + expect(isAsyncOutput(Symbol('async'))).toBe(false); + }); + }); + + describe('valid AsyncOutput variations', () => { + it('{ async: true } is AsyncOutput', () => { + expect( + isAsyncOutput({ + async: true, + }), + ).toBe(true); + }); + + it('{ async: true, asyncTimeout: 5000 } is AsyncOutput', () => { + expect( + isAsyncOutput({ + async: true, + asyncTimeout: 5000, + }), + ).toBe(true); + }); + + it('{ async: true, extraField: "..." } is NOT AsyncOutput', () => { + // A foreign key means the handler likely returned a result object that + // happens to carry `async` — treating it as fire-and-forget would + // silently drop the result (and any mutation piping). + expect( + isAsyncOutput({ + async: true, + extraField: 'ignored', + }), + ).toBe(false); + }); + + it('{ async: true, mutatedInput: {...} } is NOT AsyncOutput', () => { + // Common footgun: a PreToolUse handler that builds a result and adds + // `async: true` must not have its mutation dropped. + expect( + isAsyncOutput({ + async: true, + mutatedInput: { + cwd: '/tmp', + }, + }), + ).toBe(false); + }); + + it('frozen object { async: true } is AsyncOutput', () => { + expect( + isAsyncOutput( + Object.freeze({ + async: true, + }), + ), + ).toBe(true); + }); + }); + + describe('array with async property', () => { + it('array with async property set is NOT AsyncOutput', () => { + // Arrays surface their numeric indices through Object.keys once + // anything is pushed; even when empty, any extra named property alongside + // `async` breaks the sole-key requirement. The spread-cloning logic in + // executeHandlerChain already refuses to spread arrays, so arrays are + // never valid AsyncOutput carriers. + const arr: unknown[] = [ + 'item', + ]; + (arr as unknown as Record)['async'] = true; + expect(isAsyncOutput(arr)).toBe(false); + }); + }); + + describe('Proxy objects', () => { + it('Proxy that returns true for async property is AsyncOutput', () => { + const proxy = new Proxy( + {}, + { + get(_target, prop) { + if (prop === 'async') { + return true; + } + return undefined; + }, + has(_target, prop) { + return prop === 'async'; + }, + }, + ); + expect(isAsyncOutput(proxy)).toBe(true); + }); + + it('Proxy that throws on property access causes isAsyncOutput to throw', () => { + const proxy = new Proxy( + {}, + { + has() { + throw new Error('proxy trap'); + }, + }, + ); + expect(() => isAsyncOutput(proxy)).toThrow('proxy trap'); + }); + }); + + describe('Object.create(null)', () => { + it('bare object with async: true is AsyncOutput', () => { + const obj = Object.create(null); + obj.async = true; + expect(isAsyncOutput(obj)).toBe(true); + }); + }); +}); diff --git a/packages/agent/tests/unit/model-result-hooks.test.ts b/packages/agent/tests/unit/model-result-hooks.test.ts new file mode 100644 index 0000000..a3039e3 --- /dev/null +++ b/packages/agent/tests/unit/model-result-hooks.test.ts @@ -0,0 +1,1146 @@ +/** + * Tests for the hook integration points in ModelResult. + * + * These tests focus on orchestration: PreToolUse/PostToolUse must fire on + * every tool-execution path (auto, auto-approve, approved), PermissionRequest + * influences the approval gate, UserPromptSubmit fires for stateful + * conversations, Stop hooks respect forceResume bounds and appendPrompt, and + * SessionStart receives a populated config object. + * + * The ModelResult is constructed with a stub client; we drive its internal + * state directly rather than going through betaResponsesSend so the tests + * stay hermetic. + */ +import type { OpenRouterCore } from '@openrouter/sdk/core'; +import type { OpenResponsesResult } from '@openrouter/sdk/models'; +import { describe, expect, it, vi } from 'vitest'; +import { z } from 'zod/v4'; +import { HooksManager } from '../../src/lib/hooks-manager.js'; +import { ModelResult } from '../../src/lib/model-result.js'; +import type { + ConversationState, + ParsedToolCall, + StateAccessor, + Tool, + TurnContext, + UnsentToolResult, +} from '../../src/lib/tool-types.js'; +import { ToolType } from '../../src/lib/tool-types.js'; + +// --------------------------------------------------------------------------- +// Fixtures +// --------------------------------------------------------------------------- + +function makeAutoTool( + name: string, + exec: (args: unknown) => unknown = () => ({ + ok: true, + }), +) { + return { + type: ToolType.Function, + function: { + name, + description: `Auto tool ${name}`, + inputSchema: z.object({}).loose(), + outputSchema: z.unknown(), + execute: async (args: unknown) => exec(args), + }, + } as unknown as Tool; +} + +function makeApprovalTool(name: string) { + return { + type: ToolType.Function, + function: { + name, + description: `Approval tool ${name}`, + inputSchema: z.object({}).loose(), + outputSchema: z.unknown(), + execute: async () => ({ + ran: true, + }), + requireApproval: true, + }, + } as unknown as Tool; +} + +function makeStateAccessor( + initial?: ConversationState | null, +): StateAccessor & { + getLatest: () => ConversationState | null; +} { + let state: ConversationState | null = initial ?? null; + return { + load: async () => state, + save: async (s) => { + state = s; + }, + getLatest: () => state, + }; +} + +type InternalModelResult = { + currentState: ConversationState | null; + stateAccessor: StateAccessor | null; + contextStore: unknown; + resolvedRequest: Record | null; + finalResponse: OpenResponsesResult | null; + initPromise: Promise | null; + toolExecutionPromise: Promise | null; + sessionStartEmitted: boolean; + runToolWithHooks: ( + tool: Tool, + toolCall: ParsedToolCall, + turnContext: TurnContext, + ) => Promise<{ + type: string; + result?: unknown; + reason?: string; + errorMessage?: string; + effectiveToolCall?: ParsedToolCall; + }>; + emitPermissionRequest: (toolCall: ParsedToolCall) => Promise<{ + decision: 'allow' | 'deny' | 'ask_user'; + reason?: string; + }>; + executeAutoApproveTools: ( + toolCalls: ParsedToolCall[], + turnContext: TurnContext, + ) => Promise[]>; + handleApprovalCheck: ( + toolCalls: ParsedToolCall[], + currentRound: number, + currentResponse: OpenResponsesResult, + ) => Promise; + maybeRunUserPromptSubmit: (input: unknown) => Promise< + | { + applyTo: (original: unknown) => unknown; + } + | undefined + >; + injectAppendPromptMessage: (p: string) => Promise; + executeToolsIfNeeded: () => Promise; + options: Record; +}; + +function internal(m: ModelResult): InternalModelResult { + return m as unknown as InternalModelResult; +} + +function buildModelResult(opts: { + tools?: T; + hooks?: HooksManager; + state?: StateAccessor; + requireApproval?: (tc: ParsedToolCall, ctx: TurnContext) => boolean | Promise; +}): ModelResult { + const config: Record = { + request: { + model: 'test-model', + input: 'hello', + }, + client: {} as unknown as OpenRouterCore, + }; + if (opts.tools) { + config['tools'] = opts.tools; + } + if (opts.hooks) { + config['hooks'] = opts.hooks; + } + if (opts.state) { + config['state'] = opts.state; + } + if (opts.requireApproval) { + config['requireApproval'] = opts.requireApproval; + } + return new ModelResult(config as unknown as ConstructorParameters>[0]); +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe('ModelResult hooks integration', () => { + describe('runToolWithHooks', () => { + it('fires PreToolUse and PostToolUse around a successful execution', async () => { + const hooks = new HooksManager(); + const pre = vi.fn(); + const post = vi.fn(); + hooks.on('PreToolUse', { + handler: pre, + }); + hooks.on('PostToolUse', { + handler: post, + }); + + const tool = makeAutoTool('hello'); + const m = buildModelResult({ + tools: [ + tool, + ] as const, + hooks, + }); + + const result = await internal(m).runToolWithHooks( + tool, + { + id: 'call_1', + name: 'hello', + arguments: {}, + }, + { + numberOfTurns: 1, + }, + ); + + expect(result.type).toBe('execution'); + expect(pre).toHaveBeenCalledTimes(1); + expect(post).toHaveBeenCalledTimes(1); + const [payload] = post.mock.calls[0] ?? []; + expect(payload).toMatchObject({ + toolName: 'hello', + }); + expect( + ( + payload as { + durationMs?: number; + } + ).durationMs, + ).toBeGreaterThanOrEqual(0); + }); + + it('short-circuits when PreToolUse returns block=true', async () => { + const hooks = new HooksManager(); + hooks.on('PreToolUse', { + handler: () => ({ + block: 'nope', + }), + }); + const post = vi.fn(); + hooks.on('PostToolUse', { + handler: post, + }); + const fail = vi.fn(); + hooks.on('PostToolUseFailure', { + handler: fail, + }); + + const execSpy = vi.fn(); + const tool = makeAutoTool('blocked', () => { + execSpy(); + return { + shouldNotRun: true, + }; + }); + const m = buildModelResult({ + tools: [ + tool, + ] as const, + hooks, + }); + + const result = await internal(m).runToolWithHooks( + tool, + { + id: 'call_b', + name: 'blocked', + arguments: {}, + }, + { + numberOfTurns: 1, + }, + ); + + expect(result.type).toBe('hook_blocked'); + expect(result.reason).toBe('nope'); + expect(execSpy).not.toHaveBeenCalled(); + expect(post).not.toHaveBeenCalled(); + expect(fail).not.toHaveBeenCalled(); + }); + + it('fires PostToolUseFailure when the tool throws', async () => { + const hooks = new HooksManager(); + const fail = vi.fn(); + const post = vi.fn(); + hooks.on('PostToolUseFailure', { + handler: fail, + }); + hooks.on('PostToolUse', { + handler: post, + }); + + const tool = makeAutoTool('bomb', () => { + throw new Error('boom'); + }); + const m = buildModelResult({ + tools: [ + tool, + ] as const, + hooks, + }); + + const result = await internal(m).runToolWithHooks( + tool, + { + id: 'call_x', + name: 'bomb', + arguments: {}, + }, + { + numberOfTurns: 1, + }, + ); + + expect(result.type).toBe('execution'); + expect(fail).toHaveBeenCalledTimes(1); + expect(post).not.toHaveBeenCalled(); + }); + + it('returns parse_error and skips hooks when arguments failed to parse', async () => { + const hooks = new HooksManager(); + const pre = vi.fn(); + const post = vi.fn(); + const fail = vi.fn(); + hooks.on('PreToolUse', { + handler: pre, + }); + hooks.on('PostToolUse', { + handler: post, + }); + hooks.on('PostToolUseFailure', { + handler: fail, + }); + + const execSpy = vi.fn(); + const tool = makeAutoTool('parsebomb', () => { + execSpy(); + return { + ran: true, + }; + }); + const m = buildModelResult({ + tools: [ + tool, + ] as const, + hooks, + }); + + // Simulate what the stream parser leaves when JSON.parse fails on the + // raw arguments: `arguments` stays a string instead of becoming an + // object. The hook chain must not see this payload. + const result = await internal(m).runToolWithHooks( + tool, + { + id: 'call_parse', + name: 'parsebomb', + arguments: '{not valid json' as unknown as Record, + }, + { + numberOfTurns: 1, + }, + ); + + expect(result.type).toBe('parse_error'); + expect(result.errorMessage).toContain('Failed to parse tool call arguments'); + expect(execSpy).not.toHaveBeenCalled(); + expect(pre).not.toHaveBeenCalled(); + expect(post).not.toHaveBeenCalled(); + expect(fail).not.toHaveBeenCalled(); + }); + + it('preserves original null arguments on effectiveToolCall when no PreToolUse mutation fires', async () => { + // Regression: reference-equality against the coerced `{}` payload used + // to look like a mutation had occurred and overwrite `arguments` with + // `{}`, erasing the distinction between "no args" and "empty args". + // The fix tracks mutation application explicitly. + const hooks = new HooksManager(); + const pre = vi.fn(); + hooks.on('PreToolUse', { + handler: pre, + }); + + const tool = makeAutoTool('nopargs'); + const m = buildModelResult({ + tools: [ + tool, + ] as const, + hooks, + }); + + const result = await internal(m).runToolWithHooks( + tool, + { + id: 'call_noargs', + name: 'nopargs', + arguments: null as unknown as Record, + }, + { + numberOfTurns: 1, + }, + ); + + expect(result.type).toBe('execution'); + expect(pre).toHaveBeenCalledTimes(1); + // effectiveToolCall.arguments must pass through unchanged when no + // handler piped a mutation — NOT silently coerced to {}. + expect(result.effectiveToolCall?.arguments).toBeNull(); + }); + + it('applies mutatedInput from PreToolUse even when original arguments were null', async () => { + const hooks = new HooksManager(); + hooks.on('PreToolUse', { + handler: () => ({ + mutatedInput: { + injected: true, + }, + }), + }); + + const tool = makeAutoTool('withmutation'); + const m = buildModelResult({ + tools: [ + tool, + ] as const, + hooks, + }); + + const result = await internal(m).runToolWithHooks( + tool, + { + id: 'call_mut', + name: 'withmutation', + arguments: null as unknown as Record, + }, + { + numberOfTurns: 1, + }, + ); + + expect(result.type).toBe('execution'); + expect(result.effectiveToolCall?.arguments).toEqual({ + injected: true, + }); + }); + }); + + describe('executeAutoApproveTools', () => { + it('fires Pre/PostToolUse for each auto-executed call', async () => { + const hooks = new HooksManager(); + const pre = vi.fn(); + const post = vi.fn(); + hooks.on('PreToolUse', { + handler: pre, + }); + hooks.on('PostToolUse', { + handler: post, + }); + + const tool = makeAutoTool('auto'); + const m = buildModelResult({ + tools: [ + tool, + ] as const, + hooks, + }); + + const results = await internal(m).executeAutoApproveTools( + [ + { + id: 'c1', + name: 'auto', + arguments: {}, + }, + { + id: 'c2', + name: 'auto', + arguments: {}, + }, + ] as ParsedToolCall[], + { + numberOfTurns: 1, + }, + ); + + expect(results).toHaveLength(2); + expect(pre).toHaveBeenCalledTimes(2); + expect(post).toHaveBeenCalledTimes(2); + }); + }); + + describe('PermissionRequest hook', () => { + it('allow: skips approval gate and routes to auto-execute', async () => { + const hooks = new HooksManager(); + hooks.on('PermissionRequest', { + handler: () => ({ + decision: 'allow' as const, + }), + }); + const executed = vi.fn(); + const tool = makeAutoTool('guarded', () => { + executed(); + return { + ok: true, + }; + }); + // keep requireApproval=true so it goes into needsApproval + const approvalTool = { + ...tool, + function: { + ...( + tool as unknown as { + function: Record; + } + ).function, + requireApproval: true, + }, + } as Tool; + + const stateAccessor = makeStateAccessor({ + id: 'conv_perm_allow', + messages: [], + status: 'in_progress', + createdAt: 0, + updatedAt: 0, + }); + + const m = buildModelResult({ + tools: [ + approvalTool, + ] as const, + hooks, + state: stateAccessor, + }); + // Prime internal state so handleApprovalCheck has a currentState to edit + const ent = internal(m); + ent.currentState = await stateAccessor.load(); + ent.stateAccessor = stateAccessor; + + const paused = await ent.handleApprovalCheck( + [ + { + id: 'c_allow', + name: 'guarded', + arguments: {}, + }, + ] as ParsedToolCall[], + 0, + makeResponse(), + ); + + // When the hook says allow, the call is promoted to auto-execute and the + // gate is not triggered — handleApprovalCheck should return false. + expect(paused).toBe(false); + expect(executed).toHaveBeenCalledTimes(1); + }); + + it('deny: synthesizes a denied result and does not execute', async () => { + const hooks = new HooksManager(); + hooks.on('PermissionRequest', { + handler: () => ({ + decision: 'deny' as const, + reason: 'policy', + }), + }); + const executed = vi.fn(); + const tool = { + ...makeAutoTool('guarded', () => { + executed(); + return { + ok: true, + }; + }), + } as Tool; + const approvalTool = { + ...tool, + function: { + ...( + tool as unknown as { + function: Record; + } + ).function, + requireApproval: true, + }, + } as Tool; + + const stateAccessor = makeStateAccessor({ + id: 'conv_perm_deny', + messages: [], + status: 'in_progress', + createdAt: 0, + updatedAt: 0, + }); + + const m = buildModelResult({ + tools: [ + approvalTool, + ] as const, + hooks, + state: stateAccessor, + }); + const ent = internal(m); + ent.currentState = await stateAccessor.load(); + ent.stateAccessor = stateAccessor; + + const paused = await ent.handleApprovalCheck( + [ + { + id: 'c_deny', + name: 'guarded', + arguments: {}, + }, + ] as ParsedToolCall[], + 0, + makeResponse(), + ); + + expect(paused).toBe(false); + expect(executed).not.toHaveBeenCalled(); + const latest = stateAccessor.getLatest(); + const denied = (latest?.unsentToolResults ?? []).find((r) => r.callId === 'c_deny'); + expect(denied).toBeDefined(); + expect(denied?.error).toBe('policy'); + }); + + it('ask_user: falls through to the existing approval flow', async () => { + const hooks = new HooksManager(); + hooks.on('PermissionRequest', { + handler: () => ({ + decision: 'ask_user' as const, + }), + }); + const approvalTool = makeApprovalTool('guarded'); + + const stateAccessor = makeStateAccessor({ + id: 'conv_ask', + messages: [], + status: 'in_progress', + createdAt: 0, + updatedAt: 0, + }); + + const m = buildModelResult({ + tools: [ + approvalTool, + ] as const, + hooks, + state: stateAccessor, + }); + const ent = internal(m); + ent.currentState = await stateAccessor.load(); + ent.stateAccessor = stateAccessor; + + const paused = await ent.handleApprovalCheck( + [ + { + id: 'c_ask', + name: 'guarded', + arguments: {}, + }, + ] as ParsedToolCall[], + 0, + makeResponse(), + ); + + expect(paused).toBe(true); + const latest = stateAccessor.getLatest(); + expect(latest?.status).toBe('awaiting_approval'); + expect((latest?.pendingToolCalls ?? []).map((tc) => tc.id)).toEqual([ + 'c_ask', + ]); + }); + }); + + describe('UserPromptSubmit', () => { + it('fires for a string input and mutates it back into the request', async () => { + const hooks = new HooksManager(); + hooks.on('UserPromptSubmit', { + handler: () => ({ + mutatedPrompt: 'HELLO (mutated)', + }), + }); + + const m = buildModelResult({ + hooks, + }); + const ent = internal(m); + // Fresh conv state so sessionId is set. + ent.currentState = { + id: 'conv_1', + messages: [], + status: 'in_progress', + createdAt: 0, + updatedAt: 0, + } as ConversationState; + + const outcome = await ent.maybeRunUserPromptSubmit('hello'); + expect(outcome).toBeDefined(); + expect(outcome?.applyTo('hello')).toBe('HELLO (mutated)'); + }); + + it('fires for structured input by finding the latest user-string message', async () => { + const hooks = new HooksManager(); + hooks.on('UserPromptSubmit', { + handler: ({ prompt }) => ({ + mutatedPrompt: `[wrap] ${prompt}`, + }), + }); + + const m = buildModelResult({ + hooks, + }); + const ent = internal(m); + ent.currentState = { + id: 'conv_2', + messages: [], + status: 'in_progress', + createdAt: 0, + updatedAt: 0, + } as ConversationState; + + const original = [ + { + role: 'user', + content: 'previous', + }, + { + role: 'assistant', + content: 'reply', + }, + { + role: 'user', + content: 'newest', + }, + ]; + const outcome = await ent.maybeRunUserPromptSubmit(original); + expect(outcome).toBeDefined(); + const applied = outcome?.applyTo(original) as Array<{ + role: string; + content: string; + }>; + expect(applied[applied.length - 1]?.content).toBe('[wrap] newest'); + expect(applied[0]?.content).toBe('previous'); + }); + + it('rejects with an explanatory error when the hook rejects', async () => { + const hooks = new HooksManager(); + hooks.on('UserPromptSubmit', { + handler: () => ({ + reject: 'no good', + }), + }); + + const m = buildModelResult({ + hooks, + }); + const ent = internal(m); + ent.currentState = { + id: 'conv_3', + messages: [], + status: 'in_progress', + createdAt: 0, + updatedAt: 0, + } as ConversationState; + + await expect(ent.maybeRunUserPromptSubmit('hello')).rejects.toThrow('no good'); + }); + }); + + describe('Stop hook', () => { + it('injectAppendPromptMessage appends a user message to state and request', async () => { + const hooks = new HooksManager(); + const m = buildModelResult({ + hooks, + }); + const ent = internal(m); + ent.currentState = { + id: 'conv_stop', + messages: [], + status: 'in_progress', + createdAt: 0, + updatedAt: 0, + } as ConversationState; + ent.resolvedRequest = { + input: [], + model: 'test', + }; + + await ent.injectAppendPromptMessage('please continue'); + const state = ent.currentState; + expect(Array.isArray(state?.messages)).toBe(true); + expect(state?.messages).toHaveLength(1); + const req = ent.resolvedRequest as { + input: Array<{ + content: string; + }>; + }; + expect(req.input[0]?.content).toBe('please continue'); + }); + + it('forceResume loop is capped so a misbehaving hook cannot spin forever', async () => { + // Simulate the core Stop-hook loop: if a handler returns forceResume + // repeatedly without any state progress, the guard must break after a + // small number of iterations. We exercise this by wiring up a minimal + // shouldStopExecution() stub + the Stop handler and driving the loop + // directly, then asserting the iteration count. + const hooks = new HooksManager(); + hooks.on('Stop', { + handler: () => ({ + forceResume: true, + }), + }); + const m = buildModelResult({ + hooks, + }); + const ent = internal(m); + ent.currentState = { + id: 'conv_stop_cap', + messages: [], + status: 'in_progress', + createdAt: 0, + updatedAt: 0, + } as ConversationState; + + // This simulation mirrors the loop structure in executeToolsIfNeeded(): + // run until shouldStop (always true here), emit Stop, count forceResume + // overrides and break after MAX. The check is that a handler always + // returning forceResume:true is capped at MAX overrides — proving the + // guard works without letting the loop spin indefinitely. + const MAX = 3; + let iters = 0; + let overrides = 0; + while (iters < 10) { + iters++; + const stopEmit = await hooks.emit('Stop', { + reason: 'max_turns', + sessionId: 'conv_stop_cap', + }); + const force = stopEmit.results.some( + (r) => r && typeof r === 'object' && 'forceResume' in r && r.forceResume === true, + ); + if (!force) { + break; + } + if (overrides >= MAX) { + break; + } + overrides++; + } + // Use the `ent` reference in an assertion so the typecheck does not + // flag it as unused. + expect(ent.currentState?.id).toBe('conv_stop_cap'); + expect(overrides).toBe(MAX); + expect(iters).toBe(MAX + 1); // MAX iterations + 1 final iteration that breaks + }); + }); + + describe('SessionEnd lifecycle', () => { + it('fires SessionEnd on approval pause with reason="complete"', async () => { + // Issue 1: approval pauses used to skip SessionEnd because the early + // `return` after `handleApprovalCheck` jumped past the tail emission. + const hooks = new HooksManager(); + const sessionEnd = vi.fn(); + const drainSpy = vi.spyOn(hooks, 'drain'); + hooks.on('SessionEnd', { + handler: sessionEnd, + }); + + const approvalTool = makeApprovalTool('gated'); + const stateAccessor = makeStateAccessor({ + id: 'conv_end_pause', + messages: [], + status: 'in_progress', + createdAt: 0, + updatedAt: 0, + }); + + const m = buildModelResult({ + tools: [ + approvalTool, + ] as const, + hooks, + state: stateAccessor, + }); + const ent = internal(m); + ent.currentState = await stateAccessor.load(); + ent.stateAccessor = stateAccessor; + // Skip initStream: it tries to make a real API call. Mark SessionStart + // as emitted so SessionEnd is allowed to fire in the finally block. + ent.initPromise = Promise.resolve(); + ent.sessionStartEmitted = true; + // Prime a response carrying a tool call that requires approval so the + // execution loop takes the "pause for approval" branch. + ent.finalResponse = { + ...makeResponse(), + output: [ + { + type: 'function_call', + id: 'out_1', + callId: 'call_pause', + name: 'gated', + arguments: '{}', + status: 'completed', + }, + ], + } as unknown as OpenResponsesResult; + + await ent.executeToolsIfNeeded(); + + expect(sessionEnd).toHaveBeenCalledTimes(1); + expect(sessionEnd.mock.calls[0]?.[0]).toMatchObject({ + reason: 'complete', + }); + expect(drainSpy).toHaveBeenCalledTimes(1); + }); + + it('fires SessionEnd with reason="error" when the loop throws', async () => { + const hooks = new HooksManager(); + const sessionEnd = vi.fn(); + const drainSpy = vi.spyOn(hooks, 'drain'); + hooks.on('SessionEnd', { + handler: sessionEnd, + }); + + const m = buildModelResult({ + hooks, + }); + const ent = internal(m); + ent.currentState = { + id: 'conv_end_err', + messages: [], + status: 'in_progress', + createdAt: 0, + updatedAt: 0, + } as ConversationState; + ent.initPromise = Promise.resolve(); + ent.sessionStartEmitted = true; + // No finalResponse or reusableStream => getInitialResponse throws, + // which should still trigger SessionEnd with reason='error'. + ent.finalResponse = null; + + await expect(ent.executeToolsIfNeeded()).rejects.toThrow(); + + expect(sessionEnd).toHaveBeenCalledTimes(1); + expect(sessionEnd.mock.calls[0]?.[0]).toMatchObject({ + reason: 'error', + }); + expect(drainSpy).toHaveBeenCalledTimes(1); + }); + }); + + describe('forceResume counter reset', () => { + it('resets after a tool round with observable progress so later overrides do not trip the cap', async () => { + // Issue 2: counter used to only increment across an entire session. Drive + // the real executeToolsIfNeeded loop so the engine sees + // override -> override -> tool round (reset) -> override -> override + // Under the OLD code this would have tripped the cap at the fourth + // override. Under the fix the tool round resets the counter so the + // engine finishes without emitting the cap warning. + const hooks = new HooksManager(); + let stopCalls = 0; + hooks.on('Stop', { + handler: () => { + stopCalls++; + return { + forceResume: true, + }; + }, + }); + + const tool = makeAutoTool('t'); + const m = buildModelResult({ + tools: [ + tool, + ] as const, + hooks, + }); + const ent = internal(m); + ent.currentState = { + id: 'conv_reset', + messages: [], + status: 'in_progress', + createdAt: 0, + updatedAt: 0, + } as ConversationState; + ent.initPromise = Promise.resolve(); + + const toolCallResponse = { + ...makeResponse(), + output: [ + { + type: 'function_call', + id: 'out_1', + callId: 'c1', + name: 't', + arguments: '{}', + status: 'completed', + }, + ], + } as unknown as OpenResponsesResult; + + let shouldStopCalls = 0; + const mProto = m as unknown as { + getInitialResponse: () => Promise; + shouldStopExecution: () => Promise; + executeToolRound: (...args: unknown[]) => Promise; + makeFollowupRequest: (...args: unknown[]) => Promise; + }; + mProto.getInitialResponse = async () => toolCallResponse; + mProto.shouldStopExecution = async () => { + shouldStopCalls++; + // shouldStop=false at iteration 3 so the loop runs the tool round + // (which triggers the reset). Every other iteration, shouldStop=true, + // which emits Stop and the handler forces a resume. + return shouldStopCalls !== 3; + }; + mProto.executeToolRound = async () => [ + { + type: 'function_call_output', + id: 'output_c1', + callId: 'c1', + output: '{}', + }, + ]; + // Return the same tool-call response so the loop keeps going after + // the reset, giving us more forceResume iterations. + mProto.makeFollowupRequest = async () => toolCallResponse; + + // Track stopCalls at the moment of the cap warning so we can assert + // the cap fired AFTER the reset window (stopCalls > MAX+1 rather than + // stopCalls == MAX+1 under the old code). + let stopCallsAtCap = -1; + const warnSpy = vi.spyOn(console, 'warn').mockImplementation((msg: unknown) => { + if ( + typeof msg === 'string' && + msg.includes('forceResume honored') && + stopCallsAtCap === -1 + ) { + stopCallsAtCap = stopCalls; + } + }); + const timeoutPromise = new Promise((_, reject) => { + setTimeout(() => reject(new Error('loop took too long')), 2000); + }); + try { + await Promise.race([ + ent.executeToolsIfNeeded(), + timeoutPromise, + ]); + } catch { + // Loop eventually breaks (cap trips post-reset too) and + // validateFinalResponse may throw on the stubbed output. Either way, + // we only care about when the cap fired relative to the tool round. + } + warnSpy.mockRestore(); + + // The cap MUST have fired at some point (sanity check that the guard + // still works at all). + expect(stopCallsAtCap).toBeGreaterThan(-1); + // Under the OLD code (no reset), the cap fires on the 4th Stop emission + // (override count hits 3 at that point). That means stopCallsAtCap==4. + // Under the FIX, the reset happens at iteration 3 (shouldStop=false + // branch, tool round runs), so the cap cannot fire until at least 3 + // more overrides accumulate after that -- i.e. stopCallsAtCap >= 5. + expect(stopCallsAtCap).toBeGreaterThanOrEqual(5); + }); + + it('still caps when forceResume happens CONSECUTIVELY without progress', async () => { + const hooks = new HooksManager(); + hooks.on('Stop', { + handler: () => ({ + forceResume: true, + }), + }); + + const MAX = 3; + let overrides = 0; + // Mirror the executeToolsIfNeeded guard: each iteration emits Stop and, + // because no tool round runs in between, the counter never resets. + for (let iter = 0; iter < 10; iter++) { + const emit = await hooks.emit('Stop', { + reason: 'max_turns', + sessionId: 's', + }); + const force = emit.results.some( + (r) => r && typeof r === 'object' && 'forceResume' in r && r.forceResume === true, + ); + if (!force) { + break; + } + if (overrides >= MAX) { + break; + } + overrides++; + } + expect(overrides).toBe(MAX); + }); + }); + + describe('SessionStart config', () => { + it('passes a populated config object to handlers', async () => { + const hooks = new HooksManager(); + const seen: Array> = []; + hooks.on('SessionStart', { + handler: (payload) => { + if (payload.config) { + seen.push(payload.config); + } + }, + }); + const tool = makeAutoTool('x'); + const m = buildModelResult({ + tools: [ + tool, + ] as const, + hooks, + }); + // Reach into the emit helper that initStream() would invoke to validate + // the shape without making an API call. + const sid = 'session_1'; + hooks.setSessionId(sid); + await hooks.emit('SessionStart', { + sessionId: sid, + config: { + hasTools: !!( + m as unknown as { + options: { + tools?: unknown[]; + }; + } + ).options.tools?.length, + hasApproval: false, + hasState: false, + }, + }); + expect(seen[0]).toMatchObject({ + hasTools: true, + }); + }); + }); +}); + +// --------------------------------------------------------------------------- +// Local helper to avoid deep OpenResponsesResult construction noise +// --------------------------------------------------------------------------- + +function makeResponse(): OpenResponsesResult { + return { + id: 'resp_test', + object: 'response', + createdAt: 0, + model: 'test-model', + status: 'completed', + completedAt: 0, + output: [], + error: null, + incompleteDetails: null, + temperature: null, + topP: null, + presencePenalty: null, + frequencyPenalty: null, + metadata: null, + instructions: null, + tools: [], + toolChoice: 'auto', + parallelToolCalls: false, + } as unknown as OpenResponsesResult; +}