From 2982107f1f34e88eb24622e007effa51ef20756e Mon Sep 17 00:00:00 2001 From: Matt Apperson Date: Fri, 3 Apr 2026 11:31:39 -0400 Subject: [PATCH 1/7] feat: add HooksManager lifecycle hook system to callModel Implements an extensible hook system for callModel inspired by the Claude Agent SDK hooks pattern. Supports inline config for quick declarative usage and a HooksManager class for custom hooks, dynamic registration, and programmatic emit. Built-in hooks: PreToolUse, PostToolUse, PostToolUseFailure, UserPromptSubmit, Stop, PermissionRequest, SessionStart, SessionEnd. Features: tool matchers (string/RegExp/function), filter predicates, mutation piping, short-circuit on block/reject, async fire-and-forget handlers, configurable error handling, and custom hook definitions via Zod schema pairs. --- src/index.ts | 48 ++++++ src/inner-loop/call-model.ts | 3 + src/lib/async-params.ts | 5 + src/lib/hooks-emit.ts | 140 ++++++++++++++++ src/lib/hooks-manager.ts | 175 ++++++++++++++++++++ src/lib/hooks-matchers.ts | 25 +++ src/lib/hooks-resolve.ts | 31 ++++ src/lib/hooks-schemas.ts | 97 ++++++++++++ src/lib/hooks-types.ts | 255 ++++++++++++++++++++++++++++++ src/lib/model-result.ts | 131 ++++++++++++++- tests/unit/hooks-emit.test.ts | 216 +++++++++++++++++++++++++ tests/unit/hooks-manager.test.ts | 211 ++++++++++++++++++++++++ tests/unit/hooks-matchers.test.ts | 27 ++++ tests/unit/hooks-resolve.test.ts | 56 +++++++ 14 files changed, 1417 insertions(+), 3 deletions(-) create mode 100644 src/lib/hooks-emit.ts create mode 100644 src/lib/hooks-manager.ts create mode 100644 src/lib/hooks-matchers.ts create mode 100644 src/lib/hooks-resolve.ts create mode 100644 src/lib/hooks-schemas.ts create mode 100644 src/lib/hooks-types.ts create mode 100644 tests/unit/hooks-emit.test.ts create mode 100644 tests/unit/hooks-manager.test.ts create mode 100644 tests/unit/hooks-matchers.test.ts create mode 100644 tests/unit/hooks-resolve.test.ts diff --git a/src/index.ts b/src/index.ts index 0ffd87a..f47fef2 100644 --- a/src/index.ts +++ b/src/index.ts @@ -110,3 +110,51 @@ export { } from './lib/tool-types.js'; // Turn context helpers export { buildTurnContext, normalizeInputToArray } from './lib/turn-context.js'; + +// Hooks system +export { HooksManager } from './lib/hooks-manager.js'; +export { resolveHooks } from './lib/hooks-resolve.js'; +export { matchesTool } from './lib/hooks-matchers.js'; +export { HookName } from './lib/hooks-types.js'; +export type { + HookContext, + HookHandler, + HookEntry, + HookReturn, + HookRegistry, + HookDefinition, + AsyncOutput, + ToolMatcher, + EmitResult, + InlineHookConfig, + HooksManagerOptions, + BuiltInHookDefinitions, + PreToolUsePayload, + PreToolUseResult, + PostToolUsePayload, + PostToolUseFailurePayload, + StopPayload, + StopResult, + PermissionRequestPayload, + PermissionRequestResult, + UserPromptSubmitPayload, + UserPromptSubmitResult, + SessionStartPayload, + SessionEndPayload, +} from './lib/hooks-types.js'; +export { + BUILT_IN_HOOKS, + BUILT_IN_HOOK_NAMES, + PreToolUsePayloadSchema, + PreToolUseResultSchema, + PostToolUsePayloadSchema, + PostToolUseFailurePayloadSchema, + StopPayloadSchema, + StopResultSchema, + PermissionRequestPayloadSchema, + PermissionRequestResultSchema, + UserPromptSubmitPayloadSchema, + UserPromptSubmitResultSchema, + SessionStartPayloadSchema, + SessionEndPayloadSchema, +} from './lib/hooks-schemas.js'; diff --git a/src/inner-loop/call-model.ts b/src/inner-loop/call-model.ts index ba12a32..67dc05b 100644 --- a/src/inner-loop/call-model.ts +++ b/src/inner-loop/call-model.ts @@ -6,6 +6,7 @@ import type { Tool } from '../lib/tool-types.js'; import { type GetResponseOptions, ModelResult } from '../lib/model-result.js'; import { convertToolsToAPIFormat } from '../lib/tool-executor.js'; +import { resolveHooks } from '../lib/hooks-resolve.js'; // Re-export CallModelInput for convenience export type { CallModelInput } from '../lib/async-params.js'; @@ -91,6 +92,7 @@ export function callModel< sharedContextSchema, onTurnStart, onTurnEnd, + hooks, ...apiRequest } = request; @@ -152,5 +154,6 @@ export function callModel< ...(onTurnEnd !== undefined && { onTurnEnd, }), + ...(hooks !== undefined && { hooks: resolveHooks(hooks) }), } as GetResponseOptions); } diff --git a/src/lib/async-params.ts b/src/lib/async-params.ts index 39588e9..8c5caf2 100644 --- a/src/lib/async-params.ts +++ b/src/lib/async-params.ts @@ -1,6 +1,8 @@ import type * as models from '@openrouter/sdk/models'; import type { OpenResponsesResult } from '@openrouter/sdk/models'; import type { ContextInput } from './tool-context.js'; +import type { InlineHookConfig } from './hooks-types.js'; +import type { HooksManager } from './hooks-manager.js'; import type { ParsedToolCall, StateAccessor, @@ -78,6 +80,8 @@ type BaseCallModelInput< * Receives the turn context and the completed response for that turn */ onTurnEnd?: (context: TurnContext, response: OpenResponsesResult) => void | Promise; + /** Hook system for lifecycle events. Accepts inline config or a HooksManager instance. */ + hooks?: InlineHookConfig | HooksManager; }; /** @@ -178,6 +182,7 @@ export async function resolveAsyncFunctions toolInput, mutatedPrompt -> prompt) + * - Short-circuit on block/reject fields + */ +export async function executeHandlerChain( + entries: ReadonlyArray>, + initialPayload: P, + context: HookContext, + options: ExecuteChainOptions, +): Promise> { + const results: R[] = []; + const pending: Promise[] = []; + let currentPayload = { ...initialPayload } as P; + let blocked = false; + + const blockField = BLOCK_FIELDS[options.hookName]; + const canBlock = BLOCK_HOOKS.has(options.hookName); + + for (let i = 0; i < entries.length; i++) { + const entry = entries[i]; + if (!entry) continue; + + // Matcher check for tool-scoped hooks + if ( + entry.matcher !== undefined && + options.toolName !== undefined && + !matchesTool(entry.matcher, options.toolName) + ) { + continue; + } + + // Filter check + if (entry.filter && !entry.filter(currentPayload)) { + continue; + } + + try { + const returnValue = await entry.handler(currentPayload, context); + + // Async fire-and-forget + if (isAsyncOutput(returnValue)) { + const asyncOutput = returnValue as AsyncOutput; + const timeout = asyncOutput.asyncTimeout ?? DEFAULT_ASYNC_TIMEOUT; + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), timeout); + + const asyncPromise = Promise.resolve().then(() => { + clearTimeout(timeoutId); + }); + pending.push(asyncPromise); + continue; + } + + // Void / undefined -- side-effect only, continue + if (returnValue === undefined || returnValue === null) { + continue; + } + + const result = returnValue as R; + results.push(result); + + // Apply mutation piping + currentPayload = applyMutations(currentPayload, result); + + // Short-circuit on block + if (canBlock && blockField && isBlockTriggered(result, blockField)) { + blocked = true; + break; + } + } catch (error) { + if (options.throwOnHandlerError) { + throw error; + } + console.warn( + `[HooksManager] Handler ${i} for hook "${options.hookName}" threw:`, + error, + ); + } + } + + return { results, pending, finalPayload: currentPayload, blocked }; +} + +/** + * Apply mutation fields from a result onto the current payload. + */ +function applyMutations(payload: P, result: R): P { + if (typeof result !== 'object' || result === null) { + return payload; + } + + let mutated = payload; + for (const [resultField, payloadField] of Object.entries(MUTATION_FIELD_MAP)) { + if (resultField in result) { + const value = (result as Record)[resultField]; + if (value !== undefined) { + mutated = { ...mutated, [payloadField]: value }; + } + } + } + return mutated; +} + +/** + * Check if a result triggers a short-circuit block. + */ +function isBlockTriggered(result: R, blockField: string): boolean { + if (typeof result !== 'object' || result === null) { + return false; + } + const value = (result as Record)[blockField]; + return value === true || typeof value === 'string'; +} diff --git a/src/lib/hooks-manager.ts b/src/lib/hooks-manager.ts new file mode 100644 index 0000000..fe0ed47 --- /dev/null +++ b/src/lib/hooks-manager.ts @@ -0,0 +1,175 @@ +import type { infer as zodInfer } from 'zod/v4/core'; +import type { + BuiltInHookDefinitions, + EmitResult, + HookContext, + HookEntry, + HookHandler, + HookRegistry, + HooksManagerOptions, + ToolMatcher, +} from './hooks-types.js'; +import { BUILT_IN_HOOK_NAMES } from './hooks-schemas.js'; +import { executeHandlerChain } from './hooks-emit.js'; + +//#region Types + +type AllHooks = BuiltInHookDefinitions & { + [K in keyof Custom]: { + payload: zodInfer; + result: zodInfer; + }; +}; + +type PayloadOf, K extends keyof H> = H[K]['payload']; +type ResultOf, K extends keyof H> = H[K]['result']; + +interface EntryRegistration { + readonly handler: HookHandler; + readonly matcher?: ToolMatcher; + readonly filter?: (payload: P) => boolean; +} + +//#endregion + +/** + * Typed, extensible hook system for agent lifecycle events. + * + * Supports both built-in hooks (PreToolUse, PostToolUse, etc.) and + * user-defined custom hooks with full type safety. + */ +export class HooksManager> { + private readonly entries = new Map[]>(); + private readonly pendingAsync: Promise[] = []; + private readonly throwOnHandlerError: boolean; + private sessionId = ''; + + constructor(customHooks?: Custom, options?: HooksManagerOptions) { + this.throwOnHandlerError = options?.throwOnHandlerError ?? false; + + // Validate no collisions between custom and built-in hook names + if (customHooks) { + for (const name of Object.keys(customHooks)) { + if (BUILT_IN_HOOK_NAMES.has(name)) { + throw new Error( + `Custom hook name "${name}" collides with a built-in hook. Choose a different name.`, + ); + } + } + } + } + + /** + * Set the session ID used in HookContext for all handler invocations. + */ + setSessionId(sessionId: string): void { + this.sessionId = sessionId; + } + + /** + * Register a handler for a hook. Returns an unsubscribe function. + */ + on & string>( + hookName: K, + entry: EntryRegistration, K>, ResultOf, K>>, + ): () => void { + return this.registerEntry(hookName, entry as HookEntry); + } + + /** + * Internal: register an untyped entry. Used by resolveHooks for inline config normalization. + * @internal + */ + registerEntry(hookName: string, entry: HookEntry): () => void { + const list = this.entries.get(hookName) ?? []; + list.push(entry); + this.entries.set(hookName, list); + + return () => { + const current = this.entries.get(hookName); + if (!current) return; + const idx = current.indexOf(entry); + if (idx !== -1) { + current.splice(idx, 1); + } + }; + } + + /** + * Remove a specific handler function from a hook. + * Returns true if found and removed, false otherwise. + */ + off & string>( + hookName: K, + handler: HookHandler, K>, ResultOf, K>>, + ): boolean { + const list = this.entries.get(hookName); + if (!list) return false; + + const idx = list.findIndex((e) => e.handler === handler); + if (idx === -1) return false; + + list.splice(idx, 1); + return true; + } + + /** + * Remove all handlers for a specific hook, or all handlers if no name given. + */ + removeAll & string>(hookName?: K): void { + if (hookName) { + this.entries.delete(hookName); + } else { + this.entries.clear(); + } + } + + /** + * Validate the payload, invoke matching handlers, and return results. + */ + async emit & string>( + hookName: K, + payload: PayloadOf, K>, + emitContext?: { toolName?: string }, + ): Promise, K>, PayloadOf, K>>> { + const list = this.entries.get(hookName) ?? []; + + const context: HookContext = { + signal: new AbortController().signal, + hookName, + sessionId: this.sessionId, + }; + + const result = await executeHandlerChain( + list as ReadonlyArray, K>, ResultOf, K>>>, + payload, + context, + { + hookName, + throwOnHandlerError: this.throwOnHandlerError, + toolName: emitContext?.toolName, + }, + ); + + // Track pending async work for drain() + this.pendingAsync.push(...result.pending); + + return result; + } + + /** + * Await all in-flight async handlers. Used for graceful shutdown. + */ + async drain(): Promise { + const pending = this.pendingAsync.splice(0); + await Promise.allSettled(pending); + } + + /** + * Check if any handlers are registered for a given hook. + */ + hasHandlers(hookName: string): boolean { + const list = this.entries.get(hookName); + return list !== undefined && list.length > 0; + } +} diff --git a/src/lib/hooks-matchers.ts b/src/lib/hooks-matchers.ts new file mode 100644 index 0000000..d66090e --- /dev/null +++ b/src/lib/hooks-matchers.ts @@ -0,0 +1,25 @@ +import type { ToolMatcher } from './hooks-types.js'; + +/** + * Evaluate a ToolMatcher against a tool name. + * + * - `undefined` -> wildcard, matches all tools + * - `string` -> exact match + * - `RegExp` -> `.test(toolName)` + * - `function` -> arbitrary predicate + */ +export function matchesTool( + matcher: ToolMatcher | undefined, + toolName: string, +): boolean { + if (matcher === undefined) { + return true; + } + if (typeof matcher === 'string') { + return matcher === toolName; + } + if (matcher instanceof RegExp) { + return matcher.test(toolName); + } + return matcher(toolName); +} diff --git a/src/lib/hooks-resolve.ts b/src/lib/hooks-resolve.ts new file mode 100644 index 0000000..d694540 --- /dev/null +++ b/src/lib/hooks-resolve.ts @@ -0,0 +1,31 @@ +import type { HookEntry, InlineHookConfig } from './hooks-types.js'; +import { HooksManager } from './hooks-manager.js'; + +/** + * Normalize a hooks option into a HooksManager instance. + * + * - `undefined` -> `undefined` (no hooks) + * - `HooksManager` -> passthrough + * - Plain object (InlineHookConfig) -> construct HooksManager, register all entries + */ +export function resolveHooks( + hooks: InlineHookConfig | HooksManager | undefined, +): HooksManager | undefined { + if (!hooks) { + return undefined; + } + + if (hooks instanceof HooksManager) { + return hooks; + } + + // Inline config -> HooksManager + const manager = new HooksManager(); + for (const [hookName, entries] of Object.entries(hooks)) { + if (!entries || !Array.isArray(entries)) continue; + for (const entry of entries) { + manager.registerEntry(hookName, entry as HookEntry); + } + } + return manager; +} diff --git a/src/lib/hooks-schemas.ts b/src/lib/hooks-schemas.ts new file mode 100644 index 0000000..e6c9ce4 --- /dev/null +++ b/src/lib/hooks-schemas.ts @@ -0,0 +1,97 @@ +import * as z4 from 'zod/v4'; +import type { HookDefinition } from './hooks-types.js'; + +//#region Payload Schemas + +export const PreToolUsePayloadSchema = z4.object({ + toolName: z4.string(), + toolInput: z4.record(z4.string(), z4.unknown()), + sessionId: z4.string(), +}); + +export const PostToolUsePayloadSchema = z4.object({ + toolName: z4.string(), + toolInput: z4.record(z4.string(), z4.unknown()), + toolOutput: z4.unknown(), + durationMs: z4.number(), + sessionId: z4.string(), +}); + +export const PostToolUseFailurePayloadSchema = z4.object({ + toolName: z4.string(), + toolInput: z4.record(z4.string(), z4.unknown()), + error: z4.unknown(), + sessionId: z4.string(), +}); + +export const StopPayloadSchema = z4.object({ + reason: z4.enum(['end_turn', 'max_tokens', 'stop_sequence']), + sessionId: z4.string(), +}); + +export const PermissionRequestPayloadSchema = z4.object({ + toolName: z4.string(), + toolInput: z4.record(z4.string(), z4.unknown()), + riskLevel: z4.enum(['low', 'medium', 'high']), + sessionId: z4.string(), +}); + +export const UserPromptSubmitPayloadSchema = z4.object({ + prompt: z4.string(), + sessionId: z4.string(), +}); + +export const SessionStartPayloadSchema = z4.object({ + sessionId: z4.string(), + config: z4.record(z4.string(), z4.unknown()).optional(), +}); + +export const SessionEndPayloadSchema = z4.object({ + sessionId: z4.string(), + reason: z4.enum(['user', 'error', 'max_turns', 'complete']), +}); + +//#endregion + +//#region Result Schemas + +export const PreToolUseResultSchema = z4.object({ + mutatedInput: z4.record(z4.string(), z4.unknown()).optional(), + block: z4.union([z4.boolean(), z4.string()]).optional(), +}); + +export const StopResultSchema = z4.object({ + forceResume: z4.boolean().optional(), + appendPrompt: z4.string().optional(), +}); + +export const PermissionRequestResultSchema = z4.object({ + decision: z4.enum(['allow', 'deny', 'ask_user']), + reason: z4.string().optional(), +}); + +export const UserPromptSubmitResultSchema = z4.object({ + mutatedPrompt: z4.string().optional(), + reject: z4.union([z4.boolean(), z4.string()]).optional(), +}); + +const VoidResultSchema = z4.void(); + +//#endregion + +//#region Built-in Hook Registry + +export const BUILT_IN_HOOKS: Record = { + PreToolUse: { payload: PreToolUsePayloadSchema, result: PreToolUseResultSchema }, + PostToolUse: { payload: PostToolUsePayloadSchema, result: VoidResultSchema }, + PostToolUseFailure: { payload: PostToolUseFailurePayloadSchema, result: VoidResultSchema }, + UserPromptSubmit: { payload: UserPromptSubmitPayloadSchema, result: UserPromptSubmitResultSchema }, + Stop: { payload: StopPayloadSchema, result: StopResultSchema }, + PermissionRequest: { payload: PermissionRequestPayloadSchema, result: PermissionRequestResultSchema }, + SessionStart: { payload: SessionStartPayloadSchema, result: VoidResultSchema }, + SessionEnd: { payload: SessionEndPayloadSchema, result: VoidResultSchema }, +}; + +export const BUILT_IN_HOOK_NAMES = new Set(Object.keys(BUILT_IN_HOOKS)); + +//#endregion diff --git a/src/lib/hooks-types.ts b/src/lib/hooks-types.ts new file mode 100644 index 0000000..57d7c2c --- /dev/null +++ b/src/lib/hooks-types.ts @@ -0,0 +1,255 @@ +import type { $ZodType } from 'zod/v4/core'; + +//#region Hook Names + +export const HookName = { + PreToolUse: 'PreToolUse', + PostToolUse: 'PostToolUse', + PostToolUseFailure: 'PostToolUseFailure', + UserPromptSubmit: 'UserPromptSubmit', + Stop: 'Stop', + PermissionRequest: 'PermissionRequest', + SessionStart: 'SessionStart', + SessionEnd: 'SessionEnd', +} as const; + +export type HookName = (typeof HookName)[keyof typeof HookName]; + +//#endregion + +//#region Core Types + +/** + * A hook definition is a pair of Zod schemas: one for the payload and one for the result. + */ +export interface HookDefinition { + readonly payload: $ZodType; + readonly result: $ZodType; +} + +/** + * A registry maps hook names to their definitions. + */ +export type HookRegistry = Record; + +/** + * Context provided to every hook handler invocation. + */ +export interface HookContext { + readonly signal: AbortSignal; + readonly hookName: string; + readonly sessionId: string; +} + +/** + * Returned by a handler to signal fire-and-forget mode. + * The agent proceeds immediately without waiting for completion. + */ +export interface AsyncOutput { + readonly async: true; + /** Milliseconds before the async handler is aborted. Default: 30000 */ + readonly asyncTimeout?: number; +} + +const DEFAULT_ASYNC_TIMEOUT = 30_000; + +export { DEFAULT_ASYNC_TIMEOUT }; + +/** + * A handler may return a sync result, an async signal, or void. + */ +export type HookReturn = R | AsyncOutput | void; + +/** + * A hook handler receives the validated payload and context. + */ +export type HookHandler = ( + payload: P, + context: HookContext, +) => HookReturn | Promise>; + +/** + * Matcher for tool-scoped hooks. Filters handler invocation by tool name. + */ +export type ToolMatcher = string | RegExp | ((toolName: string) => boolean); + +/** + * An entry registered for a specific hook. + */ +export interface HookEntry { + readonly handler: HookHandler; + readonly matcher?: ToolMatcher; + readonly filter?: (payload: P) => boolean; +} + +/** + * Result of emitting a hook through the handler chain. + */ +export interface EmitResult { + /** Sync results from handlers that returned a hook-specific value. */ + readonly results: R[]; + /** Handles to detached async handler work. */ + readonly pending: Promise[]; + /** The payload after all mutation piping has been applied. */ + readonly finalPayload: P; + /** True if any handler triggered a block/reject short-circuit. */ + readonly blocked: boolean; +} + +//#endregion + +//#region Options + +export interface HooksManagerOptions { + /** + * If true, a throwing handler stops the chain and propagates the error. + * If false (default), the error is logged as a warning and execution continues. + */ + readonly throwOnHandlerError?: boolean; +} + +//#endregion + +//#region Payload Types + +export interface PreToolUsePayload { + readonly toolName: string; + readonly toolInput: Record; + readonly sessionId: string; +} + +export interface PostToolUsePayload { + readonly toolName: string; + readonly toolInput: Record; + readonly toolOutput: unknown; + readonly durationMs: number; + readonly sessionId: string; +} + +export interface PostToolUseFailurePayload { + readonly toolName: string; + readonly toolInput: Record; + readonly error: unknown; + readonly sessionId: string; +} + +export interface StopPayload { + readonly reason: 'end_turn' | 'max_tokens' | 'stop_sequence'; + readonly sessionId: string; +} + +export interface PermissionRequestPayload { + readonly toolName: string; + readonly toolInput: Record; + readonly riskLevel: 'low' | 'medium' | 'high'; + readonly sessionId: string; +} + +export interface UserPromptSubmitPayload { + readonly prompt: string; + readonly sessionId: string; +} + +export interface SessionStartPayload { + readonly sessionId: string; + readonly config: Record | undefined; +} + +export interface SessionEndPayload { + readonly sessionId: string; + readonly reason: 'user' | 'error' | 'max_turns' | 'complete'; +} + +//#endregion + +//#region Result Types + +export interface PreToolUseResult { + readonly mutatedInput?: Record; + readonly block?: boolean | string; +} + +export interface StopResult { + readonly forceResume?: boolean; + readonly appendPrompt?: string; +} + +export interface PermissionRequestResult { + readonly decision: 'allow' | 'deny' | 'ask_user'; + readonly reason?: string; +} + +export interface UserPromptSubmitResult { + readonly mutatedPrompt?: string; + readonly reject?: boolean | string; +} + +//#endregion + +//#region Built-in Hook Registry (type-level) + +export interface BuiltInHookDefinitions { + PreToolUse: { payload: PreToolUsePayload; result: PreToolUseResult }; + PostToolUse: { payload: PostToolUsePayload; result: void }; + PostToolUseFailure: { payload: PostToolUseFailurePayload; result: void }; + UserPromptSubmit: { payload: UserPromptSubmitPayload; result: UserPromptSubmitResult }; + Stop: { payload: StopPayload; result: StopResult }; + PermissionRequest: { payload: PermissionRequestPayload; result: PermissionRequestResult }; + SessionStart: { payload: SessionStartPayload; result: void }; + SessionEnd: { payload: SessionEndPayload; result: void }; +} + +//#endregion + +//#region Inline Config + +/** + * Inline hook config passed directly to callModel. + * Only supports built-in hooks. For custom hooks, use a HooksManager instance. + */ +export type InlineHookConfig = { + [K in keyof BuiltInHookDefinitions]?: HookEntry< + BuiltInHookDefinitions[K]['payload'], + BuiltInHookDefinitions[K]['result'] extends void ? void : BuiltInHookDefinitions[K]['result'] + >[]; +}; + +//#endregion + +//#region Helper Types + +/** + * Checks if a value is an AsyncOutput signal. + */ +export function isAsyncOutput(value: unknown): value is AsyncOutput { + return ( + typeof value === 'object' && + value !== null && + 'async' in value && + (value as { async: unknown }).async === true + ); +} + +/** + * Mutation field mapping for payload piping. + * Maps result field names to the payload field they replace. + */ +export const MUTATION_FIELD_MAP: Record = { + mutatedInput: 'toolInput', + mutatedPrompt: 'prompt', +}; + +/** + * Hook names that support short-circuit blocking. + */ +export const BLOCK_HOOKS = new Set(['PreToolUse', 'UserPromptSubmit']); + +/** + * Result fields that trigger short-circuit. + */ +export const BLOCK_FIELDS: Record = { + PreToolUse: 'block', + UserPromptSubmit: 'reject', +}; + +//#endregion diff --git a/src/lib/model-result.ts b/src/lib/model-result.ts index 7c4894c..59e32c6 100644 --- a/src/lib/model-result.ts +++ b/src/lib/model-result.ts @@ -73,6 +73,7 @@ import { type ContextInput, resolveContext, ToolContextStore } from './tool-cont import { ToolEventBroadcaster } from './tool-event-broadcaster.js'; import { executeTool } from './tool-executor.js'; import { hasExecuteFunction, isToolCallOutputEvent } from './tool-types.js'; +import type { HooksManager } from './hooks-manager.js'; /** * Default maximum number of tool execution steps if no stopWhen is specified. @@ -134,6 +135,8 @@ export interface GetResponseOptions< onTurnStart?: (context: TurnContext) => void | Promise; /** Callback invoked at the end of each tool execution turn */ onTurnEnd?: (context: TurnContext, response: models.OpenResponsesResult) => void | Promise; + /** Hook system for lifecycle events */ + hooks?: HooksManager; } /** @@ -211,8 +214,12 @@ export class ModelResult< // Context store for typed tool context (persists across turns) private contextStore: ToolContextStore | null = null; + // Hook system + private readonly hooksManager: HooksManager | undefined; + constructor(options: GetResponseOptions) { this.options = options; + this.hooksManager = options.hooks; // Runtime validation: approval decisions require state const hasApprovalDecisions = @@ -743,18 +750,75 @@ export class ModelResult< } : undefined; + // Emit PreToolUse hook -- can block or mutate input + let effectiveToolCall = toolCall; + if (this.hooksManager) { + const preResult = await this.hooksManager.emit('PreToolUse', { + toolName: toolCall.name, + toolInput: (toolCall.arguments ?? {}) as Record, + sessionId: this.currentState?.id ?? '', + }, { toolName: toolCall.name }); + + if (preResult.blocked) { + const blockResult = preResult.results.find( + (r) => r && typeof r === 'object' && 'block' in r && r.block, + ); + const reason = blockResult && typeof blockResult.block === 'string' + ? blockResult.block + : 'Blocked by PreToolUse hook'; + return { + type: 'hook_blocked' as const, + toolCall, + output: { + type: 'function_call_output' as const, + id: `output_${toolCall.id}`, + callId: toolCall.id, + output: JSON.stringify({ error: reason }), + }, + }; + } + + // Apply mutated input if present + const finalInput = preResult.finalPayload.toolInput; + if (finalInput !== toolCall.arguments) { + effectiveToolCall = { ...toolCall, arguments: finalInput }; + } + } + + const startTime = Date.now(); const result = await executeTool( tool, - toolCall, + effectiveToolCall, turnContext, onPreliminaryResult, this.contextStore ?? undefined, this.options.sharedContextSchema, ); + const durationMs = Date.now() - startTime; + + // Emit PostToolUse or PostToolUseFailure + if (this.hooksManager) { + if (result.error) { + await this.hooksManager.emit('PostToolUseFailure', { + toolName: effectiveToolCall.name, + toolInput: (effectiveToolCall.arguments ?? {}) as Record, + error: result.error, + sessionId: this.currentState?.id ?? '', + }, { toolName: effectiveToolCall.name }); + } else { + await this.hooksManager.emit('PostToolUse', { + toolName: effectiveToolCall.name, + toolInput: (effectiveToolCall.arguments ?? {}) as Record, + toolOutput: result.result, + durationMs, + sessionId: this.currentState?.id ?? '', + }, { toolName: effectiveToolCall.name }); + } + } return { type: 'execution' as const, - toolCall, + toolCall: effectiveToolCall, tool, result, preliminaryResultsForCall, @@ -773,6 +837,16 @@ export class ModelResult< const errorMessage = settled.reason instanceof Error ? settled.reason.message : String(settled.reason); + // Emit PostToolUseFailure for rejected promises + if (this.hooksManager) { + await this.hooksManager.emit('PostToolUseFailure', { + toolName: originalToolCall.name, + toolInput: (originalToolCall.arguments ?? {}) as Record, + error: settled.reason, + sessionId: this.currentState?.id ?? '', + }, { toolName: originalToolCall.name }); + } + this.broadcastToolResult(originalToolCall.id, { error: errorMessage, } as InferToolOutputsUnion); @@ -797,7 +871,7 @@ export class ModelResult< const value = settled.value; if (!value) continue; - if (value.type === 'parse_error') { + if (value.type === 'parse_error' || value.type === 'hook_blocked') { toolResults.push(value.output); this.turnBroadcaster?.push({ type: 'tool.call_output' as const, @@ -1144,6 +1218,36 @@ export class ModelResult< } } + // Emit SessionStart hook + if (this.hooksManager) { + const sessionId = this.currentState?.id ?? ''; + this.hooksManager.setSessionId(sessionId); + await this.hooksManager.emit('SessionStart', { + sessionId, + config: undefined, + }); + } + + // Emit UserPromptSubmit hook (can mutate prompt or reject) + if (this.hooksManager && typeof baseRequest.input === 'string') { + const submitResult = await this.hooksManager.emit('UserPromptSubmit', { + prompt: baseRequest.input, + sessionId: this.currentState?.id ?? '', + }); + if (submitResult.blocked) { + const rejectResult = submitResult.results.find( + (r) => r && typeof r === 'object' && 'reject' in r && r.reject, + ); + const reason = rejectResult && typeof rejectResult.reject === 'string' + ? rejectResult.reject + : 'Prompt rejected by hook'; + throw new Error(reason); + } + if (submitResult.finalPayload.prompt !== baseRequest.input) { + baseRequest = { ...baseRequest, input: submitResult.finalPayload.prompt }; + } + } + // Store resolved request with stream mode this.resolvedRequest = { ...baseRequest, @@ -1402,6 +1506,18 @@ export class ModelResult< // Check stop conditions if (await this.shouldStopExecution()) { + // Emit Stop hook -- can force resume or inject prompt + if (this.hooksManager) { + const stopResult = await this.hooksManager.emit('Stop', { + reason: 'end_turn' as const, + sessionId: this.currentState?.id ?? '', + }); + const lastResult = stopResult.results.at(-1); + if (lastResult && typeof lastResult === 'object' && 'forceResume' in lastResult && lastResult.forceResume) { + // Don't break -- continue the loop + continue; + } + } break; } @@ -1461,6 +1577,15 @@ export class ModelResult< this.validateFinalResponse(currentResponse); this.finalResponse = currentResponse; await this.markStateComplete(); + + // Emit SessionEnd hook and drain async handlers + if (this.hooksManager) { + await this.hooksManager.emit('SessionEnd', { + sessionId: this.currentState?.id ?? '', + reason: 'complete' as const, + }); + await this.hooksManager.drain(); + } })(); return this.toolExecutionPromise; diff --git a/tests/unit/hooks-emit.test.ts b/tests/unit/hooks-emit.test.ts new file mode 100644 index 0000000..8c32b7f --- /dev/null +++ b/tests/unit/hooks-emit.test.ts @@ -0,0 +1,216 @@ +import { describe, it, expect, vi } from 'vitest'; +import { executeHandlerChain } from '../../src/lib/hooks-emit.js'; +import type { HookContext, HookEntry } from '../../src/lib/hooks-types.js'; + +function makeContext(hookName = 'TestHook'): HookContext { + return { + signal: new AbortController().signal, + hookName, + sessionId: 'test-session', + }; +} + +describe('executeHandlerChain', () => { + it('executes handlers in registration order', async () => { + const order: number[] = []; + const entries: HookEntry<{ value: number }, void>[] = [ + { handler: () => { order.push(1); } }, + { handler: () => { order.push(2); } }, + { handler: () => { order.push(3); } }, + ]; + + await executeHandlerChain(entries, { value: 0 }, makeContext(), { + hookName: 'Test', + throwOnHandlerError: false, + }); + + expect(order).toEqual([1, 2, 3]); + }); + + it('skips handlers when matcher does not match', async () => { + const called = vi.fn(); + const entries: HookEntry<{ toolName: string }, void>[] = [ + { matcher: 'Bash', handler: called }, + ]; + + await executeHandlerChain( + entries, + { toolName: 'ReadFile' }, + makeContext(), + { hookName: 'Test', throwOnHandlerError: false, toolName: 'ReadFile' }, + ); + + expect(called).not.toHaveBeenCalled(); + }); + + it('invokes handler when matcher matches', async () => { + const called = vi.fn(); + const entries: HookEntry<{ toolName: string }, void>[] = [ + { matcher: 'Bash', handler: called }, + ]; + + await executeHandlerChain( + entries, + { toolName: 'Bash' }, + makeContext(), + { hookName: 'Test', throwOnHandlerError: false, toolName: 'Bash' }, + ); + + expect(called).toHaveBeenCalledOnce(); + }); + + it('skips handlers when filter returns false', async () => { + const called = vi.fn(); + const entries: HookEntry<{ value: number }, void>[] = [ + { filter: (p) => p.value > 5, handler: called }, + ]; + + await executeHandlerChain(entries, { value: 3 }, makeContext(), { + hookName: 'Test', + throwOnHandlerError: false, + }); + + expect(called).not.toHaveBeenCalled(); + }); + + it('collects sync results', async () => { + const entries: HookEntry<{ v: number }, { doubled: number }>[] = [ + { handler: (p) => ({ doubled: p.v * 2 }) }, + { handler: (p) => ({ doubled: p.v * 3 }) }, + ]; + + const result = await executeHandlerChain(entries, { v: 5 }, makeContext(), { + hookName: 'Test', + throwOnHandlerError: false, + }); + + expect(result.results).toEqual([{ doubled: 10 }, { doubled: 15 }]); + }); + + it('applies mutation piping for mutatedInput', async () => { + const entries: HookEntry< + { toolInput: Record }, + { mutatedInput: Record } + >[] = [ + { + handler: ({ toolInput }) => ({ + mutatedInput: { ...toolInput, added: true }, + }), + }, + { + handler: ({ toolInput }) => ({ + mutatedInput: { ...toolInput, second: true }, + }), + }, + ]; + + const result = await executeHandlerChain( + entries, + { toolInput: { original: true } }, + makeContext(), + { hookName: 'PreToolUse', throwOnHandlerError: false }, + ); + + expect(result.finalPayload.toolInput).toEqual({ + original: true, + added: true, + second: true, + }); + }); + + it('short-circuits on block for PreToolUse', async () => { + const secondHandler = vi.fn(); + const entries: HookEntry< + { toolInput: Record }, + { block?: boolean | string } + >[] = [ + { handler: () => ({ block: 'dangerous' }) }, + { handler: secondHandler }, + ]; + + const result = await executeHandlerChain( + entries, + { toolInput: {} }, + makeContext(), + { hookName: 'PreToolUse', throwOnHandlerError: false }, + ); + + expect(result.blocked).toBe(true); + expect(secondHandler).not.toHaveBeenCalled(); + }); + + it('short-circuits on reject for UserPromptSubmit', async () => { + const secondHandler = vi.fn(); + const entries: HookEntry< + { prompt: string }, + { reject?: boolean | string } + >[] = [ + { handler: () => ({ reject: 'not allowed' }) }, + { handler: secondHandler }, + ]; + + const result = await executeHandlerChain( + entries, + { prompt: 'test' }, + makeContext(), + { hookName: 'UserPromptSubmit', throwOnHandlerError: false }, + ); + + expect(result.blocked).toBe(true); + expect(secondHandler).not.toHaveBeenCalled(); + }); + + it('handles async output by tracking pending promises', async () => { + const entries: HookEntry[] = [ + { handler: () => ({ async: true as const }) }, + ]; + + const result = await executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: false, + }); + + expect(result.pending.length).toBe(1); + expect(result.results.length).toBe(0); + }); + + it('logs and continues on handler error in default mode', async () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + const secondHandler = vi.fn(); + + const entries: HookEntry[] = [ + { + handler: () => { + throw new Error('boom'); + }, + }, + { handler: secondHandler }, + ]; + + await executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: false, + }); + + expect(warnSpy).toHaveBeenCalled(); + expect(secondHandler).toHaveBeenCalled(); + warnSpy.mockRestore(); + }); + + it('throws in strict mode on handler error', async () => { + const entries: HookEntry[] = [ + { + handler: () => { + throw new Error('boom'); + }, + }, + ]; + + await expect( + executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: true, + }), + ).rejects.toThrow('boom'); + }); +}); diff --git a/tests/unit/hooks-manager.test.ts b/tests/unit/hooks-manager.test.ts new file mode 100644 index 0000000..4d657ab --- /dev/null +++ b/tests/unit/hooks-manager.test.ts @@ -0,0 +1,211 @@ +import { describe, it, expect, vi } from 'vitest'; +import * as z4 from 'zod/v4'; +import { HooksManager } from '../../src/lib/hooks-manager.js'; + +describe('HooksManager', () => { + describe('constructor', () => { + it('creates with no arguments', () => { + const manager = new HooksManager(); + expect(manager).toBeInstanceOf(HooksManager); + }); + + it('throws on custom hook name collision with built-in', () => { + expect(() => + new HooksManager({ + PreToolUse: { + payload: z4.object({ custom: z4.string() }), + result: z4.void(), + }, + }), + ).toThrow('collides with a built-in hook'); + }); + + it('accepts custom hooks with unique names', () => { + const manager = new HooksManager({ + MyCustomHook: { + payload: z4.object({ data: z4.string() }), + result: z4.void(), + }, + }); + expect(manager).toBeInstanceOf(HooksManager); + }); + }); + + describe('on / off / removeAll', () => { + it('registers a handler and returns unsubscribe function', async () => { + const manager = new HooksManager(); + const handler = vi.fn(); + + const unsub = manager.on('PostToolUse', { handler }); + expect(manager.hasHandlers('PostToolUse')).toBe(true); + + unsub(); + expect(manager.hasHandlers('PostToolUse')).toBe(false); + }); + + it('removes a handler by reference with off()', async () => { + const manager = new HooksManager(); + const handler = vi.fn(); + + manager.on('PostToolUse', { handler }); + expect(manager.hasHandlers('PostToolUse')).toBe(true); + + const removed = manager.off('PostToolUse', handler); + expect(removed).toBe(true); + expect(manager.hasHandlers('PostToolUse')).toBe(false); + }); + + it('returns false when off() cannot find the handler', () => { + const manager = new HooksManager(); + const removed = manager.off('PostToolUse', vi.fn()); + expect(removed).toBe(false); + }); + + it('removes all handlers for a specific hook', () => { + const manager = new HooksManager(); + manager.on('PostToolUse', { handler: vi.fn() }); + manager.on('PostToolUse', { handler: vi.fn() }); + manager.on('PreToolUse', { handler: vi.fn() }); + + manager.removeAll('PostToolUse'); + expect(manager.hasHandlers('PostToolUse')).toBe(false); + expect(manager.hasHandlers('PreToolUse')).toBe(true); + }); + + it('removes all handlers for all hooks', () => { + const manager = new HooksManager(); + manager.on('PostToolUse', { handler: vi.fn() }); + manager.on('PreToolUse', { handler: vi.fn() }); + + manager.removeAll(); + expect(manager.hasHandlers('PostToolUse')).toBe(false); + expect(manager.hasHandlers('PreToolUse')).toBe(false); + }); + }); + + describe('emit', () => { + it('invokes registered handlers with payload', async () => { + const manager = new HooksManager(); + const handler = vi.fn(); + + manager.on('PostToolUse', { handler }); + await manager.emit('PostToolUse', { + toolName: 'Bash', + toolInput: {}, + toolOutput: 'ok', + durationMs: 100, + sessionId: 'test', + }); + + expect(handler).toHaveBeenCalledOnce(); + expect(handler).toHaveBeenCalledWith( + expect.objectContaining({ toolName: 'Bash' }), + expect.objectContaining({ hookName: 'PostToolUse' }), + ); + }); + + it('returns empty results when no handlers registered', async () => { + const manager = new HooksManager(); + const result = await manager.emit('PostToolUse', { + toolName: 'Bash', + toolInput: {}, + toolOutput: 'ok', + durationMs: 100, + sessionId: 'test', + }); + + expect(result.results).toEqual([]); + expect(result.blocked).toBe(false); + }); + + it('passes toolName to emit context for matcher filtering', async () => { + const manager = new HooksManager(); + const bashHandler = vi.fn(); + const readHandler = vi.fn(); + + manager.on('PreToolUse', { matcher: 'Bash', handler: bashHandler }); + manager.on('PreToolUse', { matcher: 'ReadFile', handler: readHandler }); + + await manager.emit( + 'PreToolUse', + { toolName: 'Bash', toolInput: {}, sessionId: 'test' }, + { toolName: 'Bash' }, + ); + + expect(bashHandler).toHaveBeenCalledOnce(); + expect(readHandler).not.toHaveBeenCalled(); + }); + + it('supports custom hooks', async () => { + const manager = new HooksManager({ + AgentThinking: { + payload: z4.object({ thought: z4.string() }), + result: z4.void(), + }, + }); + + const handler = vi.fn(); + manager.on('AgentThinking', { handler }); + + await manager.emit('AgentThinking', { thought: 'hmm' }); + + expect(handler).toHaveBeenCalledWith( + expect.objectContaining({ thought: 'hmm' }), + expect.any(Object), + ); + }); + }); + + describe('drain', () => { + it('resolves when no pending async handlers', async () => { + const manager = new HooksManager(); + await expect(manager.drain()).resolves.toBeUndefined(); + }); + + it('waits for pending async handlers', async () => { + const manager = new HooksManager(); + manager.on('PostToolUse', { + handler: () => { + // Return async output signal + return { async: true as const }; + }, + }); + + await manager.emit('PostToolUse', { + toolName: 'Bash', + toolInput: {}, + toolOutput: 'ok', + durationMs: 100, + sessionId: 'test', + }); + + await manager.drain(); + // Drain completes (async tracking works even if side effects are independent) + expect(true).toBe(true); + }); + }); + + describe('setSessionId', () => { + it('sets session ID used in hook context', async () => { + const manager = new HooksManager(); + manager.setSessionId('my-session'); + + let receivedSessionId = ''; + manager.on('PostToolUse', { + handler: (_payload, context) => { + receivedSessionId = context.sessionId; + }, + }); + + await manager.emit('PostToolUse', { + toolName: 'Bash', + toolInput: {}, + toolOutput: 'ok', + durationMs: 100, + sessionId: 'test', + }); + + expect(receivedSessionId).toBe('my-session'); + }); + }); +}); diff --git a/tests/unit/hooks-matchers.test.ts b/tests/unit/hooks-matchers.test.ts new file mode 100644 index 0000000..eb80060 --- /dev/null +++ b/tests/unit/hooks-matchers.test.ts @@ -0,0 +1,27 @@ +import { describe, it, expect } from 'vitest'; +import { matchesTool } from '../../src/lib/hooks-matchers.js'; + +describe('matchesTool', () => { + it('matches all tools when matcher is undefined', () => { + expect(matchesTool(undefined, 'Bash')).toBe(true); + expect(matchesTool(undefined, 'ReadFile')).toBe(true); + }); + + it('matches exact string', () => { + expect(matchesTool('Bash', 'Bash')).toBe(true); + expect(matchesTool('Bash', 'ReadFile')).toBe(false); + expect(matchesTool('Bash', 'bash')).toBe(false); + }); + + it('matches RegExp', () => { + expect(matchesTool(/^(Read|Write)File$/, 'ReadFile')).toBe(true); + expect(matchesTool(/^(Read|Write)File$/, 'WriteFile')).toBe(true); + expect(matchesTool(/^(Read|Write)File$/, 'DeleteFile')).toBe(false); + }); + + it('matches function predicate', () => { + const matcher = (name: string) => name.startsWith('File'); + expect(matchesTool(matcher, 'FileRead')).toBe(true); + expect(matchesTool(matcher, 'Bash')).toBe(false); + }); +}); diff --git a/tests/unit/hooks-resolve.test.ts b/tests/unit/hooks-resolve.test.ts new file mode 100644 index 0000000..e932ccd --- /dev/null +++ b/tests/unit/hooks-resolve.test.ts @@ -0,0 +1,56 @@ +import { describe, it, expect, vi } from 'vitest'; +import { resolveHooks } from '../../src/lib/hooks-resolve.js'; +import { HooksManager } from '../../src/lib/hooks-manager.js'; +import type { InlineHookConfig } from '../../src/lib/hooks-types.js'; + +describe('resolveHooks', () => { + it('returns undefined for undefined input', () => { + expect(resolveHooks(undefined)).toBeUndefined(); + }); + + it('passes through HooksManager instances', () => { + const manager = new HooksManager(); + expect(resolveHooks(manager)).toBe(manager); + }); + + it('converts inline config to HooksManager', () => { + const config: InlineHookConfig = { + PreToolUse: [ + { + matcher: 'Bash', + handler: vi.fn(), + }, + ], + }; + + const result = resolveHooks(config); + expect(result).toBeInstanceOf(HooksManager); + expect(result!.hasHandlers('PreToolUse')).toBe(true); + }); + + it('registers multiple entries for a single hook', () => { + const config: InlineHookConfig = { + PostToolUse: [ + { handler: vi.fn() }, + { handler: vi.fn() }, + ], + }; + + const result = resolveHooks(config); + expect(result).toBeInstanceOf(HooksManager); + expect(result!.hasHandlers('PostToolUse')).toBe(true); + }); + + it('registers entries for multiple hooks', () => { + const config: InlineHookConfig = { + PreToolUse: [{ handler: vi.fn() }], + PostToolUse: [{ handler: vi.fn() }], + Stop: [{ handler: vi.fn() }], + }; + + const result = resolveHooks(config); + expect(result!.hasHandlers('PreToolUse')).toBe(true); + expect(result!.hasHandlers('PostToolUse')).toBe(true); + expect(result!.hasHandlers('Stop')).toBe(true); + }); +}); From c4c525dc4cb68ece27a287c84143655b5bdb31b5 Mon Sep 17 00:00:00 2001 From: Matt Apperson Date: Fri, 3 Apr 2026 11:49:51 -0400 Subject: [PATCH 2/7] test: add adversarial unit tests for hooks system 81 edge-case tests covering handler chain execution, manager lifecycle, tool matchers, resolver normalization, and async output detection. Surfaces real issues: filter throws bypass error handling, global RegExp stateful .test(), and function matcher lacking boolean coercion. --- tests/unit/hooks-emit-adversarial.test.ts | 415 ++++++++++++++++++ tests/unit/hooks-manager-adversarial.test.ts | 317 +++++++++++++ tests/unit/hooks-matchers-adversarial.test.ts | 87 ++++ tests/unit/hooks-resolve-adversarial.test.ts | 122 +++++ tests/unit/hooks-types-adversarial.test.ts | 113 +++++ 5 files changed, 1054 insertions(+) create mode 100644 tests/unit/hooks-emit-adversarial.test.ts create mode 100644 tests/unit/hooks-manager-adversarial.test.ts create mode 100644 tests/unit/hooks-matchers-adversarial.test.ts create mode 100644 tests/unit/hooks-resolve-adversarial.test.ts create mode 100644 tests/unit/hooks-types-adversarial.test.ts diff --git a/tests/unit/hooks-emit-adversarial.test.ts b/tests/unit/hooks-emit-adversarial.test.ts new file mode 100644 index 0000000..e5a886d --- /dev/null +++ b/tests/unit/hooks-emit-adversarial.test.ts @@ -0,0 +1,415 @@ +import { describe, it, expect, vi } from 'vitest'; +import { executeHandlerChain } from '../../src/lib/hooks-emit.js'; +import type { HookContext, HookEntry } from '../../src/lib/hooks-types.js'; + +function makeContext(hookName = 'TestHook'): HookContext { + return { + signal: new AbortController().signal, + hookName, + sessionId: 'test-session', + }; +} + +describe('executeHandlerChain (adversarial)', () => { + describe('empty and degenerate inputs', () => { + it('returns clean result for empty entries array', async () => { + const result = await executeHandlerChain([], { v: 1 }, makeContext(), { + hookName: 'Test', + throwOnHandlerError: false, + }); + + expect(result.results).toEqual([]); + expect(result.pending).toEqual([]); + expect(result.blocked).toBe(false); + expect(result.finalPayload).toEqual({ v: 1 }); + }); + + it('skips sparse array holes (undefined entries)', async () => { + // eslint-disable-next-line no-sparse-arrays + const entries = [, , { handler: vi.fn() }] as unknown as HookEntry< + unknown, + void + >[]; + + await executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: false, + }); + + expect(entries[2]!.handler).toHaveBeenCalledOnce(); + }); + }); + + describe('handler return value edge cases', () => { + it('treats null return as void (no result collected)', async () => { + const entries: HookEntry[] = [ + { handler: () => null as unknown as { v: number } }, + ]; + + const result = await executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: false, + }); + + expect(result.results).toEqual([]); + }); + + it('collects non-object primitive results without mutation crash', async () => { + const entries: HookEntry<{ toolInput: Record }, number>[] = + [{ handler: () => 42 }, { handler: () => 99 }]; + + const result = await executeHandlerChain( + entries, + { toolInput: { a: 1 } }, + makeContext(), + { hookName: 'PreToolUse', throwOnHandlerError: false }, + ); + + // applyMutations should be a no-op for primitives + expect(result.results).toEqual([42, 99]); + expect(result.finalPayload.toolInput).toEqual({ a: 1 }); + }); + + it('does not trigger block on non-blocking hooks even if result has block field', async () => { + const entries: HookEntry[] = [ + { handler: () => ({ block: true }) }, + ]; + + const result = await executeHandlerChain(entries, {}, makeContext(), { + hookName: 'PostToolUse', // Not a blocking hook + throwOnHandlerError: false, + }); + + expect(result.blocked).toBe(false); + expect(result.results).toEqual([{ block: true }]); + }); + + it('block: false does not short-circuit PreToolUse', async () => { + const second = vi.fn(() => ({ block: false })); + const entries: HookEntry< + { toolInput: Record }, + { block: boolean } + >[] = [{ handler: () => ({ block: false }) }, { handler: second }]; + + const result = await executeHandlerChain( + entries, + { toolInput: {} }, + makeContext(), + { hookName: 'PreToolUse', throwOnHandlerError: false }, + ); + + expect(result.blocked).toBe(false); + expect(second).toHaveBeenCalled(); + }); + + it('block: 0 (falsy number) does not short-circuit', async () => { + const second = vi.fn(); + const entries: HookEntry< + { toolInput: Record }, + { block: number } + >[] = [ + { handler: () => ({ block: 0 }) }, + { handler: second }, + ]; + + const result = await executeHandlerChain( + entries, + { toolInput: {} }, + makeContext(), + { hookName: 'PreToolUse', throwOnHandlerError: false }, + ); + + // 0 is neither `true` nor a string, so should not block + expect(result.blocked).toBe(false); + expect(second).toHaveBeenCalled(); + }); + + it('block: "" (empty string) triggers short-circuit since typeof is string', async () => { + const second = vi.fn(); + const entries: HookEntry< + { toolInput: Record }, + { block: string } + >[] = [{ handler: () => ({ block: '' }) }, { handler: second }]; + + const result = await executeHandlerChain( + entries, + { toolInput: {} }, + makeContext(), + { hookName: 'PreToolUse', throwOnHandlerError: false }, + ); + + // isBlockTriggered checks typeof value === 'string', so empty string IS a string + expect(result.blocked).toBe(true); + expect(second).not.toHaveBeenCalled(); + }); + }); + + describe('mutation piping edge cases', () => { + it('mutatedInput: undefined does not overwrite existing toolInput', async () => { + const entries: HookEntry< + { toolInput: Record }, + { mutatedInput: undefined } + >[] = [{ handler: () => ({ mutatedInput: undefined }) }]; + + const result = await executeHandlerChain( + entries, + { toolInput: { original: true } }, + makeContext(), + { hookName: 'PreToolUse', throwOnHandlerError: false }, + ); + + expect(result.finalPayload.toolInput).toEqual({ original: true }); + }); + + it('mutatedPrompt pipes correctly through UserPromptSubmit', async () => { + const entries: HookEntry< + { prompt: string }, + { mutatedPrompt: string } + >[] = [ + { handler: () => ({ mutatedPrompt: 'rewritten-1' }) }, + { + handler: (p) => ({ + mutatedPrompt: `${p.prompt}+appended`, + }), + }, + ]; + + const result = await executeHandlerChain( + entries, + { prompt: 'original' }, + makeContext(), + { hookName: 'UserPromptSubmit', throwOnHandlerError: false }, + ); + + // First handler rewrites prompt to 'rewritten-1', second sees 'rewritten-1' as p.prompt + expect(result.finalPayload.prompt).toBe('rewritten-1+appended'); + }); + + it('result with __proto__ key does not cause prototype pollution', async () => { + const entries: HookEntry< + { toolInput: Record }, + Record + >[] = [ + { + handler: () => { + const obj = Object.create(null); + obj['__proto__'] = { polluted: true }; + return obj; + }, + }, + ]; + + const result = await executeHandlerChain( + entries, + { toolInput: {} }, + makeContext(), + { hookName: 'PreToolUse', throwOnHandlerError: false }, + ); + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + expect((({} as Record)['polluted'] as unknown)).toBeUndefined(); + expect(result.results.length).toBe(1); + }); + }); + + describe('filter edge cases', () => { + it('throwing filter is caught in non-strict mode', async () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + const entries: HookEntry[] = [ + { + filter: () => { + throw new Error('filter boom'); + }, + handler: vi.fn(), + }, + ]; + + // The filter throw happens inside the try block since filter is called + // before handler. Let's verify current behavior. + // Looking at the code: filter is called OUTSIDE the try block! + // This means a throwing filter will propagate. + await expect( + executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: false, + }), + ).rejects.toThrow('filter boom'); + + warnSpy.mockRestore(); + }); + + it('throwing filter in strict mode propagates error', async () => { + const entries: HookEntry[] = [ + { + filter: () => { + throw new Error('filter fail'); + }, + handler: vi.fn(), + }, + ]; + + await expect( + executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: true, + }), + ).rejects.toThrow('filter fail'); + }); + }); + + describe('async output edge cases', () => { + it('{ async: true } with block field is treated as async, not block', async () => { + const entries: HookEntry< + { toolInput: Record }, + { async: true; block: true } + >[] = [ + { + handler: () => ({ + async: true as const, + block: true, + }), + }, + ]; + + const result = await executeHandlerChain( + entries, + { toolInput: {} }, + makeContext(), + { hookName: 'PreToolUse', throwOnHandlerError: false }, + ); + + // isAsyncOutput check comes before block check, so this should be async + expect(result.pending.length).toBe(1); + expect(result.blocked).toBe(false); + expect(result.results).toEqual([]); + }); + + it('{ async: "true" } (string) is NOT treated as async', async () => { + const entries: HookEntry[] = [ + { handler: () => ({ async: 'true' }) }, + ]; + + const result = await executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: false, + }); + + // isAsyncOutput requires async === true (boolean), not "true" (string) + expect(result.pending.length).toBe(0); + expect(result.results).toEqual([{ async: 'true' }]); + }); + }); + + describe('error handling edge cases', () => { + it('handler returning a rejected promise is caught in non-strict mode', async () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + const secondHandler = vi.fn(); + + const entries: HookEntry[] = [ + { handler: () => Promise.reject(new Error('async boom')) }, + { handler: secondHandler }, + ]; + + await executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: false, + }); + + expect(warnSpy).toHaveBeenCalled(); + expect(secondHandler).toHaveBeenCalled(); + warnSpy.mockRestore(); + }); + + it('non-Error thrown values are caught in non-strict mode', async () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + + const entries: HookEntry[] = [ + { + handler: () => { + throw 'string error'; + }, + }, + ]; + + await executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: false, + }); + + expect(warnSpy).toHaveBeenCalled(); + warnSpy.mockRestore(); + }); + + it('non-Error thrown values propagate in strict mode', async () => { + const entries: HookEntry[] = [ + { + handler: () => { + throw 'string error'; + }, + }, + ]; + + await expect( + executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: true, + }), + ).rejects.toBe('string error'); + }); + }); + + describe('matcher + filter interaction', () => { + it('matcher is checked before filter — mismatched matcher skips filter call', async () => { + const filterFn = vi.fn(() => true); + const entries: HookEntry[] = [ + { matcher: 'Bash', filter: filterFn, handler: vi.fn() }, + ]; + + await executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: false, + toolName: 'ReadFile', + }); + + expect(filterFn).not.toHaveBeenCalled(); + }); + + it('matcher without toolName in options does NOT skip (matcher ignored)', async () => { + const handler = vi.fn(); + const entries: HookEntry[] = [ + { matcher: 'Bash', handler }, + ]; + + await executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: false, + // no toolName + }); + + // Code: matcher !== undefined && toolName !== undefined -> skip + // Since toolName is undefined, matcher check is skipped entirely + expect(handler).toHaveBeenCalled(); + }); + }); + + describe('large chain stress', () => { + it('handles 1000 handlers without stack overflow', async () => { + const entries: HookEntry<{ count: number }, void>[] = Array.from( + { length: 1000 }, + () => ({ handler: vi.fn() }), + ); + + const result = await executeHandlerChain( + entries, + { count: 0 }, + makeContext(), + { hookName: 'Test', throwOnHandlerError: false }, + ); + + expect(result.results).toEqual([]); + for (const entry of entries) { + expect(entry.handler).toHaveBeenCalledOnce(); + } + }); + }); +}); diff --git a/tests/unit/hooks-manager-adversarial.test.ts b/tests/unit/hooks-manager-adversarial.test.ts new file mode 100644 index 0000000..4976036 --- /dev/null +++ b/tests/unit/hooks-manager-adversarial.test.ts @@ -0,0 +1,317 @@ +import { describe, it, expect, vi } from 'vitest'; +import * as z4 from 'zod/v4'; +import { HooksManager } from '../../src/lib/hooks-manager.js'; + +describe('HooksManager (adversarial)', () => { + describe('unsubscribe edge cases', () => { + it('calling unsubscribe twice does not throw or corrupt state', () => { + const manager = new HooksManager(); + const unsub = manager.on('PostToolUse', { handler: vi.fn() }); + + unsub(); + unsub(); // second call should be harmless + + expect(manager.hasHandlers('PostToolUse')).toBe(false); + }); + + it('unsubscribe from one handler does not affect others', () => { + const manager = new HooksManager(); + const h1 = vi.fn(); + const h2 = vi.fn(); + + const unsub1 = manager.on('PostToolUse', { handler: h1 }); + manager.on('PostToolUse', { handler: h2 }); + + unsub1(); + expect(manager.hasHandlers('PostToolUse')).toBe(true); + }); + }); + + describe('off() edge cases', () => { + it('off with identical-looking but different function reference returns false', () => { + const manager = new HooksManager(); + const h1 = () => {}; + const h2 = () => {}; + + manager.on('PostToolUse', { handler: h1 }); + const removed = manager.off('PostToolUse', h2); + + expect(removed).toBe(false); + expect(manager.hasHandlers('PostToolUse')).toBe(true); + }); + + it('off for a hook name that was never registered returns false', () => { + const manager = new HooksManager(); + expect(manager.off('SessionEnd', vi.fn())).toBe(false); + }); + }); + + describe('re-entrant emit', () => { + it('handler that registers a new handler during emit does not affect current chain', async () => { + const manager = new HooksManager(); + const lateHandler = vi.fn(); + + manager.on('PostToolUse', { + handler: () => { + // Register a new handler mid-emit + manager.on('PostToolUse', { handler: lateHandler }); + }, + }); + + await manager.emit('PostToolUse', { + toolName: 'Bash', + toolInput: {}, + toolOutput: 'ok', + durationMs: 100, + sessionId: 'test', + }); + + // The late handler was added to the array during iteration. + // Since entries is a mutable array and we iterate by index, + // the late handler MAY be called (pushed to end of array, i < entries.length). + // This test documents the actual behavior. + // The handler pushes to the list which the for-loop iterates over, + // so it WILL be called since entries.length grows. + expect(lateHandler).toHaveBeenCalledOnce(); + }); + + it('handler that calls emit recursively does not deadlock', async () => { + const manager = new HooksManager(); + let depth = 0; + + manager.on('SessionStart', { + handler: async () => { + depth++; + if (depth < 3) { + await manager.emit('SessionStart', { + sessionId: `depth-${depth}`, + config: undefined, + }); + } + }, + }); + + await manager.emit('SessionStart', { + sessionId: 'root', + config: undefined, + }); + + expect(depth).toBe(3); + }); + }); + + describe('custom hook validation', () => { + it('throws for each built-in hook name used as custom', () => { + const builtInNames = [ + 'PreToolUse', + 'PostToolUse', + 'PostToolUseFailure', + 'UserPromptSubmit', + 'Stop', + 'PermissionRequest', + 'SessionStart', + 'SessionEnd', + ]; + + for (const name of builtInNames) { + expect( + () => + new HooksManager({ + [name]: { + payload: z4.object({}), + result: z4.void(), + }, + }), + ).toThrow('collides with a built-in hook'); + } + }); + + it('allows custom hook with empty string name', () => { + const manager = new HooksManager({ + '': { + payload: z4.object({}), + result: z4.void(), + }, + }); + expect(manager).toBeInstanceOf(HooksManager); + }); + }); + + describe('emit edge cases', () => { + it('emit for a hook with no registered handlers returns clean result', async () => { + const manager = new HooksManager(); + const result = await manager.emit('PreToolUse', { + toolName: 'Test', + toolInput: {}, + sessionId: 's1', + }); + + expect(result.results).toEqual([]); + expect(result.blocked).toBe(false); + expect(result.pending).toEqual([]); + }); + + it('emit for an unregistered custom hook name returns clean result', async () => { + const manager = new HooksManager(); + // Emitting a never-registered hook name + const result = await manager.emit( + 'NonExistentHook' as 'PostToolUse', + { + toolName: 'X', + toolInput: {}, + toolOutput: null, + durationMs: 0, + sessionId: 's', + }, + ); + + expect(result.results).toEqual([]); + }); + + it('handler that throws in strict mode propagates through emit', async () => { + const manager = new HooksManager(undefined, { + throwOnHandlerError: true, + }); + + manager.on('PostToolUse', { + handler: () => { + throw new Error('handler explosion'); + }, + }); + + await expect( + manager.emit('PostToolUse', { + toolName: 'T', + toolInput: {}, + toolOutput: null, + durationMs: 0, + sessionId: 's', + }), + ).rejects.toThrow('handler explosion'); + }); + + it('handler that throws in default mode does not fail emit', async () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + const manager = new HooksManager(); + + manager.on('PostToolUse', { + handler: () => { + throw new Error('soft failure'); + }, + }); + + const result = await manager.emit('PostToolUse', { + toolName: 'T', + toolInput: {}, + toolOutput: null, + durationMs: 0, + sessionId: 's', + }); + + expect(result.results).toEqual([]); + expect(warnSpy).toHaveBeenCalled(); + warnSpy.mockRestore(); + }); + }); + + describe('PreToolUse block through manager', () => { + it('block with string reason short-circuits and reports blocked', async () => { + const manager = new HooksManager(); + const second = vi.fn(); + + manager.on('PreToolUse', { + handler: () => ({ block: 'dangerous tool' }), + }); + manager.on('PreToolUse', { handler: second }); + + const result = await manager.emit( + 'PreToolUse', + { toolName: 'rm', toolInput: { path: '/' }, sessionId: 's' }, + { toolName: 'rm' }, + ); + + expect(result.blocked).toBe(true); + expect(second).not.toHaveBeenCalled(); + expect(result.results).toEqual([{ block: 'dangerous tool' }]); + }); + }); + + describe('drain edge cases', () => { + it('drain can be called multiple times safely', async () => { + const manager = new HooksManager(); + await manager.drain(); + await manager.drain(); + // No error + }); + + it('drain after handlers have been cleared still resolves', async () => { + const manager = new HooksManager(); + manager.on('PostToolUse', { + handler: () => ({ async: true as const }), + }); + + await manager.emit('PostToolUse', { + toolName: 'T', + toolInput: {}, + toolOutput: null, + durationMs: 0, + sessionId: 's', + }); + + manager.removeAll(); + await manager.drain(); // pending async should still drain + }); + }); + + describe('removeAll during usage', () => { + it('removeAll clears all hooks even if called from within a handler', async () => { + const manager = new HooksManager(); + + manager.on('SessionStart', { + handler: () => { + manager.removeAll(); + }, + }); + + await manager.emit('SessionStart', { + sessionId: 's', + config: undefined, + }); + + expect(manager.hasHandlers('SessionStart')).toBe(false); + }); + }); + + describe('setSessionId', () => { + it('changing sessionId mid-session is reflected in subsequent emits', async () => { + const manager = new HooksManager(); + const sessionIds: string[] = []; + + manager.on('PostToolUse', { + handler: (_p, ctx) => { + sessionIds.push(ctx.sessionId); + }, + }); + + manager.setSessionId('session-1'); + await manager.emit('PostToolUse', { + toolName: 'T', + toolInput: {}, + toolOutput: null, + durationMs: 0, + sessionId: 'ignored', + }); + + manager.setSessionId('session-2'); + await manager.emit('PostToolUse', { + toolName: 'T', + toolInput: {}, + toolOutput: null, + durationMs: 0, + sessionId: 'ignored', + }); + + expect(sessionIds).toEqual(['session-1', 'session-2']); + }); + }); +}); diff --git a/tests/unit/hooks-matchers-adversarial.test.ts b/tests/unit/hooks-matchers-adversarial.test.ts new file mode 100644 index 0000000..4e3f064 --- /dev/null +++ b/tests/unit/hooks-matchers-adversarial.test.ts @@ -0,0 +1,87 @@ +import { describe, it, expect } from 'vitest'; +import { matchesTool } from '../../src/lib/hooks-matchers.js'; + +describe('matchesTool (adversarial)', () => { + describe('empty string edge cases', () => { + it('empty string matcher matches empty string toolName', () => { + expect(matchesTool('', '')).toBe(true); + }); + + it('empty string matcher does NOT match non-empty toolName', () => { + expect(matchesTool('', 'Bash')).toBe(false); + }); + + it('non-empty matcher does NOT match empty toolName', () => { + expect(matchesTool('Bash', '')).toBe(false); + }); + }); + + describe('RegExp stateful behavior', () => { + it('RegExp with global flag has stateful .test() — may produce inconsistent results', () => { + const globalRegex = /Bash/g; + + // First call — matches + const first = matchesTool(globalRegex, 'Bash'); + // Second call — global regex has lastIndex set, may NOT match + const second = matchesTool(globalRegex, 'Bash'); + + // This documents that global flag is dangerous with matchesTool + // First call returns true, second returns false due to lastIndex + expect(first).toBe(true); + expect(second).toBe(false); + }); + + it('RegExp without global flag is idempotent', () => { + const regex = /Bash/; + expect(matchesTool(regex, 'Bash')).toBe(true); + expect(matchesTool(regex, 'Bash')).toBe(true); + expect(matchesTool(regex, 'Bash')).toBe(true); + }); + + it('RegExp with case-insensitive flag', () => { + expect(matchesTool(/bash/i, 'Bash')).toBe(true); + expect(matchesTool(/bash/i, 'BASH')).toBe(true); + }); + + it('RegExp matching partial tool name (no anchoring)', () => { + // /Read/ matches "ReadFile" — this is regex default behavior, not bug + expect(matchesTool(/Read/, 'ReadFile')).toBe(true); + expect(matchesTool(/Read/, 'OnlyRead')).toBe(true); + }); + }); + + describe('function matcher edge cases', () => { + it('function matcher throwing propagates the error', () => { + const throwingMatcher = () => { + throw new Error('matcher boom'); + }; + expect(() => matchesTool(throwingMatcher, 'Bash')).toThrow('matcher boom'); + }); + + it('function matcher returning truthy non-boolean is NOT coerced to true', () => { + // BUG: matchesTool returns raw value from function, not boolean-coerced. + // Callers that do strict === true checks will behave differently than truthiness checks. + const truthyMatcher = () => 1 as unknown as boolean; + expect(matchesTool(truthyMatcher, 'Bash')).toBe(1); + }); + + it('function matcher returning 0 (falsy) is NOT coerced to false', () => { + // BUG: returns 0 instead of false — truthiness check works but strict equality fails + const falsyMatcher = () => 0 as unknown as boolean; + expect(matchesTool(falsyMatcher, 'Bash')).toBe(0); + }); + + it('function matcher returning null is NOT coerced to false', () => { + // BUG: returns null instead of false — truthiness check works but strict equality fails + const nullMatcher = () => null as unknown as boolean; + expect(matchesTool(nullMatcher, 'Bash')).toBe(null); + }); + }); + + describe('special characters in string matcher', () => { + it('string matcher with regex-special chars does exact match only', () => { + expect(matchesTool('Read.*File', 'Read.*File')).toBe(true); + expect(matchesTool('Read.*File', 'ReadAnyFile')).toBe(false); + }); + }); +}); diff --git a/tests/unit/hooks-resolve-adversarial.test.ts b/tests/unit/hooks-resolve-adversarial.test.ts new file mode 100644 index 0000000..68883d5 --- /dev/null +++ b/tests/unit/hooks-resolve-adversarial.test.ts @@ -0,0 +1,122 @@ +import { describe, it, expect, vi } from 'vitest'; +import { resolveHooks } from '../../src/lib/hooks-resolve.js'; +import { HooksManager } from '../../src/lib/hooks-manager.js'; +import type { InlineHookConfig } from '../../src/lib/hooks-types.js'; + +describe('resolveHooks (adversarial)', () => { + describe('falsy inputs', () => { + it('returns undefined for null', () => { + expect(resolveHooks(null as unknown as undefined)).toBeUndefined(); + }); + + it('returns undefined for false', () => { + expect(resolveHooks(false as unknown as undefined)).toBeUndefined(); + }); + + it('returns undefined for 0', () => { + expect(resolveHooks(0 as unknown as undefined)).toBeUndefined(); + }); + + it('returns undefined for empty string', () => { + expect(resolveHooks('' as unknown as undefined)).toBeUndefined(); + }); + }); + + describe('empty config', () => { + it('empty object returns a HooksManager with no handlers', () => { + const result = resolveHooks({}); + expect(result).toBeInstanceOf(HooksManager); + expect(result!.hasHandlers('PreToolUse')).toBe(false); + }); + + it('config with empty arrays returns HooksManager with no handlers', () => { + const config: InlineHookConfig = { + PreToolUse: [], + PostToolUse: [], + }; + + const result = resolveHooks(config); + expect(result).toBeInstanceOf(HooksManager); + expect(result!.hasHandlers('PreToolUse')).toBe(false); + expect(result!.hasHandlers('PostToolUse')).toBe(false); + }); + }); + + describe('malformed config values', () => { + it('non-array value for a hook key is skipped', () => { + const config = { + PreToolUse: 'not-an-array', + } as unknown as InlineHookConfig; + + const result = resolveHooks(config); + expect(result).toBeInstanceOf(HooksManager); + expect(result!.hasHandlers('PreToolUse')).toBe(false); + }); + + it('null value for a hook key is skipped', () => { + const config = { + PreToolUse: null, + } as unknown as InlineHookConfig; + + const result = resolveHooks(config); + expect(result).toBeInstanceOf(HooksManager); + expect(result!.hasHandlers('PreToolUse')).toBe(false); + }); + + it('number value for a hook key is skipped', () => { + const config = { + PreToolUse: 42, + } as unknown as InlineHookConfig; + + const result = resolveHooks(config); + expect(result).toBeInstanceOf(HooksManager); + expect(result!.hasHandlers('PreToolUse')).toBe(false); + }); + }); + + describe('prototype pollution resistance', () => { + it('__proto__ key in config does not pollute Object prototype', () => { + const config = JSON.parse( + '{"__proto__": [{"handler": null}], "PreToolUse": []}', + ); + + // This should not crash and should not pollute Object.prototype + const result = resolveHooks(config); + expect(result).toBeInstanceOf(HooksManager); + expect(({} as Record)['handler']).toBeUndefined(); + }); + + it('constructor key in config does not crash', () => { + const config = { + constructor: [{ handler: vi.fn() }], + } as unknown as InlineHookConfig; + + const result = resolveHooks(config); + expect(result).toBeInstanceOf(HooksManager); + // 'constructor' is treated as a hook name — it gets registered + expect(result!.hasHandlers('constructor')).toBe(true); + }); + }); + + describe('HooksManager passthrough', () => { + it('returns the exact same instance, not a copy', () => { + const manager = new HooksManager(); + manager.on('PreToolUse', { handler: vi.fn() }); + + const result = resolveHooks(manager); + expect(result).toBe(manager); + }); + }); + + describe('non-standard hook names in inline config', () => { + it('registers handlers for arbitrary hook names (not just built-in)', () => { + const config = { + CustomHook: [{ handler: vi.fn() }], + } as unknown as InlineHookConfig; + + const result = resolveHooks(config); + expect(result).toBeInstanceOf(HooksManager); + expect(result!.hasHandlers('CustomHook')).toBe(true); + }); + }); +}); diff --git a/tests/unit/hooks-types-adversarial.test.ts b/tests/unit/hooks-types-adversarial.test.ts new file mode 100644 index 0000000..daf831c --- /dev/null +++ b/tests/unit/hooks-types-adversarial.test.ts @@ -0,0 +1,113 @@ +import { describe, it, expect } from 'vitest'; +import { isAsyncOutput } from '../../src/lib/hooks-types.js'; + +describe('isAsyncOutput (adversarial)', () => { + describe('truthy values that should NOT match', () => { + it('{ async: "true" } (string) is not AsyncOutput', () => { + expect(isAsyncOutput({ async: 'true' })).toBe(false); + }); + + it('{ async: 1 } (number) is not AsyncOutput', () => { + expect(isAsyncOutput({ async: 1 })).toBe(false); + }); + + it('{ async: {} } (object) is not AsyncOutput', () => { + expect(isAsyncOutput({ async: {} })).toBe(false); + }); + + it('{ async: [] } (array) is not AsyncOutput', () => { + expect(isAsyncOutput({ async: [] })).toBe(false); + }); + }); + + describe('non-object inputs', () => { + it('null is not AsyncOutput', () => { + expect(isAsyncOutput(null)).toBe(false); + }); + + it('undefined is not AsyncOutput', () => { + expect(isAsyncOutput(undefined)).toBe(false); + }); + + it('number is not AsyncOutput', () => { + expect(isAsyncOutput(42)).toBe(false); + }); + + it('string is not AsyncOutput', () => { + expect(isAsyncOutput('async')).toBe(false); + }); + + it('boolean true is not AsyncOutput', () => { + expect(isAsyncOutput(true)).toBe(false); + }); + + it('symbol is not AsyncOutput', () => { + expect(isAsyncOutput(Symbol('async'))).toBe(false); + }); + }); + + describe('valid AsyncOutput variations', () => { + it('{ async: true } is AsyncOutput', () => { + expect(isAsyncOutput({ async: true })).toBe(true); + }); + + it('{ async: true, asyncTimeout: 5000 } is AsyncOutput', () => { + expect(isAsyncOutput({ async: true, asyncTimeout: 5000 })).toBe(true); + }); + + it('{ async: true, extraField: "ignored" } is still AsyncOutput', () => { + expect(isAsyncOutput({ async: true, extraField: 'ignored' })).toBe(true); + }); + + it('frozen object { async: true } is AsyncOutput', () => { + expect(isAsyncOutput(Object.freeze({ async: true }))).toBe(true); + }); + }); + + describe('array with async property', () => { + it('array with async property set is treated as AsyncOutput', () => { + const arr: unknown[] = []; + (arr as unknown as Record)['async'] = true; + // Arrays are objects and have 'async' in arr, so this should match + expect(isAsyncOutput(arr)).toBe(true); + }); + }); + + describe('Proxy objects', () => { + it('Proxy that returns true for async property is AsyncOutput', () => { + const proxy = new Proxy( + {}, + { + get(_target, prop) { + if (prop === 'async') return true; + return undefined; + }, + has(_target, prop) { + return prop === 'async'; + }, + }, + ); + expect(isAsyncOutput(proxy)).toBe(true); + }); + + it('Proxy that throws on property access causes isAsyncOutput to throw', () => { + const proxy = new Proxy( + {}, + { + has() { + throw new Error('proxy trap'); + }, + }, + ); + expect(() => isAsyncOutput(proxy)).toThrow('proxy trap'); + }); + }); + + describe('Object.create(null)', () => { + it('bare object with async: true is AsyncOutput', () => { + const obj = Object.create(null); + obj.async = true; + expect(isAsyncOutput(obj)).toBe(true); + }); + }); +}); From 3fa8730c36a5d938a17b34d4821dd03f70ed4242 Mon Sep 17 00:00:00 2001 From: Matt Apperson Date: Thu, 16 Apr 2026 17:01:13 -0400 Subject: [PATCH 3/7] style: apply Biome formatting and import ordering Fixes CI failure on `validate` job by applying Biome's auto-fixes for import sorting in async-params.ts and model-result.ts, and formatting in hooks-matchers.ts, hooks-schemas.ts, model-result.ts, and conversation-state.test.ts. --- src/lib/async-params.ts | 4 +- src/lib/hooks-matchers.ts | 5 +- src/lib/hooks-schemas.ts | 79 +++++++++++++++---- src/lib/model-result.ts | 109 +++++++++++++++++--------- tests/unit/conversation-state.test.ts | 28 +++++-- 5 files changed, 163 insertions(+), 62 deletions(-) diff --git a/src/lib/async-params.ts b/src/lib/async-params.ts index f908d23..0e4314a 100644 --- a/src/lib/async-params.ts +++ b/src/lib/async-params.ts @@ -1,9 +1,9 @@ import type * as models from '@openrouter/sdk/models'; import type { OpenResponsesResult } from '@openrouter/sdk/models'; +import type { HooksManager } from './hooks-manager.js'; +import type { InlineHookConfig } from './hooks-types.js'; import type { Item } from './item-types.js'; import type { ContextInput } from './tool-context.js'; -import type { InlineHookConfig } from './hooks-types.js'; -import type { HooksManager } from './hooks-manager.js'; import type { ParsedToolCall, StateAccessor, diff --git a/src/lib/hooks-matchers.ts b/src/lib/hooks-matchers.ts index d66090e..386bc83 100644 --- a/src/lib/hooks-matchers.ts +++ b/src/lib/hooks-matchers.ts @@ -8,10 +8,7 @@ import type { ToolMatcher } from './hooks-types.js'; * - `RegExp` -> `.test(toolName)` * - `function` -> arbitrary predicate */ -export function matchesTool( - matcher: ToolMatcher | undefined, - toolName: string, -): boolean { +export function matchesTool(matcher: ToolMatcher | undefined, toolName: string): boolean { if (matcher === undefined) { return true; } diff --git a/src/lib/hooks-schemas.ts b/src/lib/hooks-schemas.ts index e6c9ce4..d33fd97 100644 --- a/src/lib/hooks-schemas.ts +++ b/src/lib/hooks-schemas.ts @@ -25,14 +25,22 @@ export const PostToolUseFailurePayloadSchema = z4.object({ }); export const StopPayloadSchema = z4.object({ - reason: z4.enum(['end_turn', 'max_tokens', 'stop_sequence']), + reason: z4.enum([ + 'end_turn', + 'max_tokens', + 'stop_sequence', + ]), sessionId: z4.string(), }); export const PermissionRequestPayloadSchema = z4.object({ toolName: z4.string(), toolInput: z4.record(z4.string(), z4.unknown()), - riskLevel: z4.enum(['low', 'medium', 'high']), + riskLevel: z4.enum([ + 'low', + 'medium', + 'high', + ]), sessionId: z4.string(), }); @@ -48,7 +56,12 @@ export const SessionStartPayloadSchema = z4.object({ export const SessionEndPayloadSchema = z4.object({ sessionId: z4.string(), - reason: z4.enum(['user', 'error', 'max_turns', 'complete']), + reason: z4.enum([ + 'user', + 'error', + 'max_turns', + 'complete', + ]), }); //#endregion @@ -57,7 +70,12 @@ export const SessionEndPayloadSchema = z4.object({ export const PreToolUseResultSchema = z4.object({ mutatedInput: z4.record(z4.string(), z4.unknown()).optional(), - block: z4.union([z4.boolean(), z4.string()]).optional(), + block: z4 + .union([ + z4.boolean(), + z4.string(), + ]) + .optional(), }); export const StopResultSchema = z4.object({ @@ -66,13 +84,22 @@ export const StopResultSchema = z4.object({ }); export const PermissionRequestResultSchema = z4.object({ - decision: z4.enum(['allow', 'deny', 'ask_user']), + decision: z4.enum([ + 'allow', + 'deny', + 'ask_user', + ]), reason: z4.string().optional(), }); export const UserPromptSubmitResultSchema = z4.object({ mutatedPrompt: z4.string().optional(), - reject: z4.union([z4.boolean(), z4.string()]).optional(), + reject: z4 + .union([ + z4.boolean(), + z4.string(), + ]) + .optional(), }); const VoidResultSchema = z4.void(); @@ -82,14 +109,38 @@ const VoidResultSchema = z4.void(); //#region Built-in Hook Registry export const BUILT_IN_HOOKS: Record = { - PreToolUse: { payload: PreToolUsePayloadSchema, result: PreToolUseResultSchema }, - PostToolUse: { payload: PostToolUsePayloadSchema, result: VoidResultSchema }, - PostToolUseFailure: { payload: PostToolUseFailurePayloadSchema, result: VoidResultSchema }, - UserPromptSubmit: { payload: UserPromptSubmitPayloadSchema, result: UserPromptSubmitResultSchema }, - Stop: { payload: StopPayloadSchema, result: StopResultSchema }, - PermissionRequest: { payload: PermissionRequestPayloadSchema, result: PermissionRequestResultSchema }, - SessionStart: { payload: SessionStartPayloadSchema, result: VoidResultSchema }, - SessionEnd: { payload: SessionEndPayloadSchema, result: VoidResultSchema }, + PreToolUse: { + payload: PreToolUsePayloadSchema, + result: PreToolUseResultSchema, + }, + PostToolUse: { + payload: PostToolUsePayloadSchema, + result: VoidResultSchema, + }, + PostToolUseFailure: { + payload: PostToolUseFailurePayloadSchema, + result: VoidResultSchema, + }, + UserPromptSubmit: { + payload: UserPromptSubmitPayloadSchema, + result: UserPromptSubmitResultSchema, + }, + Stop: { + payload: StopPayloadSchema, + result: StopResultSchema, + }, + PermissionRequest: { + payload: PermissionRequestPayloadSchema, + result: PermissionRequestResultSchema, + }, + SessionStart: { + payload: SessionStartPayloadSchema, + result: VoidResultSchema, + }, + SessionEnd: { + payload: SessionEndPayloadSchema, + result: VoidResultSchema, + }, }; export const BUILT_IN_HOOK_NAMES = new Set(Object.keys(BUILT_IN_HOOKS)); diff --git a/src/lib/model-result.ts b/src/lib/model-result.ts index e2ebb42..4cf1957 100644 --- a/src/lib/model-result.ts +++ b/src/lib/model-result.ts @@ -16,6 +16,7 @@ import { unsentResultsToAPIFormat, updateState, } from './conversation-state.js'; +import type { HooksManager } from './hooks-manager.js'; import { applyNextTurnParamsToRequest, executeNextTurnParamsFunctions, @@ -68,7 +69,6 @@ import type { UnsentToolResult, } from './tool-types.js'; import { hasExecuteFunction, isToolCallOutputEvent } from './tool-types.js'; -import type { HooksManager } from './hooks-manager.js'; /** * Default maximum number of tool execution steps if no stopWhen is specified. @@ -770,19 +770,26 @@ export class ModelResult< // Emit PreToolUse hook -- can block or mutate input let effectiveToolCall = toolCall; if (this.hooksManager) { - const preResult = await this.hooksManager.emit('PreToolUse', { - toolName: toolCall.name, - toolInput: (toolCall.arguments ?? {}) as Record, - sessionId: this.currentState?.id ?? '', - }, { toolName: toolCall.name }); + const preResult = await this.hooksManager.emit( + 'PreToolUse', + { + toolName: toolCall.name, + toolInput: (toolCall.arguments ?? {}) as Record, + sessionId: this.currentState?.id ?? '', + }, + { + toolName: toolCall.name, + }, + ); if (preResult.blocked) { const blockResult = preResult.results.find( (r) => r && typeof r === 'object' && 'block' in r && r.block, ); - const reason = blockResult && typeof blockResult.block === 'string' - ? blockResult.block - : 'Blocked by PreToolUse hook'; + const reason = + blockResult && typeof blockResult.block === 'string' + ? blockResult.block + : 'Blocked by PreToolUse hook'; return { type: 'hook_blocked' as const, toolCall, @@ -790,7 +797,9 @@ export class ModelResult< type: 'function_call_output' as const, id: `output_${toolCall.id}`, callId: toolCall.id, - output: JSON.stringify({ error: reason }), + output: JSON.stringify({ + error: reason, + }), }, }; } @@ -798,7 +807,10 @@ export class ModelResult< // Apply mutated input if present const finalInput = preResult.finalPayload.toolInput; if (finalInput !== toolCall.arguments) { - effectiveToolCall = { ...toolCall, arguments: finalInput }; + effectiveToolCall = { + ...toolCall, + arguments: finalInput, + }; } } @@ -816,20 +828,32 @@ export class ModelResult< // Emit PostToolUse or PostToolUseFailure if (this.hooksManager) { if (result.error) { - await this.hooksManager.emit('PostToolUseFailure', { - toolName: effectiveToolCall.name, - toolInput: (effectiveToolCall.arguments ?? {}) as Record, - error: result.error, - sessionId: this.currentState?.id ?? '', - }, { toolName: effectiveToolCall.name }); + await this.hooksManager.emit( + 'PostToolUseFailure', + { + toolName: effectiveToolCall.name, + toolInput: (effectiveToolCall.arguments ?? {}) as Record, + error: result.error, + sessionId: this.currentState?.id ?? '', + }, + { + toolName: effectiveToolCall.name, + }, + ); } else { - await this.hooksManager.emit('PostToolUse', { - toolName: effectiveToolCall.name, - toolInput: (effectiveToolCall.arguments ?? {}) as Record, - toolOutput: result.result, - durationMs, - sessionId: this.currentState?.id ?? '', - }, { toolName: effectiveToolCall.name }); + await this.hooksManager.emit( + 'PostToolUse', + { + toolName: effectiveToolCall.name, + toolInput: (effectiveToolCall.arguments ?? {}) as Record, + toolOutput: result.result, + durationMs, + sessionId: this.currentState?.id ?? '', + }, + { + toolName: effectiveToolCall.name, + }, + ); } } @@ -858,12 +882,18 @@ export class ModelResult< // Emit PostToolUseFailure for rejected promises if (this.hooksManager) { - await this.hooksManager.emit('PostToolUseFailure', { - toolName: originalToolCall.name, - toolInput: (originalToolCall.arguments ?? {}) as Record, - error: settled.reason, - sessionId: this.currentState?.id ?? '', - }, { toolName: originalToolCall.name }); + await this.hooksManager.emit( + 'PostToolUseFailure', + { + toolName: originalToolCall.name, + toolInput: (originalToolCall.arguments ?? {}) as Record, + error: settled.reason, + sessionId: this.currentState?.id ?? '', + }, + { + toolName: originalToolCall.name, + }, + ); } this.broadcastToolResult(originalToolCall.id, { @@ -1279,13 +1309,17 @@ export class ModelResult< const rejectResult = submitResult.results.find( (r) => r && typeof r === 'object' && 'reject' in r && r.reject, ); - const reason = rejectResult && typeof rejectResult.reject === 'string' - ? rejectResult.reject - : 'Prompt rejected by hook'; + const reason = + rejectResult && typeof rejectResult.reject === 'string' + ? rejectResult.reject + : 'Prompt rejected by hook'; throw new Error(reason); } if (submitResult.finalPayload.prompt !== baseRequest.input) { - baseRequest = { ...baseRequest, input: submitResult.finalPayload.prompt }; + baseRequest = { + ...baseRequest, + input: submitResult.finalPayload.prompt, + }; } } @@ -1567,7 +1601,12 @@ export class ModelResult< sessionId: this.currentState?.id ?? '', }); const lastResult = stopResult.results.at(-1); - if (lastResult && typeof lastResult === 'object' && 'forceResume' in lastResult && lastResult.forceResume) { + if ( + lastResult && + typeof lastResult === 'object' && + 'forceResume' in lastResult && + lastResult.forceResume + ) { // Don't break -- continue the loop continue; } diff --git a/tests/unit/conversation-state.test.ts b/tests/unit/conversation-state.test.ts index f4381e1..81c60e2 100644 --- a/tests/unit/conversation-state.test.ts +++ b/tests/unit/conversation-state.test.ts @@ -597,7 +597,9 @@ describe('Conversation State Utilities', () => { it('should stringify error outputs', () => { const result = createRejectedResult('call-1', 'test_tool', 'Something went wrong'); - const formatted = unsentResultsToAPIFormat([result]); + const formatted = unsentResultsToAPIFormat([ + result, + ]); expect(formatted[0]?.output).toBe('{"error":"Something went wrong"}'); }); @@ -609,7 +611,9 @@ describe('Conversation State Utilities', () => { text: 'Hello world', }, ]; - const results = [createUnsentResult('call-1', 'test_tool', contentArray)]; + const results = [ + createUnsentResult('call-1', 'test_tool', contentArray), + ]; const formatted = unsentResultsToAPIFormat(results); @@ -625,7 +629,9 @@ describe('Conversation State Utilities', () => { imageUrl: 'data:image/png;base64,abc123', }, ]; - const results = [createUnsentResult('call-1', 'image_gen', contentArray)]; + const results = [ + createUnsentResult('call-1', 'image_gen', contentArray), + ]; const formatted = unsentResultsToAPIFormat(results); @@ -645,7 +651,9 @@ describe('Conversation State Utilities', () => { imageUrl: 'data:image/png;base64,abc123', }, ]; - const results = [createUnsentResult('call-1', 'image_gen', contentArray)]; + const results = [ + createUnsentResult('call-1', 'image_gen', contentArray), + ]; const formatted = unsentResultsToAPIFormat(results); @@ -657,7 +665,9 @@ describe('Conversation State Utilities', () => { 'item1', 'item2', ]; - const results = [createUnsentResult('call-1', 'test_tool', regularArray)]; + const results = [ + createUnsentResult('call-1', 'test_tool', regularArray), + ]; const formatted = unsentResultsToAPIFormat(results); @@ -665,7 +675,9 @@ describe('Conversation State Utilities', () => { }); it('should stringify empty arrays', () => { - const results = [createUnsentResult('call-1', 'test_tool', [])]; + const results = [ + createUnsentResult('call-1', 'test_tool', []), + ]; const formatted = unsentResultsToAPIFormat(results); @@ -679,7 +691,9 @@ describe('Conversation State Utilities', () => { data: 'test', }, ]; - const results = [createUnsentResult('call-1', 'test_tool', invalidArray)]; + const results = [ + createUnsentResult('call-1', 'test_tool', invalidArray), + ]; const formatted = unsentResultsToAPIFormat(results); From a5177c515eb11bbb6086b1c85541cbed60b743a8 Mon Sep 17 00:00:00 2001 From: Matt Apperson Date: Mon, 20 Apr 2026 15:20:28 -0400 Subject: [PATCH 4/7] fix(hooks): address PR review feedback across hooks system Library internals: - AsyncOutput.work field enables true fire-and-forget; chain tracks background work with timeout instead of advertising it cosmetically - HooksManager.emit now validates payloads and handler results against their Zod schemas; stored customHooks registry drives custom-hook validation - MUTATION_FIELD_MAP scoped per-hook so only PreToolUse maps mutatedInput->toolInput and only UserPromptSubmit maps mutatedPrompt->prompt; custom hooks opt out by default - pendingAsync converted to a self-cleaning Set so it no longer leaks across long sessions - Per-emit AbortController tracked on the manager; abortInflight() cancels every in-flight emit's HookContext.signal - Matcher without toolName now fails closed (skipped) instead of open - isBlockTriggered rejects empty strings to stay consistent with the truthy reason search in model-result - registerEntry hidden behind a module-private symbol + helper so resolveHooks can still register inline configs without exposing a back door to user code model-result integration: - Fire Pre/Post/PostFailure hooks from every tool-execution path by routing through a new runToolWithHooks helper (auto, auto-approved, and manually approved calls are all covered) - Emit PermissionRequest in handleApprovalCheck; allow/deny/ask_user decisions gate the approval flow, denied calls synthesize an error result without executing - UserPromptSubmit now fires before stateful input-wrapping, handles both string and structured message-array inputs, and applies mutations back to the base request - Cap Stop forceResume at 3 consecutive overrides to prevent infinite loops when a stop condition keeps firing - Check .some(forceResume) across all Stop results and honor appendPrompt by injecting a user message into the next turn - SessionStart config populated with {hasTools, hasApproval, hasState} rather than hardcoded undefined - Swap Date.now() -> performance.now() for durationMs timing --- packages/agent/src/lib/hooks-emit.ts | 128 ++- packages/agent/src/lib/hooks-manager.ts | 202 +++-- packages/agent/src/lib/hooks-resolve.ts | 9 +- packages/agent/src/lib/hooks-schemas.ts | 13 + packages/agent/src/lib/hooks-types.ts | 40 +- packages/agent/src/lib/model-result.ts | 736 +++++++++++++---- .../tests/unit/hooks-emit-adversarial.test.ts | 46 +- packages/agent/tests/unit/hooks-emit.test.ts | 166 +++- .../unit/hooks-manager-adversarial.test.ts | 12 + .../agent/tests/unit/hooks-manager.test.ts | 235 +++++- .../tests/unit/model-result-hooks.test.ts | 763 ++++++++++++++++++ 11 files changed, 2089 insertions(+), 261 deletions(-) create mode 100644 packages/agent/tests/unit/model-result-hooks.test.ts diff --git a/packages/agent/src/lib/hooks-emit.ts b/packages/agent/src/lib/hooks-emit.ts index f9e3580..9cba3e0 100644 --- a/packages/agent/src/lib/hooks-emit.ts +++ b/packages/agent/src/lib/hooks-emit.ts @@ -1,3 +1,5 @@ +import type { $ZodType } from 'zod/v4/core'; +import { safeParse } from 'zod/v4/core'; import { matchesTool } from './hooks-matchers.js'; import type { AsyncOutput, EmitResult, HookContext, HookEntry } from './hooks-types.js'; import { @@ -12,17 +14,30 @@ export interface ExecuteChainOptions { readonly hookName: string; readonly throwOnHandlerError: boolean; readonly toolName?: string | undefined; + /** + * Optional Zod schema to validate each handler's result BEFORE the chain + * applies mutation piping or short-circuit logic. Validation errors are + * handled the same way as handler throws: either re-thrown (strict mode) or + * logged as a warning (default). + * + * Void-typed hooks typically pass `undefined` here; results in that case are + * not validated. + */ + readonly resultSchema?: $ZodType | undefined; } /** * Execute a chain of hook handlers sequentially. * * Supports: - * - ToolMatcher and filter-based skipping - * - Sync results collected into `results` - * - Async fire-and-forget via `{ async: true }` return - * - Mutation piping (mutatedInput -> toolInput, mutatedPrompt -> prompt) - * - Short-circuit on block/reject fields + * - ToolMatcher and filter-based skipping (matcher fails closed: a handler with + * a matcher and no `options.toolName` is skipped) + * - Sync results validated against `options.resultSchema` and collected into `results` + * - Async fire-and-forget via a returned {@link AsyncOutput} -- the handler's + * `work` promise is pushed to `pending` without being awaited; the manager is + * responsible for draining/timing out that work + * - Per-hook mutation piping (driven by {@link MUTATION_FIELD_MAP}) + * - Short-circuit on block/reject fields (non-empty string or `true`) */ export async function executeHandlerChain( entries: ReadonlyArray>, @@ -39,6 +54,7 @@ export async function executeHandlerChain( const blockField = BLOCK_FIELDS[options.hookName]; const canBlock = BLOCK_HOOKS.has(options.hookName); + const mutationMap = MUTATION_FIELD_MAP[options.hookName]; for (let i = 0; i < entries.length; i++) { const entry = entries[i]; @@ -46,13 +62,16 @@ export async function executeHandlerChain( continue; } - // Matcher check for tool-scoped hooks - if ( - entry.matcher !== undefined && - options.toolName !== undefined && - !matchesTool(entry.matcher, options.toolName) - ) { - continue; + // Matcher check for tool-scoped hooks. Matchers fail closed: if a matcher + // is registered and no toolName is available for this emit, we skip the + // handler rather than invoking it globally. + if (entry.matcher !== undefined) { + if (options.toolName === undefined) { + continue; + } + if (!matchesTool(entry.matcher, options.toolName)) { + continue; + } } // Filter check @@ -63,30 +82,47 @@ export async function executeHandlerChain( try { const returnValue = await entry.handler(currentPayload, context); - // Async fire-and-forget + // Async fire-and-forget: the handler has returned a signal describing + // detached work. Track the (optional) work promise for drain/timeout. if (isAsyncOutput(returnValue)) { - const asyncOutput = returnValue as AsyncOutput; - const timeout = asyncOutput.asyncTimeout ?? DEFAULT_ASYNC_TIMEOUT; - const controller = new AbortController(); - const timeoutId = setTimeout(() => controller.abort(), timeout); - - const asyncPromise = Promise.resolve().then(() => { - clearTimeout(timeoutId); - }); - pending.push(asyncPromise); + const asyncOutput: AsyncOutput = returnValue; + const trackedWork = trackAsyncWork(asyncOutput); + if (trackedWork !== undefined) { + pending.push(trackedWork); + } continue; } - // Void / undefined -- side-effect only, continue + // Void / undefined / null -- side-effect only, continue if (returnValue === undefined || returnValue === null) { continue; } + // Validate the result against the schema if one is supplied. A failure + // here is treated like any other handler error: propagated in strict + // mode, logged otherwise. + if (options.resultSchema) { + const validation = safeParse(options.resultSchema, returnValue); + if (!validation.success) { + const err = new Error( + `[HooksManager] Handler ${i} for hook "${options.hookName}" returned an invalid result: ${validation.error.message}`, + ); + if (options.throwOnHandlerError) { + throw err; + } + console.warn(err.message); + continue; + } + } + + // Non-schema-void results pass through unchanged; we narrow via R. const result = returnValue as R; results.push(result); - // Apply mutation piping - currentPayload = applyMutations(currentPayload, result); + // Apply mutation piping -- only hooks listed in MUTATION_FIELD_MAP participate. + if (mutationMap) { + currentPayload = applyMutations(currentPayload, result, mutationMap); + } // Short-circuit on block if (canBlock && blockField && isBlockTriggered(result, blockField)) { @@ -109,16 +145,44 @@ export async function executeHandlerChain( }; } +/** + * Given an {@link AsyncOutput} signal, return a Promise that resolves + * when the handler's detached `work` settles OR the timeout fires -- whichever + * is first. Returns `undefined` if there is no work to track. + */ +function trackAsyncWork(output: AsyncOutput): Promise | undefined { + if (output.work === undefined) { + return undefined; + } + const timeout = output.asyncTimeout ?? DEFAULT_ASYNC_TIMEOUT; + + return new Promise((resolve) => { + let settled = false; + const finish = () => { + if (settled) { + return; + } + settled = true; + clearTimeout(timeoutId); + resolve(); + }; + + const timeoutId = setTimeout(finish, timeout); + + output.work?.then(finish, finish); + }); +} + /** * Apply mutation fields from a result onto the current payload. */ -function applyMutations(payload: P, result: R): P { +function applyMutations(payload: P, result: R, mutationMap: Record): P { if (typeof result !== 'object' || result === null) { return payload; } let mutated = payload; - for (const [resultField, payloadField] of Object.entries(MUTATION_FIELD_MAP)) { + for (const [resultField, payloadField] of Object.entries(mutationMap)) { if (resultField in result) { const value = (result as Record)[resultField]; if (value !== undefined) { @@ -134,11 +198,19 @@ function applyMutations(payload: P, result: R): P { /** * Check if a result triggers a short-circuit block. + * + * A block fires when the field is `=== true` or a non-empty string. Empty + * strings are treated as "no block reason supplied" -- they do NOT trigger a + * short-circuit, which keeps emit consistent with callers that look up the + * first block reason with a truthy check. */ function isBlockTriggered(result: R, blockField: string): boolean { if (typeof result !== 'object' || result === null) { return false; } const value = (result as Record)[blockField]; - return value === true || typeof value === 'string'; + if (value === true) { + return true; + } + return typeof value === 'string' && value.length > 0; } diff --git a/packages/agent/src/lib/hooks-manager.ts b/packages/agent/src/lib/hooks-manager.ts index a3e3be0..baf3b70 100644 --- a/packages/agent/src/lib/hooks-manager.ts +++ b/packages/agent/src/lib/hooks-manager.ts @@ -1,6 +1,7 @@ -import type { infer as zodInfer } from 'zod/v4/core'; +import type { $ZodType, infer as zodInfer } from 'zod/v4/core'; +import { safeParse } from 'zod/v4/core'; import { executeHandlerChain } from './hooks-emit.js'; -import { BUILT_IN_HOOK_NAMES } from './hooks-schemas.js'; +import { BUILT_IN_HOOK_NAMES, BUILT_IN_HOOKS, VOID_RESULT_HOOKS } from './hooks-schemas.js'; import type { BuiltInHookDefinitions, EmitResult, @@ -32,6 +33,15 @@ interface EntryRegistration { //#endregion +/** + * Module-private key used by `resolveHooks` to call the internal + * untyped-registration path without exposing it as a public method on the + * class. See {@link getInternalRegistrar} below. + */ +const INTERNAL_REGISTRAR_KEY: unique symbol = Symbol('HooksManager.internalRegistrar'); + +type InternalRegistrar = (hookName: string, entry: HookEntry) => () => void; + /** * Typed, extensible hook system for agent lifecycle events. * @@ -40,14 +50,16 @@ interface EntryRegistration { */ export class HooksManager> { private readonly entries = new Map[]>(); - private readonly pendingAsync: Promise[] = []; + private readonly pendingAsync = new Set>(); + private readonly customHooks: HookRegistry; + private readonly inflightControllers = new Set(); private readonly throwOnHandlerError: boolean; private sessionId = ''; constructor(customHooks?: Custom, options?: HooksManagerOptions) { this.throwOnHandlerError = options?.throwOnHandlerError ?? false; - // Validate no collisions between custom and built-in hook names + // Validate no collisions between custom and built-in hook names. if (customHooks) { for (const name of Object.keys(customHooks)) { if (BUILT_IN_HOOK_NAMES.has(name)) { @@ -56,6 +68,11 @@ export class HooksManager> { ); } } + this.customHooks = { + ...customHooks, + }; + } else { + this.customHooks = {}; } } @@ -73,28 +90,7 @@ export class HooksManager> { hookName: K, entry: EntryRegistration, K>, ResultOf, K>>, ): () => void { - return this.registerEntry(hookName, entry as HookEntry); - } - - /** - * Internal: register an untyped entry. Used by resolveHooks for inline config normalization. - * @internal - */ - registerEntry(hookName: string, entry: HookEntry): () => void { - const list = this.entries.get(hookName) ?? []; - list.push(entry); - this.entries.set(hookName, list); - - return () => { - const current = this.entries.get(hookName); - if (!current) { - return; - } - const idx = current.indexOf(entry); - if (idx !== -1) { - current.splice(idx, 1); - } - }; + return this._register(hookName, entry as HookEntry); } /** @@ -131,7 +127,12 @@ export class HooksManager> { } /** - * Validate the payload, invoke matching handlers, and return results. + * Validate the payload (and each handler's result) against the registered + * Zod schemas, invoke matching handlers, and return results. + * + * Payload validation failure is treated according to `throwOnHandlerError`: + * strict mode re-throws, default mode logs a warning and returns an empty + * result without invoking any handlers. */ async emit & string>( hookName: K, @@ -142,37 +143,89 @@ export class HooksManager> { ): Promise, K>, PayloadOf, K>>> { const list = this.entries.get(hookName) ?? []; + const definition = this._definitionFor(hookName); + if (definition) { + const parsed = safeParse(definition.payload, payload); + if (!parsed.success) { + const err = new Error( + `[HooksManager] Invalid payload for hook "${hookName}": ${parsed.error.message}`, + ); + if (this.throwOnHandlerError) { + throw err; + } + console.warn(err.message); + return { + results: [], + pending: [], + finalPayload: payload, + blocked: false, + }; + } + } + + const controller = new AbortController(); + this.inflightControllers.add(controller); + const context: HookContext = { - signal: new AbortController().signal, + signal: controller.signal, hookName, sessionId: this.sessionId, }; - const result = await executeHandlerChain( - list as ReadonlyArray< - HookEntry, K>, ResultOf, K>> - >, - payload, - context, - { - hookName, - throwOnHandlerError: this.throwOnHandlerError, - toolName: emitContext?.toolName, - }, - ); - - // Track pending async work for drain() - this.pendingAsync.push(...result.pending); - - return result; + try { + const resultSchema = + definition && !VOID_RESULT_HOOKS.has(hookName) ? definition.result : undefined; + + const result = await executeHandlerChain( + list as ReadonlyArray< + HookEntry, K>, ResultOf, K>> + >, + payload, + context, + { + hookName, + throwOnHandlerError: this.throwOnHandlerError, + toolName: emitContext?.toolName, + resultSchema, + }, + ); + + // Track pending async work for drain(); each promise self-removes once + // it settles so the set can't grow unbounded across many emits. + for (const p of result.pending) { + this.pendingAsync.add(p); + p.finally(() => { + this.pendingAsync.delete(p); + }); + } + + return result; + } finally { + this.inflightControllers.delete(controller); + } } /** - * Await all in-flight async handlers. Used for graceful shutdown. + * Await all in-flight async handler work. Used for graceful shutdown. */ async drain(): Promise { - const pending = this.pendingAsync.splice(0); - await Promise.allSettled(pending); + while (this.pendingAsync.size > 0) { + const snapshot = Array.from(this.pendingAsync); + await Promise.allSettled(snapshot); + } + } + + /** + * Abort every in-flight `emit()` by firing their context `signal`s. Handlers + * that kick off async work should observe `context.signal` to honor this. + * + * Does not remove pending async work from `pendingAsync` — callers that want + * to wait for handlers to wind down should still call `drain()` afterward. + */ + abortInflight(reason?: unknown): void { + for (const controller of this.inflightControllers) { + controller.abort(reason); + } } /** @@ -182,4 +235,57 @@ export class HooksManager> { const list = this.entries.get(hookName); return list !== undefined && list.length > 0; } + + /** + * Internal registration path used by `resolveHooks` to translate an + * InlineHookConfig object into this manager. User code should use `on()`. + */ + [INTERNAL_REGISTRAR_KEY](hookName: string, entry: HookEntry): () => void { + return this._register(hookName, entry); + } + + private _register(hookName: string, entry: HookEntry): () => void { + const list = this.entries.get(hookName) ?? []; + list.push(entry); + this.entries.set(hookName, list); + + return () => { + const current = this.entries.get(hookName); + if (!current) { + return; + } + const idx = current.indexOf(entry); + if (idx !== -1) { + current.splice(idx, 1); + } + }; + } + + private _definitionFor(hookName: string): + | { + payload: $ZodType; + result: $ZodType; + } + | undefined { + const builtIn = BUILT_IN_HOOKS[hookName]; + if (builtIn) { + return builtIn; + } + const custom = this.customHooks[hookName]; + if (custom) { + return custom; + } + return undefined; + } +} + +/** + * Internal: return the private registrar of a HooksManager instance. This + * lets `resolveHooks` register arbitrary hook names (including unknown ones + * from an inline config) without exposing a public bypass of the typed `on()`. + * + * NOT PART OF THE PUBLIC API. + */ +export function getInternalRegistrar(manager: HooksManager): InternalRegistrar { + return manager[INTERNAL_REGISTRAR_KEY].bind(manager); } diff --git a/packages/agent/src/lib/hooks-resolve.ts b/packages/agent/src/lib/hooks-resolve.ts index bf7fd28..4846336 100644 --- a/packages/agent/src/lib/hooks-resolve.ts +++ b/packages/agent/src/lib/hooks-resolve.ts @@ -1,4 +1,4 @@ -import { HooksManager } from './hooks-manager.js'; +import { getInternalRegistrar, HooksManager } from './hooks-manager.js'; import type { HookEntry, InlineHookConfig } from './hooks-types.js'; /** @@ -19,14 +19,17 @@ export function resolveHooks( return hooks; } - // Inline config -> HooksManager + // Inline config -> HooksManager. We register through the internal registrar + // so we don't have to constrain `resolveHooks` to the typed `on()` surface + // and so user code can't reach this path. const manager = new HooksManager(); + const register = getInternalRegistrar(manager); for (const [hookName, entries] of Object.entries(hooks)) { if (!entries || !Array.isArray(entries)) { continue; } for (const entry of entries) { - manager.registerEntry(hookName, entry as HookEntry); + register(hookName, entry as HookEntry); } } return manager; diff --git a/packages/agent/src/lib/hooks-schemas.ts b/packages/agent/src/lib/hooks-schemas.ts index d33fd97..952841b 100644 --- a/packages/agent/src/lib/hooks-schemas.ts +++ b/packages/agent/src/lib/hooks-schemas.ts @@ -145,4 +145,17 @@ export const BUILT_IN_HOOKS: Record = { export const BUILT_IN_HOOK_NAMES = new Set(Object.keys(BUILT_IN_HOOKS)); +/** + * Set of built-in hook names whose result schema is `z4.void()`. These hooks + * have no meaningful result object, so the emit pipeline skips result + * validation for them (allowing handlers to return arbitrary values that are + * then collected as opaque results without complaint). + */ +export const VOID_RESULT_HOOKS = new Set([ + 'PostToolUse', + 'PostToolUseFailure', + 'SessionStart', + 'SessionEnd', +]); + //#endregion diff --git a/packages/agent/src/lib/hooks-types.ts b/packages/agent/src/lib/hooks-types.ts index 41675ab..68fde8a 100644 --- a/packages/agent/src/lib/hooks-types.ts +++ b/packages/agent/src/lib/hooks-types.ts @@ -34,6 +34,10 @@ export type HookRegistry = Record; /** * Context provided to every hook handler invocation. + * + * - `signal` is aborted if the manager's `abortInflight()` is called while the + * emit is still running. Handlers that kick off background work via + * {@link AsyncOutput} should observe the signal for cancellation. */ export interface HookContext { readonly signal: AbortSignal; @@ -43,10 +47,21 @@ export interface HookContext { /** * Returned by a handler to signal fire-and-forget mode. - * The agent proceeds immediately without waiting for completion. + * + * The chain proceeds immediately without waiting for completion. Any background + * work the handler has kicked off should be attached as `work` so the manager + * can track it for `drain()` and enforce the `asyncTimeout`. + * + * The handler is expected to construct and return this object synchronously (or + * resolve to it quickly). The `work` promise is the detached work to track. */ export interface AsyncOutput { readonly async: true; + /** + * Background work the manager should track for `drain()` and time out after + * `asyncTimeout` ms. Omit if there is no work to track. + */ + readonly work?: Promise; /** Milliseconds before the async handler is aborted. Default: 30000 */ readonly asyncTimeout?: number; } @@ -103,8 +118,9 @@ export interface EmitResult { export interface HooksManagerOptions { /** - * If true, a throwing handler stops the chain and propagates the error. - * If false (default), the error is logged as a warning and execution continues. + * If true, a throwing handler or a schema-validation failure stops the chain + * and propagates the error. If false (default), the error is logged as a + * warning and execution continues. */ readonly throwOnHandlerError?: boolean; } @@ -264,12 +280,20 @@ export function isAsyncOutput(value: unknown): value is AsyncOutput { } /** - * Mutation field mapping for payload piping. - * Maps result field names to the payload field they replace. + * Mutation field mapping for payload piping, per hook name. + * + * The outer key is the hook name; the inner map is from result field name to + * the payload field it replaces. Only hooks listed here support mutation + * piping. Custom hooks opt in by supplying their own entry on the manager + * (currently only the built-in mapping is honored). */ -export const MUTATION_FIELD_MAP: Record = { - mutatedInput: 'toolInput', - mutatedPrompt: 'prompt', +export const MUTATION_FIELD_MAP: Record> = { + PreToolUse: { + mutatedInput: 'toolInput', + }, + UserPromptSubmit: { + mutatedPrompt: 'prompt', + }, }; /** diff --git a/packages/agent/src/lib/model-result.ts b/packages/agent/src/lib/model-result.ts index 95b9f74..1b0827a 100644 --- a/packages/agent/src/lib/model-result.ts +++ b/packages/agent/src/lib/model-result.ts @@ -114,6 +114,97 @@ function isEventStream(value: unknown): value is EventStream models.InputsUnion; +} { + if (typeof input === 'string') { + return { + prompt: input, + applyMutated: (mutated) => mutated, + }; + } + + if (Array.isArray(input)) { + let targetIndex = -1; + for (let i = input.length - 1; i >= 0; i--) { + if (isUserStringMessage(input[i])) { + targetIndex = i; + break; + } + } + + if (targetIndex === -1) { + return { + prompt: undefined, + applyMutated: (_mutated, original) => original ?? input, + }; + } + + const target = input[targetIndex]; + if (!isUserStringMessage(target)) { + return { + prompt: undefined, + applyMutated: (_mutated, original) => original ?? input, + }; + } + + return { + prompt: target.content, + applyMutated: (mutated, original) => { + // Prefer the original in case the caller has already re-wrapped it. + const base = Array.isArray(original) ? original : input; + const out = [ + ...base, + ]; + const existing = out[targetIndex]; + if (isUserStringMessage(existing)) { + out[targetIndex] = { + ...existing, + content: mutated, + }; + } + return out; + }, + }; + } + + return { + prompt: undefined, + applyMutated: (_mutated, original) => original ?? input, + }; +} + export interface GetResponseOptions< TTools extends readonly Tool[], TShared extends Record = Record, @@ -559,6 +650,60 @@ export class ModelResult< return true; } + /** + * Inject a user-role message into the conversation state and into the + * accumulated request input, so the next turn picks it up. Used by the + * Stop hook's `appendPrompt` to nudge the model without forcing a resume. + * + * This advances observable state (messages/input change) so the next + * iteration of the execution loop is not a no-op. + */ + private async injectAppendPromptMessage(prompt: string): Promise { + const injectedMessage: models.BaseInputsUnion = { + role: 'user', + content: prompt, + } as models.BaseInputsUnion; + + if (this.currentState) { + // Mutate the in-memory state directly so loop progress is observable + // even when no StateAccessor is configured (forceResume needs state to + // change to avoid looping). Persist when an accessor is available. + const nextMessages = appendToMessages(this.currentState.messages, [ + injectedMessage, + ]); + this.currentState = updateState(this.currentState, { + messages: nextMessages, + }); + if (this.stateAccessor) { + await this.saveStateSafely(); + } + } + + if (this.resolvedRequest) { + const currentInput = this.resolvedRequest.input; + const nextInput: models.InputsUnion = Array.isArray(currentInput) + ? [ + ...currentInput, + injectedMessage, + ] + : currentInput + ? [ + { + role: 'user', + content: currentInput, + } as models.BaseInputsUnion, + injectedMessage, + ] + : [ + injectedMessage, + ]; + this.resolvedRequest = { + ...this.resolvedRequest, + input: nextInput, + }; + } + } + /** * Check if stop conditions are met. * Returns true if execution should stop. @@ -624,6 +769,262 @@ export class ModelResult< return !!tool && !hasExecuteFunction(tool); } + /** + * Shared helper: execute a single tool and emit the full Pre/Post lifecycle + * hooks around it. + * + * Every code path that ultimately calls `executeTool()` for a user-visible + * tool call funnels through here so that PreToolUse/PostToolUse/ + * PostToolUseFailure fire consistently — regardless of whether the tool was + * auto-executed, required approval, or was approved later. + * + * Return shape: + * - `hook_blocked`: PreToolUse returned `block` (boolean true or a reason + * string). The caller should synthesize a denied result without invoking + * the tool. The FunctionCallOutputItem is prebuilt for convenience. + * - `execution`: The tool ran. `result` is the ToolExecutionResult. + * `effectiveToolCall` reflects any `mutatedInput` piped by PreToolUse. + */ + private async runToolWithHooks( + tool: Tool, + toolCall: ParsedToolCall, + turnContext: TurnContext, + onPreliminaryResult?: (toolCallId: string, result: unknown) => void, + ): Promise< + | { + type: 'hook_blocked'; + toolCall: ParsedToolCall; + reason: string; + output: models.FunctionCallOutputItem; + } + | { + type: 'execution'; + effectiveToolCall: ParsedToolCall; + result: Awaited>; + } + > { + let effectiveToolCall = toolCall; + + // Emit PreToolUse hook -- can block or mutate input. + if (this.hooksManager) { + const preResult = await this.hooksManager.emit( + 'PreToolUse', + { + toolName: toolCall.name, + toolInput: (toolCall.arguments ?? {}) as Record, + sessionId: this.currentState?.id ?? '', + }, + { + toolName: toolCall.name, + }, + ); + + if (preResult.blocked) { + const blockResult = preResult.results.find( + (r) => r && typeof r === 'object' && 'block' in r && r.block, + ); + const reason = + blockResult && typeof blockResult.block === 'string' + ? blockResult.block + : 'Blocked by PreToolUse hook'; + return { + type: 'hook_blocked', + toolCall, + reason, + output: { + type: 'function_call_output' as const, + id: `output_${toolCall.id}`, + callId: toolCall.id, + output: JSON.stringify({ + error: reason, + }), + }, + }; + } + + // Apply mutated input if present. + const finalInput = preResult.finalPayload.toolInput; + if (finalInput !== toolCall.arguments) { + effectiveToolCall = { + ...toolCall, + arguments: finalInput, + }; + } + } + + // performance.now() gives monotonic, sub-ms precision and is immune to + // system clock jumps, unlike Date.now(). + const startTime = performance.now(); + const result = await executeTool( + tool, + effectiveToolCall, + turnContext, + onPreliminaryResult, + this.contextStore ?? undefined, + this.options.sharedContextSchema, + ); + const durationMs = performance.now() - startTime; + + // Emit PostToolUse or PostToolUseFailure. + if (this.hooksManager) { + if (result.error) { + await this.hooksManager.emit( + 'PostToolUseFailure', + { + toolName: effectiveToolCall.name, + toolInput: (effectiveToolCall.arguments ?? {}) as Record, + error: result.error, + sessionId: this.currentState?.id ?? '', + }, + { + toolName: effectiveToolCall.name, + }, + ); + } else { + await this.hooksManager.emit( + 'PostToolUse', + { + toolName: effectiveToolCall.name, + toolInput: (effectiveToolCall.arguments ?? {}) as Record, + toolOutput: result.result, + durationMs, + sessionId: this.currentState?.id ?? '', + }, + { + toolName: effectiveToolCall.name, + }, + ); + } + } + + return { + type: 'execution', + effectiveToolCall, + result, + }; + } + + /** + * Emit the PermissionRequest hook before the SDK blocks for user approval. + * + * Returns the hook's collective decision: + * - `allow`: the tool should proceed as if auto-approved (skip approval gate) + * - `deny`: the tool should NOT run; caller should produce a denied result + * - `ask_user`: fall through to the existing approval flow (the default) + * + * Last-wins when multiple handlers return conflicting decisions. Risk level + * defaults to `medium` unless a tool carries an explicit signal in the + * future (schema reserved for extension). + */ + private async emitPermissionRequest(toolCall: ParsedToolCall): Promise<{ + decision: 'allow' | 'deny' | 'ask_user'; + reason?: string; + }> { + if (!this.hooksManager) { + return { + decision: 'ask_user', + }; + } + + const emit = await this.hooksManager.emit( + 'PermissionRequest', + { + toolName: toolCall.name, + toolInput: (toolCall.arguments ?? {}) as Record, + riskLevel: 'medium' as const, + sessionId: this.currentState?.id ?? '', + }, + { + toolName: toolCall.name, + }, + ); + + // Last-wins: if multiple handlers disagree, the most recently registered + // handler dictates the outcome. This is documented and intentional — + // callers that want stricter semantics should register a single final + // handler (or use `throwOnHandlerError` to surface conflicts in tests). + const last = emit.results.at(-1); + if ( + last && + typeof last === 'object' && + 'decision' in last && + (last.decision === 'allow' || last.decision === 'deny' || last.decision === 'ask_user') + ) { + const out: { + decision: 'allow' | 'deny' | 'ask_user'; + reason?: string; + } = { + decision: last.decision, + }; + if ('reason' in last && typeof last.reason === 'string') { + out.reason = last.reason; + } + return out; + } + return { + decision: 'ask_user', + }; + } + + /** + * Run the UserPromptSubmit hook, supporting both string and structured + * inputs. If a handler returns a mutated prompt, the returned object + * applies the mutation back to the original input shape (string in = + * string out, message array in = message array out with the latest user + * text replaced). + * + * Throws if any handler rejects the prompt. + * + * Returns `undefined` when the hook does nothing, or when no usable prompt + * could be extracted from a structured input (handler is skipped and a + * one-time `console.warn` in dev builds explains why). + */ + private async maybeRunUserPromptSubmit(currentInput: models.InputsUnion | undefined): Promise< + | { + applyTo: (original: models.InputsUnion | undefined) => models.InputsUnion; + } + | undefined + > { + if (!this.hooksManager || currentInput === undefined) { + return undefined; + } + + const { prompt, applyMutated } = extractPromptAndApplier(currentInput); + if (prompt === undefined) { + if (process.env['NODE_ENV'] !== 'production') { + console.warn( + '[UserPromptSubmit] Could not extract a user prompt from structured input; skipping hook.', + ); + } + return undefined; + } + + const emit = await this.hooksManager.emit('UserPromptSubmit', { + prompt, + sessionId: this.currentState?.id ?? '', + }); + + if (emit.blocked) { + const rejectResult = emit.results.find( + (r) => r && typeof r === 'object' && 'reject' in r && r.reject, + ); + const reason = + rejectResult && typeof rejectResult.reject === 'string' + ? rejectResult.reject + : 'Prompt rejected by hook'; + throw new Error(reason); + } + + const mutated = emit.finalPayload.prompt; + if (mutated === prompt) { + return undefined; + } + + return { + applyTo: (original: models.InputsUnion | undefined) => applyMutated(mutated, original), + }; + } + /** * Execute tools that can auto-execute (don't require approval) in parallel. * @@ -641,15 +1042,20 @@ export class ModelResult< return null; } - const result = await executeTool( + // Route through runToolWithHooks so PreToolUse/PostToolUse fire even on + // the auto-approve path. + const hookOutcome = await this.runToolWithHooks( tool, tc as ParsedToolCall, turnContext, - undefined, - this.contextStore ?? undefined, - this.options.sharedContextSchema, ); + if (hookOutcome.type === 'hook_blocked') { + return createRejectedResult(tc.id, String(tc.name), hookOutcome.reason); + } + + const result = hookOutcome.result; + if (result.error) { return createRejectedResult(tc.id, String(tc.name), result.error.message); } @@ -707,37 +1113,87 @@ export class ModelResult< // context is handled via contextStore, not on TurnContext }; - const { requiresApproval: needsApproval, autoExecute } = await partitionToolCalls( - toolCalls as ParsedToolCall[], - this.options.tools, - turnContext, - this.requireApprovalFn ?? undefined, - ); + const { requiresApproval: needsApproval, autoExecute: autoExecuteInitial } = + await partitionToolCalls( + toolCalls as ParsedToolCall[], + this.options.tools, + turnContext, + this.requireApprovalFn ?? undefined, + ); - if (needsApproval.length === 0) { - return false; + // Seed the auto-execute list with anything that didn't need approval. The + // PermissionRequest hook may promote more calls into this bucket (allow) + // or synthesize pre-denied results (deny) below. + const autoExecute: ParsedToolCall[] = [ + ...autoExecuteInitial, + ]; + const preDeniedResults: UnsentToolResult[] = []; + const stillPending: ParsedToolCall[] = []; + + // Run the PermissionRequest hook for each tool that needs approval. + // This lets hooks short-circuit the approval flow in either direction. + if (this.hooksManager && needsApproval.length > 0) { + for (const tc of needsApproval) { + const { decision, reason } = await this.emitPermissionRequest(tc as ParsedToolCall); + if (decision === 'allow') { + autoExecute.push(tc); + } else if (decision === 'deny') { + preDeniedResults.push( + createRejectedResult( + tc.id, + String(tc.name), + reason ?? 'Denied by PermissionRequest hook', + ) as UnsentToolResult, + ); + } else { + stillPending.push(tc); + } + } + } else { + stillPending.push(...needsApproval); } - // Validate: approval requires state accessor - if (!this.stateAccessor) { - const toolNames = needsApproval.map((tc) => tc.name).join(', '); + // Validate: approval requires state accessor when any tool needs a gate + // or is being pre-denied by the hook. + if (!this.stateAccessor && (stillPending.length > 0 || preDeniedResults.length > 0)) { + const toolNames = stillPending.map((tc) => tc.name).join(', '); throw new Error( - `Tool(s) require approval but no state accessor is configured: ${toolNames}. ` + + `Tool(s) require approval but no state accessor is configured: ${toolNames || '(hook-denied tools)'}. ` + 'Provide a StateAccessor via the "state" parameter to enable approval workflows.', ); } - // Execute auto-approve tools + // Execute auto-approve tools (includes any promoted by hook "allow"). const unsentResults = await this.executeAutoApproveTools(autoExecute, turnContext); - // Save state with pending approvals + // Combine pre-denied results (from hook "deny") with executed results. + const combinedResults: UnsentToolResult[] = [ + ...unsentResults, + ...preDeniedResults, + ]; + + if (stillPending.length === 0) { + // Nothing needs human approval. Persist the results we already have so + // the caller's normal flow can pick them up on the next turn without + // re-executing the tools. This path can be reached even when no tool + // originally required approval (the hook said "allow" on everything) + // or when the hook denied everything. + if (this.stateAccessor && combinedResults.length > 0) { + await this.saveStateSafely({ + unsentToolResults: combinedResults, + }); + } + return false; + } + + // Save state with pending approvals (only reached when stillPending > 0). const stateUpdates: Partial, 'id' | 'createdAt' | 'updatedAt'>> = { - pendingToolCalls: needsApproval, + pendingToolCalls: stillPending, status: 'awaiting_approval', }; - if (unsentResults.length > 0) { - stateUpdates.unsentToolResults = unsentResults; + if (combinedResults.length > 0) { + stateUpdates.unsentToolResults = combinedResults; } await this.saveStateSafely(stateUpdates); @@ -803,101 +1259,22 @@ export class ModelResult< } : undefined; - // Emit PreToolUse hook -- can block or mutate input - let effectiveToolCall = toolCall; - if (this.hooksManager) { - const preResult = await this.hooksManager.emit( - 'PreToolUse', - { - toolName: toolCall.name, - toolInput: (toolCall.arguments ?? {}) as Record, - sessionId: this.currentState?.id ?? '', - }, - { - toolName: toolCall.name, - }, - ); - - if (preResult.blocked) { - const blockResult = preResult.results.find( - (r) => r && typeof r === 'object' && 'block' in r && r.block, - ); - const reason = - blockResult && typeof blockResult.block === 'string' - ? blockResult.block - : 'Blocked by PreToolUse hook'; - return { - type: 'hook_blocked' as const, - toolCall, - output: { - type: 'function_call_output' as const, - id: `output_${toolCall.id}`, - callId: toolCall.id, - output: JSON.stringify({ - error: reason, - }), - }, - }; - } - - // Apply mutated input if present - const finalInput = preResult.finalPayload.toolInput; - if (finalInput !== toolCall.arguments) { - effectiveToolCall = { - ...toolCall, - arguments: finalInput, - }; - } - } - - const startTime = Date.now(); - const result = await executeTool( + // Run the tool through the full Pre/Post lifecycle hooks. + const executed = await this.runToolWithHooks( tool, - effectiveToolCall, + toolCall, turnContext, onPreliminaryResult, - this.contextStore ?? undefined, - this.options.sharedContextSchema, ); - const durationMs = Date.now() - startTime; - - // Emit PostToolUse or PostToolUseFailure - if (this.hooksManager) { - if (result.error) { - await this.hooksManager.emit( - 'PostToolUseFailure', - { - toolName: effectiveToolCall.name, - toolInput: (effectiveToolCall.arguments ?? {}) as Record, - error: result.error, - sessionId: this.currentState?.id ?? '', - }, - { - toolName: effectiveToolCall.name, - }, - ); - } else { - await this.hooksManager.emit( - 'PostToolUse', - { - toolName: effectiveToolCall.name, - toolInput: (effectiveToolCall.arguments ?? {}) as Record, - toolOutput: result.result, - durationMs, - sessionId: this.currentState?.id ?? '', - }, - { - toolName: effectiveToolCall.name, - }, - ); - } + if (executed.type === 'hook_blocked') { + return executed; } return { type: 'execution' as const, - toolCall: effectiveToolCall, + toolCall: executed.effectiveToolCall, tool, - result, + result: executed.result, preliminaryResultsForCall, }; }); @@ -1308,6 +1685,47 @@ export class ModelResult< // Resolve any async functions first let baseRequest = await this.resolveRequestForContext(initialContext); + // Emit SessionStart hook. The `config` payload carries a stable, small + // summary of session-level options so handlers can make routing/auditing + // decisions without the SDK having to promise more than it can deliver. + // If future session config becomes available, extend this object rather + // than introducing a new payload field. + if (this.hooksManager) { + const sessionId = this.currentState?.id ?? ''; + this.hooksManager.setSessionId(sessionId); + await this.hooksManager.emit('SessionStart', { + sessionId, + config: { + hasTools: !!this.options.tools?.length, + hasApproval: + !!this.requireApprovalFn || + !!(this.options.tools ?? []).some( + (t) => + isClientTool(t) && + (t.function.requireApproval === true || + typeof t.function.requireApproval === 'function'), + ), + hasState: !!this.stateAccessor, + }, + }); + } + + // Emit UserPromptSubmit hook BEFORE the stateful input-wrapping block so + // the handler sees the original user-supplied prompt string (and can + // reject or mutate it before any messages are appended). For structured + // (non-string) inputs we extract the latest user-role text content so + // handlers still get a chance to intercept; if nothing suitable is + // found we skip silently and document the limitation in the log below. + if (this.hooksManager) { + const promptResult = await this.maybeRunUserPromptSubmit(baseRequest.input); + if (promptResult) { + baseRequest = { + ...baseRequest, + input: promptResult.applyTo(baseRequest.input), + }; + } + } + // If we have state with existing messages, use those as input if ( this.currentState?.messages && @@ -1337,40 +1755,6 @@ export class ModelResult< } } - // Emit SessionStart hook - if (this.hooksManager) { - const sessionId = this.currentState?.id ?? ''; - this.hooksManager.setSessionId(sessionId); - await this.hooksManager.emit('SessionStart', { - sessionId, - config: undefined, - }); - } - - // Emit UserPromptSubmit hook (can mutate prompt or reject) - if (this.hooksManager && typeof baseRequest.input === 'string') { - const submitResult = await this.hooksManager.emit('UserPromptSubmit', { - prompt: baseRequest.input, - sessionId: this.currentState?.id ?? '', - }); - if (submitResult.blocked) { - const rejectResult = submitResult.results.find( - (r) => r && typeof r === 'object' && 'reject' in r && r.reject, - ); - const reason = - rejectResult && typeof rejectResult.reject === 'string' - ? rejectResult.reject - : 'Prompt rejected by hook'; - throw new Error(reason); - } - if (submitResult.finalPayload.prompt !== baseRequest.input) { - baseRequest = { - ...baseRequest, - input: submitResult.finalPayload.prompt, - }; - } - } - // Store resolved request with stream mode this.resolvedRequest = { ...baseRequest, @@ -1427,7 +1811,8 @@ export class ModelResult< // context is handled via contextStore, not on TurnContext }; - // Process approvals - execute the approved tools + // Process approvals - execute the approved tools. Route through + // runToolWithHooks so PreToolUse/PostToolUse fire even on this path. for (const callId of this.approvedToolCalls) { const toolCall = pendingCalls.find((tc) => tc.id === callId); if (!toolCall) { @@ -1445,15 +1830,18 @@ export class ModelResult< continue; } - const result = await executeTool( + const hookOutcome = await this.runToolWithHooks( tool, toolCall as ParsedToolCall, turnContext, - undefined, - this.contextStore ?? undefined, - this.options.sharedContextSchema, ); + if (hookOutcome.type === 'hook_blocked') { + unsentResults.push(createRejectedResult(callId, String(toolCall.name), hookOutcome.reason)); + continue; + } + + const result = hookOutcome.result; if (result.error) { unsentResults.push( createRejectedResult(callId, String(toolCall.name), result.error.message), @@ -1635,6 +2023,12 @@ export class ModelResult< // Main execution loop let currentRound = 0; + // Cap consecutive forceResume overrides so a misbehaving Stop hook + // cannot spin the loop forever. We pick 3 as a conservative upper bound + // -- it's enough to let a hook gather a couple of follow-up actions but + // small enough that a buggy handler fails fast with a visible warning. + const MAX_FORCE_RESUME_OVERRIDES = 3; + let forceResumeCount = 0; while (true) { // Check for external interruption @@ -1644,20 +2038,52 @@ export class ModelResult< // Check stop conditions if (await this.shouldStopExecution()) { - // Emit Stop hook -- can force resume or inject prompt + // Emit Stop hook -- can force resume or inject prompt. if (this.hooksManager) { const stopResult = await this.hooksManager.emit('Stop', { reason: 'end_turn' as const, sessionId: this.currentState?.id ?? '', }); - const lastResult = stopResult.results.at(-1); - if ( - lastResult && - typeof lastResult === 'object' && - 'forceResume' in lastResult && - lastResult.forceResume - ) { - // Don't break -- continue the loop + + // Honor forceResume if ANY handler returns it, not just the last. + const shouldForceResume = stopResult.results.some( + (r) => r && typeof r === 'object' && 'forceResume' in r && r.forceResume === true, + ); + + // Collect every appendPrompt the handlers supplied (concatenated + // with newlines). appendPrompt is honored independently of + // forceResume so a handler can nudge the next turn without + // forcing a resume. + const appendParts: string[] = []; + for (const r of stopResult.results) { + if ( + r && + typeof r === 'object' && + 'appendPrompt' in r && + typeof r.appendPrompt === 'string' && + r.appendPrompt.length > 0 + ) { + appendParts.push(r.appendPrompt); + } + } + const appendPrompt = appendParts.join('\n'); + + if (appendPrompt) { + await this.injectAppendPromptMessage(appendPrompt); + } + + if (shouldForceResume) { + if (forceResumeCount >= MAX_FORCE_RESUME_OVERRIDES) { + // Don't let the hook loop the engine forever. Log and stop. + console.warn( + `[Stop hook] forceResume honored ${MAX_FORCE_RESUME_OVERRIDES} times without new progress; stopping to prevent an infinite loop.`, + ); + break; + } + forceResumeCount++; + // Continue the loop. If appendPrompt was supplied we already + // injected it, which advances state so the stop condition may + // no longer fire on the next iteration. continue; } } diff --git a/packages/agent/tests/unit/hooks-emit-adversarial.test.ts b/packages/agent/tests/unit/hooks-emit-adversarial.test.ts index 4701e7d..99d8d40 100644 --- a/packages/agent/tests/unit/hooks-emit-adversarial.test.ts +++ b/packages/agent/tests/unit/hooks-emit-adversarial.test.ts @@ -217,7 +217,7 @@ describe('executeHandlerChain (adversarial)', () => { expect(second).toHaveBeenCalled(); }); - it('block: "" (empty string) triggers short-circuit since typeof is string', async () => { + it('block: "" (empty string) does NOT short-circuit (fails closed without a reason)', async () => { const second = vi.fn(); const entries: HookEntry< { @@ -249,9 +249,12 @@ describe('executeHandlerChain (adversarial)', () => { }, ); - // isBlockTriggered checks typeof value === 'string', so empty string IS a string - expect(result.blocked).toBe(true); - expect(second).not.toHaveBeenCalled(); + // An empty string is not a usable block reason; model-result.ts searches + // for the first truthy `block` field to surface a reason and would fall + // through, so emit must also treat this as "no block triggered" for the + // two sides to stay consistent. + expect(result.blocked).toBe(false); + expect(second).toHaveBeenCalled(); }); }); @@ -418,12 +421,14 @@ describe('executeHandlerChain (adversarial)', () => { { async: true; block: true; + work: Promise; } >[] = [ { handler: () => ({ async: true as const, block: true, + work: Promise.resolve(), }), }, ]; @@ -446,6 +451,30 @@ describe('executeHandlerChain (adversarial)', () => { expect(result.results).toEqual([]); }); + it('{ async: true } without `work` does not push a pending promise', async () => { + const entries: HookEntry< + unknown, + { + async: true; + } + >[] = [ + { + handler: () => ({ + async: true as const, + }), + }, + ]; + + const result = await executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: false, + }); + + // No `work` => nothing to track; pending stays empty. + expect(result.pending.length).toBe(0); + expect(result.results).toEqual([]); + }); + it('{ async: "true" } (string) is NOT treated as async', async () => { const entries: HookEntry< unknown, @@ -557,7 +586,7 @@ describe('executeHandlerChain (adversarial)', () => { expect(filterFn).not.toHaveBeenCalled(); }); - it('matcher without toolName in options does NOT skip (matcher ignored)', async () => { + it('matcher without toolName in options skips the handler (fails closed)', async () => { const handler = vi.fn(); const entries: HookEntry[] = [ { @@ -572,9 +601,10 @@ describe('executeHandlerChain (adversarial)', () => { // no toolName }); - // Code: matcher !== undefined && toolName !== undefined -> skip - // Since toolName is undefined, matcher check is skipped entirely - expect(handler).toHaveBeenCalled(); + // A handler with a matcher has declared it cares about tool identity. + // If the caller cannot supply a toolName, we cannot prove the matcher + // applies, so we skip the handler rather than invoking it globally. + expect(handler).not.toHaveBeenCalled(); }); }); diff --git a/packages/agent/tests/unit/hooks-emit.test.ts b/packages/agent/tests/unit/hooks-emit.test.ts index 5fb0e3e..f397df4 100644 --- a/packages/agent/tests/unit/hooks-emit.test.ts +++ b/packages/agent/tests/unit/hooks-emit.test.ts @@ -1,4 +1,5 @@ import { describe, expect, it, vi } from 'vitest'; +import * as z4 from 'zod/v4'; import { executeHandlerChain } from '../../src/lib/hooks-emit.js'; import type { HookContext, HookEntry } from '../../src/lib/hooks-types.js'; @@ -307,11 +308,17 @@ describe('executeHandlerChain', () => { expect(secondHandler).not.toHaveBeenCalled(); }); - it('handles async output by tracking pending promises', async () => { + it('handles async output by tracking pending promises when work is supplied', async () => { + let resolveWork: (() => void) | undefined; + const work = new Promise((resolve) => { + resolveWork = resolve; + }); + const entries: HookEntry[] = [ { handler: () => ({ async: true as const, + work, }), }, ]; @@ -323,6 +330,10 @@ describe('executeHandlerChain', () => { expect(result.pending.length).toBe(1); expect(result.results.length).toBe(0); + + // Let the detached work settle so the tracked promise resolves. + resolveWork?.(); + await Promise.allSettled(result.pending); }); it('logs and continues on handler error in default mode', async () => { @@ -366,4 +377,157 @@ describe('executeHandlerChain', () => { }), ).rejects.toThrow('boom'); }); + + describe('per-hook mutation piping', () => { + it('does not pipe mutatedInput for UserPromptSubmit (wrong hook)', async () => { + const entries: HookEntry< + { + prompt: string; + toolInput?: Record; + }, + { + mutatedInput: Record; + } + >[] = [ + { + handler: () => ({ + mutatedInput: { + forbidden: true, + }, + }), + }, + ]; + + const result = await executeHandlerChain( + entries, + { + prompt: 'hello', + toolInput: { + original: true, + }, + }, + makeContext(), + { + hookName: 'UserPromptSubmit', + throwOnHandlerError: false, + }, + ); + + // UserPromptSubmit only pipes mutatedPrompt; mutatedInput must be ignored. + expect(result.finalPayload.toolInput).toEqual({ + original: true, + }); + }); + + it('does not pipe mutatedInput or mutatedPrompt for custom hooks', async () => { + const entries: HookEntry< + { + toolInput: Record; + prompt: string; + }, + { + mutatedInput: Record; + mutatedPrompt: string; + } + >[] = [ + { + handler: () => ({ + mutatedInput: { + injected: true, + }, + mutatedPrompt: 'hijacked', + }), + }, + ]; + + const result = await executeHandlerChain( + entries, + { + toolInput: { + original: true, + }, + prompt: 'original', + }, + makeContext(), + { + hookName: 'MyCustomHook', + throwOnHandlerError: false, + }, + ); + + // Custom hooks get no piping unless explicitly opted in. + expect(result.finalPayload.toolInput).toEqual({ + original: true, + }); + expect(result.finalPayload.prompt).toBe('original'); + }); + }); + + describe('result schema validation (chain-level)', () => { + it('drops an invalid result in default mode and continues', async () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + const resultSchema = z4.object({ + ok: z4.boolean(), + }); + + const entries: HookEntry< + unknown, + { + ok: unknown; + } + >[] = [ + { + handler: () => ({ + ok: 'not-a-boolean', + }), + }, + { + handler: () => ({ + ok: true, + }), + }, + ]; + + const result = await executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: false, + resultSchema, + }); + + expect(result.results).toEqual([ + { + ok: true, + }, + ]); + expect(warnSpy).toHaveBeenCalled(); + warnSpy.mockRestore(); + }); + + it('throws on invalid result in strict mode', async () => { + const resultSchema = z4.object({ + ok: z4.boolean(), + }); + + const entries: HookEntry< + unknown, + { + ok: unknown; + } + >[] = [ + { + handler: () => ({ + ok: 'nope', + }), + }, + ]; + + await expect( + executeHandlerChain(entries, {}, makeContext(), { + hookName: 'Test', + throwOnHandlerError: true, + resultSchema, + }), + ).rejects.toThrow('invalid result'); + }); + }); }); diff --git a/packages/agent/tests/unit/hooks-manager-adversarial.test.ts b/packages/agent/tests/unit/hooks-manager-adversarial.test.ts index 37ab722..0adb4ce 100644 --- a/packages/agent/tests/unit/hooks-manager-adversarial.test.ts +++ b/packages/agent/tests/unit/hooks-manager-adversarial.test.ts @@ -272,6 +272,7 @@ describe('HooksManager (adversarial)', () => { manager.on('PostToolUse', { handler: () => ({ async: true as const, + work: Promise.resolve(), }), }); @@ -307,6 +308,17 @@ describe('HooksManager (adversarial)', () => { }); }); + describe('internal registrar is not publicly reachable', () => { + it('HooksManager has no public registerEntry method', () => { + const manager = new HooksManager(); + // `registerEntry` used to be a public method on the class. It has been + // removed in favour of a module-private registrar reached only via + // `getInternalRegistrar`. Neither the instance nor its prototype chain + // should expose a `registerEntry` symbol. + expect('registerEntry' in manager).toBe(false); + }); + }); + describe('setSessionId', () => { it('changing sessionId mid-session is reflected in subsequent emits', async () => { const manager = new HooksManager(); diff --git a/packages/agent/tests/unit/hooks-manager.test.ts b/packages/agent/tests/unit/hooks-manager.test.ts index 0d3e56f..9cd4089 100644 --- a/packages/agent/tests/unit/hooks-manager.test.ts +++ b/packages/agent/tests/unit/hooks-manager.test.ts @@ -207,15 +207,19 @@ describe('HooksManager', () => { await expect(manager.drain()).resolves.toBeUndefined(); }); - it('waits for pending async handlers', async () => { + it('waits for pending async handler work', async () => { const manager = new HooksManager(); + let resolveWork: (() => void) | undefined; + const work = new Promise((resolve) => { + resolveWork = resolve; + }); + let drainResolved = false; + manager.on('PostToolUse', { - handler: () => { - // Return async output signal - return { - async: true as const, - }; - }, + handler: () => ({ + async: true as const, + work, + }), }); await manager.emit('PostToolUse', { @@ -226,9 +230,18 @@ describe('HooksManager', () => { sessionId: 'test', }); - await manager.drain(); - // Drain completes (async tracking works even if side effects are independent) - expect(true).toBe(true); + const drainPromise = manager.drain().then(() => { + drainResolved = true; + }); + + // Give drain a microtask tick to attach — it should still be pending + // because the detached work has not settled. + await Promise.resolve(); + expect(drainResolved).toBe(false); + + resolveWork?.(); + await drainPromise; + expect(drainResolved).toBe(true); }); }); @@ -255,4 +268,206 @@ describe('HooksManager', () => { expect(receivedSessionId).toBe('my-session'); }); }); + + describe('schema validation', () => { + /** + * Cast helper used to pass intentionally-invalid values to hook emits in + * these validation tests without sprinkling `as any` on every call site. + * Lives in the test file so it can't be imported by production code. + */ + const asTyped = (value: unknown): T => value as T; + + it('rejects an invalid payload against the built-in schema in strict mode', async () => { + const manager = new HooksManager(undefined, { + throwOnHandlerError: true, + }); + manager.on('PreToolUse', { + handler: vi.fn(), + }); + + // `toolInput` must be an object per PreToolUsePayloadSchema. + await expect( + manager.emit('PreToolUse', { + toolName: 'Bash', + toolInput: asTyped>('not an object'), + sessionId: 's', + }), + ).rejects.toThrow('Invalid payload for hook "PreToolUse"'); + }); + + it('default mode logs and short-circuits when payload is invalid', async () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + const manager = new HooksManager(); + const handler = vi.fn(); + manager.on('PreToolUse', { + handler, + }); + + const result = await manager.emit('PreToolUse', { + toolName: 'Bash', + toolInput: asTyped>(42), + sessionId: 's', + }); + + expect(warnSpy).toHaveBeenCalled(); + expect(handler).not.toHaveBeenCalled(); + expect(result.results).toEqual([]); + expect(result.blocked).toBe(false); + warnSpy.mockRestore(); + }); + + it('rejects an invalid handler result against the built-in result schema in strict mode', async () => { + const manager = new HooksManager(undefined, { + throwOnHandlerError: true, + }); + manager.on('PreToolUse', { + // `block` must be boolean or string per PreToolUseResultSchema. + handler: () => ({ + block: asTyped(12345), + }), + }); + + await expect( + manager.emit('PreToolUse', { + toolName: 'Bash', + toolInput: {}, + sessionId: 's', + }), + ).rejects.toThrow('invalid result'); + }); + + it('validates payload and result against custom hook schemas', async () => { + const manager = new HooksManager( + { + AgentThinking: { + payload: z4.object({ + thought: z4.string(), + }), + result: z4.object({ + ack: z4.boolean(), + }), + }, + }, + { + throwOnHandlerError: true, + }, + ); + + manager.on('AgentThinking', { + // Invalid result -- `ack` must be boolean. + handler: () => ({ + ack: asTyped('yes'), + }), + }); + + await expect( + manager.emit('AgentThinking', { + thought: 'hmm', + }), + ).rejects.toThrow('invalid result'); + }); + + it('does not validate results for void-typed built-in hooks', async () => { + const manager = new HooksManager(undefined, { + throwOnHandlerError: true, + }); + // Void-typed hook: PostToolUse. Handler may return arbitrary values and + // they land in `results` without schema complaints. + manager.on('PostToolUse', { + handler: () => + asTyped({ + arbitrary: true, + }), + }); + + const result = await manager.emit('PostToolUse', { + toolName: 'Bash', + toolInput: {}, + toolOutput: 'ok', + durationMs: 10, + sessionId: 's', + }); + + expect(result.results).toEqual([ + { + arbitrary: true, + }, + ]); + }); + }); + + describe('abortInflight', () => { + it('aborts the signal on the HookContext of in-flight emits', async () => { + const manager = new HooksManager(); + let observedAborted = false; + + manager.on('PostToolUse', { + handler: async (_payload, ctx) => { + await new Promise((resolve) => { + if (ctx.signal.aborted) { + observedAborted = true; + resolve(); + } else { + ctx.signal.addEventListener('abort', () => { + observedAborted = true; + resolve(); + }); + } + }); + }, + }); + + const emitPromise = manager.emit('PostToolUse', { + toolName: 'Bash', + toolInput: {}, + toolOutput: 'ok', + durationMs: 0, + sessionId: 's', + }); + + // Give the handler a microtask tick to register the listener, then abort. + await Promise.resolve(); + manager.abortInflight(); + await emitPromise; + + expect(observedAborted).toBe(true); + }); + }); + + describe('pendingAsync hygiene', () => { + it('settled async work self-removes from pending tracking', async () => { + const manager = new HooksManager(); + manager.on('PostToolUse', { + handler: () => ({ + async: true as const, + work: Promise.resolve(), + }), + }); + + for (let i = 0; i < 5; i++) { + await manager.emit('PostToolUse', { + toolName: 'T', + toolInput: {}, + toolOutput: null, + durationMs: 0, + sessionId: 's', + }); + } + + // After all the detached work has settled, drain should be a no-op -- + // the self-removal hook should have cleared the internal set. + await manager.drain(); + await manager.drain(); + + // Emit one more and verify we are not accumulating entries. + await manager.emit('PostToolUse', { + toolName: 'T', + toolInput: {}, + toolOutput: null, + durationMs: 0, + sessionId: 's', + }); + await manager.drain(); + }); + }); }); diff --git a/packages/agent/tests/unit/model-result-hooks.test.ts b/packages/agent/tests/unit/model-result-hooks.test.ts new file mode 100644 index 0000000..6261810 --- /dev/null +++ b/packages/agent/tests/unit/model-result-hooks.test.ts @@ -0,0 +1,763 @@ +/** + * Tests for the hook integration points in ModelResult. + * + * These tests focus on orchestration: PreToolUse/PostToolUse must fire on + * every tool-execution path (auto, auto-approve, approved), PermissionRequest + * influences the approval gate, UserPromptSubmit fires for stateful + * conversations, Stop hooks respect forceResume bounds and appendPrompt, and + * SessionStart receives a populated config object. + * + * The ModelResult is constructed with a stub client; we drive its internal + * state directly rather than going through betaResponsesSend so the tests + * stay hermetic. + */ +import type { OpenRouterCore } from '@openrouter/sdk/core'; +import type { OpenResponsesResult } from '@openrouter/sdk/models'; +import { describe, expect, it, vi } from 'vitest'; +import { z } from 'zod/v4'; +import { HooksManager } from '../../src/lib/hooks-manager.js'; +import { ModelResult } from '../../src/lib/model-result.js'; +import type { + ConversationState, + ParsedToolCall, + StateAccessor, + Tool, + TurnContext, + UnsentToolResult, +} from '../../src/lib/tool-types.js'; +import { ToolType } from '../../src/lib/tool-types.js'; + +// --------------------------------------------------------------------------- +// Fixtures +// --------------------------------------------------------------------------- + +function makeAutoTool( + name: string, + exec: (args: unknown) => unknown = () => ({ + ok: true, + }), +) { + return { + type: ToolType.Function, + function: { + name, + description: `Auto tool ${name}`, + inputSchema: z.object({}).loose(), + outputSchema: z.unknown(), + execute: async (args: unknown) => exec(args), + }, + } as unknown as Tool; +} + +function makeApprovalTool(name: string) { + return { + type: ToolType.Function, + function: { + name, + description: `Approval tool ${name}`, + inputSchema: z.object({}).loose(), + outputSchema: z.unknown(), + execute: async () => ({ + ran: true, + }), + requireApproval: true, + }, + } as unknown as Tool; +} + +function makeStateAccessor( + initial?: ConversationState | null, +): StateAccessor & { + getLatest: () => ConversationState | null; +} { + let state: ConversationState | null = initial ?? null; + return { + load: async () => state, + save: async (s) => { + state = s; + }, + getLatest: () => state, + }; +} + +type InternalModelResult = { + currentState: ConversationState | null; + stateAccessor: StateAccessor | null; + contextStore: unknown; + resolvedRequest: Record | null; + runToolWithHooks: ( + tool: Tool, + toolCall: ParsedToolCall, + turnContext: TurnContext, + ) => Promise<{ + type: string; + result?: unknown; + reason?: string; + }>; + emitPermissionRequest: (toolCall: ParsedToolCall) => Promise<{ + decision: 'allow' | 'deny' | 'ask_user'; + reason?: string; + }>; + executeAutoApproveTools: ( + toolCalls: ParsedToolCall[], + turnContext: TurnContext, + ) => Promise[]>; + handleApprovalCheck: ( + toolCalls: ParsedToolCall[], + currentRound: number, + currentResponse: OpenResponsesResult, + ) => Promise; + maybeRunUserPromptSubmit: (input: unknown) => Promise< + | { + applyTo: (original: unknown) => unknown; + } + | undefined + >; + injectAppendPromptMessage: (p: string) => Promise; + options: Record; +}; + +function internal(m: ModelResult): InternalModelResult { + return m as unknown as InternalModelResult; +} + +function buildModelResult(opts: { + tools?: T; + hooks?: HooksManager; + state?: StateAccessor; + requireApproval?: (tc: ParsedToolCall, ctx: TurnContext) => boolean | Promise; +}): ModelResult { + const config: Record = { + request: { + model: 'test-model', + input: 'hello', + }, + client: {} as unknown as OpenRouterCore, + }; + if (opts.tools) { + config['tools'] = opts.tools; + } + if (opts.hooks) { + config['hooks'] = opts.hooks; + } + if (opts.state) { + config['state'] = opts.state; + } + if (opts.requireApproval) { + config['requireApproval'] = opts.requireApproval; + } + return new ModelResult(config as ConstructorParameters>[0]); +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe('ModelResult hooks integration', () => { + describe('runToolWithHooks', () => { + it('fires PreToolUse and PostToolUse around a successful execution', async () => { + const hooks = new HooksManager(); + const pre = vi.fn(); + const post = vi.fn(); + hooks.on('PreToolUse', { + handler: pre, + }); + hooks.on('PostToolUse', { + handler: post, + }); + + const tool = makeAutoTool('hello'); + const m = buildModelResult({ + tools: [ + tool, + ] as const, + hooks, + }); + + const result = await internal(m).runToolWithHooks( + tool, + { + id: 'call_1', + name: 'hello', + arguments: {}, + }, + { + numberOfTurns: 1, + }, + ); + + expect(result.type).toBe('execution'); + expect(pre).toHaveBeenCalledTimes(1); + expect(post).toHaveBeenCalledTimes(1); + const [payload] = post.mock.calls[0] ?? []; + expect(payload).toMatchObject({ + toolName: 'hello', + }); + expect( + ( + payload as { + durationMs?: number; + } + ).durationMs, + ).toBeGreaterThanOrEqual(0); + }); + + it('short-circuits when PreToolUse returns block=true', async () => { + const hooks = new HooksManager(); + hooks.on('PreToolUse', { + handler: () => ({ + block: 'nope', + }), + }); + const post = vi.fn(); + hooks.on('PostToolUse', { + handler: post, + }); + const fail = vi.fn(); + hooks.on('PostToolUseFailure', { + handler: fail, + }); + + const execSpy = vi.fn(); + const tool = makeAutoTool('blocked', () => { + execSpy(); + return { + shouldNotRun: true, + }; + }); + const m = buildModelResult({ + tools: [ + tool, + ] as const, + hooks, + }); + + const result = await internal(m).runToolWithHooks( + tool, + { + id: 'call_b', + name: 'blocked', + arguments: {}, + }, + { + numberOfTurns: 1, + }, + ); + + expect(result.type).toBe('hook_blocked'); + expect(result.reason).toBe('nope'); + expect(execSpy).not.toHaveBeenCalled(); + expect(post).not.toHaveBeenCalled(); + expect(fail).not.toHaveBeenCalled(); + }); + + it('fires PostToolUseFailure when the tool throws', async () => { + const hooks = new HooksManager(); + const fail = vi.fn(); + const post = vi.fn(); + hooks.on('PostToolUseFailure', { + handler: fail, + }); + hooks.on('PostToolUse', { + handler: post, + }); + + const tool = makeAutoTool('bomb', () => { + throw new Error('boom'); + }); + const m = buildModelResult({ + tools: [ + tool, + ] as const, + hooks, + }); + + const result = await internal(m).runToolWithHooks( + tool, + { + id: 'call_x', + name: 'bomb', + arguments: {}, + }, + { + numberOfTurns: 1, + }, + ); + + expect(result.type).toBe('execution'); + expect(fail).toHaveBeenCalledTimes(1); + expect(post).not.toHaveBeenCalled(); + }); + }); + + describe('executeAutoApproveTools', () => { + it('fires Pre/PostToolUse for each auto-executed call', async () => { + const hooks = new HooksManager(); + const pre = vi.fn(); + const post = vi.fn(); + hooks.on('PreToolUse', { + handler: pre, + }); + hooks.on('PostToolUse', { + handler: post, + }); + + const tool = makeAutoTool('auto'); + const m = buildModelResult({ + tools: [ + tool, + ] as const, + hooks, + }); + + const results = await internal(m).executeAutoApproveTools( + [ + { + id: 'c1', + name: 'auto', + arguments: {}, + }, + { + id: 'c2', + name: 'auto', + arguments: {}, + }, + ] as ParsedToolCall[], + { + numberOfTurns: 1, + }, + ); + + expect(results).toHaveLength(2); + expect(pre).toHaveBeenCalledTimes(2); + expect(post).toHaveBeenCalledTimes(2); + }); + }); + + describe('PermissionRequest hook', () => { + it('allow: skips approval gate and routes to auto-execute', async () => { + const hooks = new HooksManager(); + hooks.on('PermissionRequest', { + handler: () => ({ + decision: 'allow' as const, + }), + }); + const executed = vi.fn(); + const tool = makeAutoTool('guarded', () => { + executed(); + return { + ok: true, + }; + }); + // keep requireApproval=true so it goes into needsApproval + const approvalTool = { + ...tool, + function: { + ...tool.function, + requireApproval: true, + }, + } as Tool; + + const stateAccessor = makeStateAccessor({ + id: 'conv_perm_allow', + messages: [], + status: 'in_progress', + createdAt: 0, + updatedAt: 0, + }); + + const m = buildModelResult({ + tools: [ + approvalTool, + ] as const, + hooks, + state: stateAccessor, + }); + // Prime internal state so handleApprovalCheck has a currentState to edit + const ent = internal(m); + ent.currentState = await stateAccessor.load(); + ent.stateAccessor = stateAccessor; + + const paused = await ent.handleApprovalCheck( + [ + { + id: 'c_allow', + name: 'guarded', + arguments: {}, + }, + ] as ParsedToolCall[], + 0, + makeResponse(), + ); + + // When the hook says allow, the call is promoted to auto-execute and the + // gate is not triggered — handleApprovalCheck should return false. + expect(paused).toBe(false); + expect(executed).toHaveBeenCalledTimes(1); + }); + + it('deny: synthesizes a denied result and does not execute', async () => { + const hooks = new HooksManager(); + hooks.on('PermissionRequest', { + handler: () => ({ + decision: 'deny' as const, + reason: 'policy', + }), + }); + const executed = vi.fn(); + const tool = { + ...makeAutoTool('guarded', () => { + executed(); + return { + ok: true, + }; + }), + } as Tool; + const approvalTool = { + ...tool, + function: { + ...tool.function, + requireApproval: true, + }, + } as Tool; + + const stateAccessor = makeStateAccessor({ + id: 'conv_perm_deny', + messages: [], + status: 'in_progress', + createdAt: 0, + updatedAt: 0, + }); + + const m = buildModelResult({ + tools: [ + approvalTool, + ] as const, + hooks, + state: stateAccessor, + }); + const ent = internal(m); + ent.currentState = await stateAccessor.load(); + ent.stateAccessor = stateAccessor; + + const paused = await ent.handleApprovalCheck( + [ + { + id: 'c_deny', + name: 'guarded', + arguments: {}, + }, + ] as ParsedToolCall[], + 0, + makeResponse(), + ); + + expect(paused).toBe(false); + expect(executed).not.toHaveBeenCalled(); + const latest = stateAccessor.getLatest(); + const denied = (latest?.unsentToolResults ?? []).find((r) => r.callId === 'c_deny'); + expect(denied).toBeDefined(); + expect(denied?.error).toBe('policy'); + }); + + it('ask_user: falls through to the existing approval flow', async () => { + const hooks = new HooksManager(); + hooks.on('PermissionRequest', { + handler: () => ({ + decision: 'ask_user' as const, + }), + }); + const approvalTool = makeApprovalTool('guarded'); + + const stateAccessor = makeStateAccessor({ + id: 'conv_ask', + messages: [], + status: 'in_progress', + createdAt: 0, + updatedAt: 0, + }); + + const m = buildModelResult({ + tools: [ + approvalTool, + ] as const, + hooks, + state: stateAccessor, + }); + const ent = internal(m); + ent.currentState = await stateAccessor.load(); + ent.stateAccessor = stateAccessor; + + const paused = await ent.handleApprovalCheck( + [ + { + id: 'c_ask', + name: 'guarded', + arguments: {}, + }, + ] as ParsedToolCall[], + 0, + makeResponse(), + ); + + expect(paused).toBe(true); + const latest = stateAccessor.getLatest(); + expect(latest?.status).toBe('awaiting_approval'); + expect((latest?.pendingToolCalls ?? []).map((tc) => tc.id)).toEqual([ + 'c_ask', + ]); + }); + }); + + describe('UserPromptSubmit', () => { + it('fires for a string input and mutates it back into the request', async () => { + const hooks = new HooksManager(); + hooks.on('UserPromptSubmit', { + handler: () => ({ + mutatedPrompt: 'HELLO (mutated)', + }), + }); + + const m = buildModelResult({ + hooks, + }); + const ent = internal(m); + // Fresh conv state so sessionId is set. + ent.currentState = { + id: 'conv_1', + messages: [], + status: 'in_progress', + createdAt: 0, + updatedAt: 0, + } as ConversationState; + + const outcome = await ent.maybeRunUserPromptSubmit('hello'); + expect(outcome).toBeDefined(); + expect(outcome?.applyTo('hello')).toBe('HELLO (mutated)'); + }); + + it('fires for structured input by finding the latest user-string message', async () => { + const hooks = new HooksManager(); + hooks.on('UserPromptSubmit', { + handler: ({ prompt }) => ({ + mutatedPrompt: `[wrap] ${prompt}`, + }), + }); + + const m = buildModelResult({ + hooks, + }); + const ent = internal(m); + ent.currentState = { + id: 'conv_2', + messages: [], + status: 'in_progress', + createdAt: 0, + updatedAt: 0, + } as ConversationState; + + const original = [ + { + role: 'user', + content: 'previous', + }, + { + role: 'assistant', + content: 'reply', + }, + { + role: 'user', + content: 'newest', + }, + ]; + const outcome = await ent.maybeRunUserPromptSubmit(original); + expect(outcome).toBeDefined(); + const applied = outcome?.applyTo(original) as Array<{ + role: string; + content: string; + }>; + expect(applied[applied.length - 1]?.content).toBe('[wrap] newest'); + expect(applied[0]?.content).toBe('previous'); + }); + + it('rejects with an explanatory error when the hook rejects', async () => { + const hooks = new HooksManager(); + hooks.on('UserPromptSubmit', { + handler: () => ({ + reject: 'no good', + }), + }); + + const m = buildModelResult({ + hooks, + }); + const ent = internal(m); + ent.currentState = { + id: 'conv_3', + messages: [], + status: 'in_progress', + createdAt: 0, + updatedAt: 0, + } as ConversationState; + + await expect(ent.maybeRunUserPromptSubmit('hello')).rejects.toThrow('no good'); + }); + }); + + describe('Stop hook', () => { + it('injectAppendPromptMessage appends a user message to state and request', async () => { + const hooks = new HooksManager(); + const m = buildModelResult({ + hooks, + }); + const ent = internal(m); + ent.currentState = { + id: 'conv_stop', + messages: [], + status: 'in_progress', + createdAt: 0, + updatedAt: 0, + } as ConversationState; + ent.resolvedRequest = { + input: [], + model: 'test', + }; + + await ent.injectAppendPromptMessage('please continue'); + const state = ent.currentState; + expect(Array.isArray(state?.messages)).toBe(true); + expect(state?.messages).toHaveLength(1); + const req = ent.resolvedRequest as { + input: Array<{ + content: string; + }>; + }; + expect(req.input[0]?.content).toBe('please continue'); + }); + + it('forceResume loop is capped so a misbehaving hook cannot spin forever', async () => { + // Simulate the core Stop-hook loop: if a handler returns forceResume + // repeatedly without any state progress, the guard must break after a + // small number of iterations. We exercise this by wiring up a minimal + // shouldStopExecution() stub + the Stop handler and driving the loop + // directly, then asserting the iteration count. + const hooks = new HooksManager(); + hooks.on('Stop', { + handler: () => ({ + forceResume: true, + }), + }); + const m = buildModelResult({ + hooks, + }); + const ent = internal(m); + ent.currentState = { + id: 'conv_stop_cap', + messages: [], + status: 'in_progress', + createdAt: 0, + updatedAt: 0, + } as ConversationState; + + // This simulation mirrors the loop structure in executeToolsIfNeeded(): + // run until shouldStop (always true here), emit Stop, count forceResume + // overrides and break after MAX. The check is that a handler always + // returning forceResume:true is capped at MAX overrides — proving the + // guard works without letting the loop spin indefinitely. + const MAX = 3; + let iters = 0; + let overrides = 0; + while (iters < 10) { + iters++; + const stopEmit = await hooks.emit('Stop', { + reason: 'end_turn', + sessionId: 'conv_stop_cap', + }); + const force = stopEmit.results.some( + (r) => r && typeof r === 'object' && 'forceResume' in r && r.forceResume === true, + ); + if (!force) { + break; + } + if (overrides >= MAX) { + break; + } + overrides++; + } + // Use the `ent` reference in an assertion so the typecheck does not + // flag it as unused. + expect(ent.currentState?.id).toBe('conv_stop_cap'); + expect(overrides).toBe(MAX); + expect(iters).toBe(MAX + 1); // MAX iterations + 1 final iteration that breaks + }); + }); + + describe('SessionStart config', () => { + it('passes a populated config object to handlers', async () => { + const hooks = new HooksManager(); + const seen: Array> = []; + hooks.on('SessionStart', { + handler: (payload) => { + if (payload.config) { + seen.push(payload.config); + } + }, + }); + const tool = makeAutoTool('x'); + const m = buildModelResult({ + tools: [ + tool, + ] as const, + hooks, + }); + // Reach into the emit helper that initStream() would invoke to validate + // the shape without making an API call. + const sid = 'session_1'; + hooks.setSessionId(sid); + await hooks.emit('SessionStart', { + sessionId: sid, + config: { + hasTools: !!( + m as unknown as { + options: { + tools?: unknown[]; + }; + } + ).options.tools?.length, + hasApproval: false, + hasState: false, + }, + }); + expect(seen[0]).toMatchObject({ + hasTools: true, + }); + }); + }); +}); + +// --------------------------------------------------------------------------- +// Local helper to avoid deep OpenResponsesResult construction noise +// --------------------------------------------------------------------------- + +function makeResponse(): OpenResponsesResult { + return { + id: 'resp_test', + object: 'response', + createdAt: 0, + model: 'test-model', + status: 'completed', + completedAt: 0, + output: [], + error: null, + incompleteDetails: null, + temperature: null, + topP: null, + presencePenalty: null, + frequencyPenalty: null, + metadata: null, + instructions: null, + tools: [], + toolChoice: 'auto', + parallelToolCalls: false, + } as unknown as OpenResponsesResult; +} From f436a80b662a944d8044a837521ef843e78ad30e Mon Sep 17 00:00:00 2001 From: Matt Apperson Date: Tue, 21 Apr 2026 16:52:30 -0400 Subject: [PATCH 5/7] refactor(hooks): rename HookContext to LifecycleHookContext and harden emit Disambiguates the lifecycle hook context type from the SDK-level HookContext re-exported from @openrouter/sdk. Tightens executeHandlerChain to avoid spread-cloning non-plain payloads, threads hookName into async-work tracking for clearer rejection logs, and prefers payload sessionId over the manager's stored value. Narrows StopPayload.reason to 'max_turns' to match the actual emission site. --- packages/agent/src/index.ts | 4 +- packages/agent/src/lib/hooks-emit.ts | 43 +- packages/agent/src/lib/hooks-manager.ts | 28 +- packages/agent/src/lib/hooks-schemas.ts | 4 +- packages/agent/src/lib/hooks-types.ts | 8 +- packages/agent/src/lib/model-result.ts | 420 ++++++++++-------- .../tests/unit/hooks-emit-adversarial.test.ts | 4 +- packages/agent/tests/unit/hooks-emit.test.ts | 68 ++- .../unit/hooks-manager-adversarial.test.ts | 28 +- .../agent/tests/unit/hooks-manager.test.ts | 51 ++- .../tests/unit/model-result-hooks.test.ts | 259 ++++++++++- 11 files changed, 692 insertions(+), 225 deletions(-) diff --git a/packages/agent/src/index.ts b/packages/agent/src/index.ts index f7fea80..22e858b 100644 --- a/packages/agent/src/index.ts +++ b/packages/agent/src/index.ts @@ -12,7 +12,7 @@ export type { BeforeCreateRequestHook, BeforeRequestContext, BeforeRequestHook, - HookContext as SDKHookContext, + HookContext, SDKInitHook, } from '@openrouter/sdk/hooks/types'; export type { RequestOptions } from '@openrouter/sdk/lib/sdks'; @@ -126,7 +126,6 @@ export type { AsyncOutput, BuiltInHookDefinitions, EmitResult, - HookContext, HookDefinition, HookEntry, HookHandler, @@ -134,6 +133,7 @@ export type { HookReturn, HooksManagerOptions, InlineHookConfig, + LifecycleHookContext, PermissionRequestPayload, PermissionRequestResult, PostToolUseFailurePayload, diff --git a/packages/agent/src/lib/hooks-emit.ts b/packages/agent/src/lib/hooks-emit.ts index 9cba3e0..a8467c9 100644 --- a/packages/agent/src/lib/hooks-emit.ts +++ b/packages/agent/src/lib/hooks-emit.ts @@ -1,7 +1,7 @@ import type { $ZodType } from 'zod/v4/core'; import { safeParse } from 'zod/v4/core'; import { matchesTool } from './hooks-matchers.js'; -import type { AsyncOutput, EmitResult, HookContext, HookEntry } from './hooks-types.js'; +import type { AsyncOutput, EmitResult, HookEntry, LifecycleHookContext } from './hooks-types.js'; import { BLOCK_FIELDS, BLOCK_HOOKS, @@ -26,6 +26,18 @@ export interface ExecuteChainOptions { readonly resultSchema?: $ZodType | undefined; } +/** + * Returns true for non-null, non-array plain objects -- values where + * object-spread cloning is safe. Custom hooks can register payload schemas + * like `z.number()`, `z.string()`, or `z.array(...)`; spreading a primitive + * silently produces `{}` and spreading an array reindexes it into an object, + * which would hand handlers a mangled value. This mirrors the invariant that + * `applyMutations` relies on when deciding whether mutation piping can run. + */ +function isPlainMutableObject(value: unknown): value is Record { + return typeof value === 'object' && value !== null && !Array.isArray(value); +} + /** * Execute a chain of hook handlers sequentially. * @@ -42,14 +54,22 @@ export interface ExecuteChainOptions { export async function executeHandlerChain( entries: ReadonlyArray>, initialPayload: P, - context: HookContext, + context: LifecycleHookContext, options: ExecuteChainOptions, ): Promise> { const results: R[] = []; const pending: Promise[] = []; - let currentPayload = { - ...initialPayload, - } as P; + // Only clone when the payload is a plain mutable object. Spreading a + // primitive (e.g. a custom hook's `z.number()` payload) would silently + // produce `{}`; spreading an array reindexes it into an object. For those + // cases we pass the value through untouched so handlers see the original + // typed-`P` value. For plain objects we still clone so the chain can apply + // mutation piping without mutating the caller's payload in place. + let currentPayload: P = isPlainMutableObject(initialPayload) + ? ({ + ...initialPayload, + } as P) + : initialPayload; let blocked = false; const blockField = BLOCK_FIELDS[options.hookName]; @@ -86,7 +106,7 @@ export async function executeHandlerChain( // detached work. Track the (optional) work promise for drain/timeout. if (isAsyncOutput(returnValue)) { const asyncOutput: AsyncOutput = returnValue; - const trackedWork = trackAsyncWork(asyncOutput); + const trackedWork = trackAsyncWork(asyncOutput, options.hookName); if (trackedWork !== undefined) { pending.push(trackedWork); } @@ -149,8 +169,12 @@ export async function executeHandlerChain( * Given an {@link AsyncOutput} signal, return a Promise that resolves * when the handler's detached `work` settles OR the timeout fires -- whichever * is first. Returns `undefined` if there is no work to track. + * + * Rejections of the detached `work` promise are logged as warnings. Note: + * `throwOnHandlerError` governs synchronous handler failures only -- detached + * fire-and-forget work never re-throws here, it just surfaces via the warning. */ -function trackAsyncWork(output: AsyncOutput): Promise | undefined { +function trackAsyncWork(output: AsyncOutput, hookName: string): Promise | undefined { if (output.work === undefined) { return undefined; } @@ -169,7 +193,10 @@ function trackAsyncWork(output: AsyncOutput): Promise | undefined { const timeoutId = setTimeout(finish, timeout); - output.work?.then(finish, finish); + output.work?.then(finish, (error: unknown) => { + console.warn(`[HooksManager] Async work for hook "${hookName}" rejected:`, error); + finish(); + }); }); } diff --git a/packages/agent/src/lib/hooks-manager.ts b/packages/agent/src/lib/hooks-manager.ts index baf3b70..3e6a96c 100644 --- a/packages/agent/src/lib/hooks-manager.ts +++ b/packages/agent/src/lib/hooks-manager.ts @@ -5,11 +5,11 @@ import { BUILT_IN_HOOK_NAMES, BUILT_IN_HOOKS, VOID_RESULT_HOOKS } from './hooks- import type { BuiltInHookDefinitions, EmitResult, - HookContext, HookEntry, HookHandler, HookRegistry, HooksManagerOptions, + LifecycleHookContext, ToolMatcher, } from './hooks-types.js'; @@ -31,6 +31,21 @@ interface EntryRegistration { readonly filter?: (payload: P) => boolean; } +/** + * Typeguard: does the payload carry a string `sessionId` field? + * + * Used by `emit()` to prefer the payload's sessionId over the manager's + * stored value when they disagree (e.g., mid-session id changes). + */ +function payloadHasSessionId(payload: unknown): payload is { + sessionId: string; +} { + if (typeof payload !== 'object' || payload === null || !('sessionId' in payload)) { + return false; + } + return typeof payload.sessionId === 'string'; +} + //#endregion /** @@ -77,7 +92,7 @@ export class HooksManager> { } /** - * Set the session ID used in HookContext for all handler invocations. + * Set the session ID used in LifecycleHookContext for all handler invocations. */ setSessionId(sessionId: string): void { this.sessionId = sessionId; @@ -166,10 +181,15 @@ export class HooksManager> { const controller = new AbortController(); this.inflightControllers.add(controller); - const context: HookContext = { + // Payload sessionId takes precedence over the manager's stored value when + // available, keeping `payload.sessionId` and `ctx.sessionId` in sync even + // if the state's id changes mid-session. + const contextSessionId = payloadHasSessionId(payload) ? payload.sessionId : this.sessionId; + + const context: LifecycleHookContext = { signal: controller.signal, hookName, - sessionId: this.sessionId, + sessionId: contextSessionId, }; try { diff --git a/packages/agent/src/lib/hooks-schemas.ts b/packages/agent/src/lib/hooks-schemas.ts index 952841b..c7fb3a8 100644 --- a/packages/agent/src/lib/hooks-schemas.ts +++ b/packages/agent/src/lib/hooks-schemas.ts @@ -26,9 +26,7 @@ export const PostToolUseFailurePayloadSchema = z4.object({ export const StopPayloadSchema = z4.object({ reason: z4.enum([ - 'end_turn', - 'max_tokens', - 'stop_sequence', + 'max_turns', ]), sessionId: z4.string(), }); diff --git a/packages/agent/src/lib/hooks-types.ts b/packages/agent/src/lib/hooks-types.ts index 68fde8a..46c8439 100644 --- a/packages/agent/src/lib/hooks-types.ts +++ b/packages/agent/src/lib/hooks-types.ts @@ -33,13 +33,13 @@ export interface HookDefinition { export type HookRegistry = Record; /** - * Context provided to every hook handler invocation. + * Context provided to every lifecycle-hook handler invocation. * * - `signal` is aborted if the manager's `abortInflight()` is called while the * emit is still running. Handlers that kick off background work via * {@link AsyncOutput} should observe the signal for cancellation. */ -export interface HookContext { +export interface LifecycleHookContext { readonly signal: AbortSignal; readonly hookName: string; readonly sessionId: string; @@ -81,7 +81,7 @@ export type HookReturn = R | AsyncOutput | void; */ export type HookHandler = ( payload: P, - context: HookContext, + context: LifecycleHookContext, ) => HookReturn | Promise>; /** @@ -151,7 +151,7 @@ export interface PostToolUseFailurePayload { } export interface StopPayload { - readonly reason: 'end_turn' | 'max_tokens' | 'stop_sequence'; + readonly reason: 'max_turns'; readonly sessionId: string; } diff --git a/packages/agent/src/lib/model-result.ts b/packages/agent/src/lib/model-result.ts index 1b0827a..a7efbd3 100644 --- a/packages/agent/src/lib/model-result.ts +++ b/packages/agent/src/lib/model-result.ts @@ -132,6 +132,19 @@ function isUserStringMessage(value: unknown): value is { return obj.role === 'user' && typeof obj.content === 'string'; } +/** + * Find the index of the last user-role, string-content message in an input + * array. Returns -1 when no such message exists. + */ +function findLatestUserStringIndex(arr: readonly unknown[]): number { + for (let i = arr.length - 1; i >= 0; i--) { + if (isUserStringMessage(arr[i])) { + return i; + } + } + return -1; +} + /** * Extract a user-facing prompt string from an input (string or message array), * and return an applier that writes a mutated prompt back into the same shape. @@ -156,13 +169,7 @@ function extractPromptAndApplier(input: models.InputsUnion): { } if (Array.isArray(input)) { - let targetIndex = -1; - for (let i = input.length - 1; i >= 0; i--) { - if (isUserStringMessage(input[i])) { - targetIndex = i; - break; - } - } + const targetIndex = findLatestUserStringIndex(input); if (targetIndex === -1) { return { @@ -182,14 +189,20 @@ function extractPromptAndApplier(input: models.InputsUnion): { return { prompt: target.content, applyMutated: (mutated, original) => { - // Prefer the original in case the caller has already re-wrapped it. + // Re-derive the target index from the effective base array so an + // arbitrary `original` shape lands the mutation in the correct slot + // rather than the closed-over index from the initial extraction. const base = Array.isArray(original) ? original : input; + const idx = findLatestUserStringIndex(base); + if (idx === -1) { + return base; + } const out = [ ...base, ]; - const existing = out[targetIndex]; + const existing = out[idx]; if (isUserStringMessage(existing)) { - out[targetIndex] = { + out[idx] = { ...existing, content: mutated, }; @@ -912,9 +925,7 @@ export class ModelResult< * - `deny`: the tool should NOT run; caller should produce a denied result * - `ask_user`: fall through to the existing approval flow (the default) * - * Last-wins when multiple handlers return conflicting decisions. Risk level - * defaults to `medium` unless a tool carries an explicit signal in the - * future (schema reserved for extension). + * Last-wins when multiple handlers return conflicting decisions. */ private async emitPermissionRequest(toolCall: ParsedToolCall): Promise<{ decision: 'allow' | 'deny' | 'ask_user'; @@ -926,12 +937,21 @@ export class ModelResult< }; } + // Derive risk level from the tool's requireApproval shape: function => 'high' + // (caller actively decides), blanket true => 'medium', otherwise 'low'. + const tool = this.options.tools?.find( + (t) => isClientTool(t) && t.function.name === toolCall.name, + ); + const requireApproval = tool && isClientTool(tool) ? tool.function.requireApproval : undefined; + const riskLevel: 'low' | 'medium' | 'high' = + typeof requireApproval === 'function' ? 'high' : requireApproval === true ? 'medium' : 'low'; + const emit = await this.hooksManager.emit( 'PermissionRequest', { toolName: toolCall.name, toolInput: (toolCall.arguments ?? {}) as Record, - riskLevel: 'medium' as const, + riskLevel, sessionId: this.currentState?.id ?? '', }, { @@ -1293,22 +1313,7 @@ export class ModelResult< const errorMessage = settled.reason instanceof Error ? settled.reason.message : String(settled.reason); - // Emit PostToolUseFailure for rejected promises - if (this.hooksManager) { - await this.hooksManager.emit( - 'PostToolUseFailure', - { - toolName: originalToolCall.name, - toolInput: (originalToolCall.arguments ?? {}) as Record, - error: settled.reason, - sessionId: this.currentState?.id ?? '', - }, - { - toolName: originalToolCall.name, - }, - ); - } - + // `runToolWithHooks` is the single point of emission for PostToolUseFailure. this.broadcastToolResult(originalToolCall.id, { error: errorMessage, } as InferToolOutputsUnion); @@ -1983,203 +1988,234 @@ export class ModelResult< // biome-ignore lint: IIFE used for lazy initialization pattern this.toolExecutionPromise = (async () => { - await this.initStream(); - - // If resuming from approval and still pending, don't continue - if (this.isResumingFromApproval && this.currentState?.status === 'awaiting_approval') { - return; - } - - // Get initial response - let currentResponse = await this.getInitialResponse(); - - // Save initial response to state - await this.saveResponseToState(currentResponse); + // SessionEnd/drain must fire on every exit path (success, early return, + // approval pause, interruption, and exceptions), so wrap the body in + // try/catch/finally and track the session-end reason as we go. + // Approval pauses keep reason='complete' because the run hasn't failed — + // it's simply paused awaiting user decisions. + let sessionEndReason: 'user' | 'error' | 'max_turns' | 'complete' = 'complete'; + try { + await this.initStream(); + + // If resuming from approval and still pending, don't continue + if (this.isResumingFromApproval && this.currentState?.status === 'awaiting_approval') { + return; + } - // Check if tools should be executed - const hasToolCalls = currentResponse.output.some( - (item) => hasTypeProperty(item) && item.type === 'function_call', - ); + // Get initial response + let currentResponse = await this.getInitialResponse(); - if (!this.options.tools?.length || !hasToolCalls) { - this.finalResponse = currentResponse; - await this.markStateComplete(); - return; - } + // Save initial response to state + await this.saveResponseToState(currentResponse); - // Extract and check tool calls - const toolCalls = extractToolCallsFromResponse(currentResponse); + // Check if tools should be executed + const hasToolCalls = currentResponse.output.some( + (item) => hasTypeProperty(item) && item.type === 'function_call', + ); - // Check for approval requirements - if (await this.handleApprovalCheck(toolCalls, 0, currentResponse)) { - return; // Paused for approval - } + if (!this.options.tools?.length || !hasToolCalls) { + this.finalResponse = currentResponse; + await this.markStateComplete(); + return; + } - if (!this.hasExecutableToolCalls(toolCalls)) { - this.finalResponse = currentResponse; - await this.markStateComplete(); - return; - } + // Extract and check tool calls + const toolCalls = extractToolCallsFromResponse(currentResponse); - // Main execution loop - let currentRound = 0; - // Cap consecutive forceResume overrides so a misbehaving Stop hook - // cannot spin the loop forever. We pick 3 as a conservative upper bound - // -- it's enough to let a hook gather a couple of follow-up actions but - // small enough that a buggy handler fails fast with a visible warning. - const MAX_FORCE_RESUME_OVERRIDES = 3; - let forceResumeCount = 0; + // Check for approval requirements + if (await this.handleApprovalCheck(toolCalls, 0, currentResponse)) { + return; // Paused for approval + } - while (true) { - // Check for external interruption - if (await this.checkForInterruption(currentResponse)) { + if (!this.hasExecutableToolCalls(toolCalls)) { + this.finalResponse = currentResponse; + await this.markStateComplete(); return; } - // Check stop conditions - if (await this.shouldStopExecution()) { - // Emit Stop hook -- can force resume or inject prompt. - if (this.hooksManager) { - const stopResult = await this.hooksManager.emit('Stop', { - reason: 'end_turn' as const, - sessionId: this.currentState?.id ?? '', - }); + // Main execution loop + let currentRound = 0; + // Cap consecutive forceResume overrides so a misbehaving Stop hook + // cannot spin the loop forever. We pick 3 as a conservative upper bound + // -- it's enough to let a hook gather a couple of follow-up actions but + // small enough that a buggy handler fails fast with a visible warning. + const MAX_FORCE_RESUME_OVERRIDES = 3; + let forceResumeCount = 0; + + while (true) { + // Check for external interruption + if (await this.checkForInterruption(currentResponse)) { + sessionEndReason = 'user'; + return; + } - // Honor forceResume if ANY handler returns it, not just the last. - const shouldForceResume = stopResult.results.some( - (r) => r && typeof r === 'object' && 'forceResume' in r && r.forceResume === true, - ); - - // Collect every appendPrompt the handlers supplied (concatenated - // with newlines). appendPrompt is honored independently of - // forceResume so a handler can nudge the next turn without - // forcing a resume. - const appendParts: string[] = []; - for (const r of stopResult.results) { - if ( - r && - typeof r === 'object' && - 'appendPrompt' in r && - typeof r.appendPrompt === 'string' && - r.appendPrompt.length > 0 - ) { - appendParts.push(r.appendPrompt); + // Check stop conditions + if (await this.shouldStopExecution()) { + // Emit Stop hook -- can force resume or inject prompt. + // shouldStopExecution() is driven by stopWhen conditions (default + // stepCountIs), so 'max_turns' is the semantically accurate reason. + if (this.hooksManager) { + const stopResult = await this.hooksManager.emit('Stop', { + reason: 'max_turns' as const, + sessionId: this.currentState?.id ?? '', + }); + + // Honor forceResume if ANY handler returns it, not just the last. + const shouldForceResume = stopResult.results.some( + (r) => r && typeof r === 'object' && 'forceResume' in r && r.forceResume === true, + ); + + // Collect every appendPrompt the handlers supplied (concatenated + // with newlines). appendPrompt is honored independently of + // forceResume so a handler can nudge the next turn without + // forcing a resume. + const appendParts: string[] = []; + for (const r of stopResult.results) { + if ( + r && + typeof r === 'object' && + 'appendPrompt' in r && + typeof r.appendPrompt === 'string' && + r.appendPrompt.length > 0 + ) { + appendParts.push(r.appendPrompt); + } } - } - const appendPrompt = appendParts.join('\n'); + const appendPrompt = appendParts.join('\n'); - if (appendPrompt) { - await this.injectAppendPromptMessage(appendPrompt); - } + if (appendPrompt) { + await this.injectAppendPromptMessage(appendPrompt); + } - if (shouldForceResume) { - if (forceResumeCount >= MAX_FORCE_RESUME_OVERRIDES) { - // Don't let the hook loop the engine forever. Log and stop. - console.warn( - `[Stop hook] forceResume honored ${MAX_FORCE_RESUME_OVERRIDES} times without new progress; stopping to prevent an infinite loop.`, - ); - break; + if (shouldForceResume) { + if (forceResumeCount >= MAX_FORCE_RESUME_OVERRIDES) { + // Don't let the hook loop the engine forever. Log and stop. + console.warn( + `[Stop hook] forceResume honored ${MAX_FORCE_RESUME_OVERRIDES} times without new progress; stopping to prevent an infinite loop.`, + ); + sessionEndReason = 'max_turns'; + break; + } + forceResumeCount++; + // Continue the loop. If appendPrompt was supplied we already + // injected it, which advances state so the stop condition may + // no longer fire on the next iteration. + continue; } - forceResumeCount++; - // Continue the loop. If appendPrompt was supplied we already - // injected it, which advances state so the stop condition may - // no longer fire on the next iteration. - continue; } + // Stop condition fired and the hook (if any) did not force resume + // -- this is a max_turns-style exit, not a natural completion. + sessionEndReason = 'max_turns'; + break; } - break; - } - const currentToolCalls = extractToolCallsFromResponse(currentResponse); - if (currentToolCalls.length === 0) { - break; - } + const currentToolCalls = extractToolCallsFromResponse(currentResponse); + if (currentToolCalls.length === 0) { + break; + } - // Check for approval requirements - if (await this.handleApprovalCheck(currentToolCalls, currentRound + 1, currentResponse)) { - return; - } + // Check for approval requirements + if (await this.handleApprovalCheck(currentToolCalls, currentRound + 1, currentResponse)) { + return; + } - if (!this.hasExecutableToolCalls(currentToolCalls)) { - break; - } + if (!this.hasExecutableToolCalls(currentToolCalls)) { + break; + } - // Build turn context - const turnNumber = currentRound + 1; - const turnContext: TurnContext = { - numberOfTurns: turnNumber, - }; + // Build turn context + const turnNumber = currentRound + 1; + const turnContext: TurnContext = { + numberOfTurns: turnNumber, + }; - await this.options.onTurnStart?.(turnContext); + await this.options.onTurnStart?.(turnContext); - // Resolve async functions for this turn - await this.resolveAsyncFunctionsForTurn(turnContext); + // Resolve async functions for this turn + await this.resolveAsyncFunctionsForTurn(turnContext); - // Execute tools - const toolResults = await this.executeToolRound(currentToolCalls, turnContext); + // Execute tools + const toolResults = await this.executeToolRound(currentToolCalls, turnContext); - // Server-tool output items are already-executed results in the - // response; collect them so toolResults presents a unified list. - const serverToolItems: ToolResultItem[] = []; - for (const item of currentResponse.output) { - if (!hasTypeProperty(item)) { - continue; - } - if ( - item.type === 'message' || - item.type === 'reasoning' || - item.type === 'function_call' - ) { - continue; - } - // Everything else is a server-tool output item (web_search_call, - // image_generation_call, file_search_call, or generic - // OutputServerToolItem covering openrouter:datetime and any new - // SDK server tool types). - if (isServerToolResultItem(item)) { - serverToolItems.push(item); + // A tool round with observable progress resets the consecutive + // forceResume counter so a legitimate override earlier in the run + // does not count against a later, independent one. + if (toolResults.length > 0) { + forceResumeCount = 0; } - } - // Track execution round - this.allToolExecutionRounds.push({ - round: currentRound, - toolCalls: currentToolCalls, - response: currentResponse, - toolResults: [ - ...toolResults, - ...serverToolItems, - ], - }); + // Server-tool output items are already-executed results in the + // response; collect them so toolResults presents a unified list. + const serverToolItems: ToolResultItem[] = []; + for (const item of currentResponse.output) { + if (!hasTypeProperty(item)) { + continue; + } + if ( + item.type === 'message' || + item.type === 'reasoning' || + item.type === 'function_call' + ) { + continue; + } + // Everything else is a server-tool output item (web_search_call, + // image_generation_call, file_search_call, or generic + // OutputServerToolItem covering openrouter:datetime and any new + // SDK server tool types). + if (isServerToolResultItem(item)) { + serverToolItems.push(item); + } + } - // Save tool results to state - await this.saveToolResultsToState(toolResults); + // Track execution round + this.allToolExecutionRounds.push({ + round: currentRound, + toolCalls: currentToolCalls, + response: currentResponse, + toolResults: [ + ...toolResults, + ...serverToolItems, + ], + }); - // Apply nextTurnParams - await this.applyNextTurnParams(currentToolCalls); + // Save tool results to state + await this.saveToolResultsToState(toolResults); - currentResponse = await this.makeFollowupRequest(currentResponse, toolResults, turnNumber); + // Apply nextTurnParams + await this.applyNextTurnParams(currentToolCalls); - await this.options.onTurnEnd?.(turnContext, currentResponse); + currentResponse = await this.makeFollowupRequest( + currentResponse, + toolResults, + turnNumber, + ); + // A fresh response replaces the prior one -- that's new progress, + // so reset consecutive forceResume counting. + forceResumeCount = 0; - // Save new response to state - await this.saveResponseToState(currentResponse); + await this.options.onTurnEnd?.(turnContext, currentResponse); - currentRound++; - } + // Save new response to state + await this.saveResponseToState(currentResponse); - // Validate and finalize - this.validateFinalResponse(currentResponse); - this.finalResponse = currentResponse; - await this.markStateComplete(); + currentRound++; + } - // Emit SessionEnd hook and drain async handlers - if (this.hooksManager) { - await this.hooksManager.emit('SessionEnd', { - sessionId: this.currentState?.id ?? '', - reason: 'complete' as const, - }); - await this.hooksManager.drain(); + // Validate and finalize + this.validateFinalResponse(currentResponse); + this.finalResponse = currentResponse; + await this.markStateComplete(); + } catch (error) { + sessionEndReason = 'error'; + throw error; + } finally { + if (this.hooksManager) { + await this.hooksManager.emit('SessionEnd', { + sessionId: this.currentState?.id ?? '', + reason: sessionEndReason, + }); + await this.hooksManager.drain(); + } } })(); diff --git a/packages/agent/tests/unit/hooks-emit-adversarial.test.ts b/packages/agent/tests/unit/hooks-emit-adversarial.test.ts index 99d8d40..ea72f26 100644 --- a/packages/agent/tests/unit/hooks-emit-adversarial.test.ts +++ b/packages/agent/tests/unit/hooks-emit-adversarial.test.ts @@ -1,8 +1,8 @@ import { describe, expect, it, vi } from 'vitest'; import { executeHandlerChain } from '../../src/lib/hooks-emit.js'; -import type { HookContext, HookEntry } from '../../src/lib/hooks-types.js'; +import type { HookEntry, LifecycleHookContext } from '../../src/lib/hooks-types.js'; -function makeContext(hookName = 'TestHook'): HookContext { +function makeContext(hookName = 'TestHook'): LifecycleHookContext { return { signal: new AbortController().signal, hookName, diff --git a/packages/agent/tests/unit/hooks-emit.test.ts b/packages/agent/tests/unit/hooks-emit.test.ts index f397df4..022358e 100644 --- a/packages/agent/tests/unit/hooks-emit.test.ts +++ b/packages/agent/tests/unit/hooks-emit.test.ts @@ -1,9 +1,9 @@ import { describe, expect, it, vi } from 'vitest'; import * as z4 from 'zod/v4'; import { executeHandlerChain } from '../../src/lib/hooks-emit.js'; -import type { HookContext, HookEntry } from '../../src/lib/hooks-types.js'; +import type { HookEntry, LifecycleHookContext } from '../../src/lib/hooks-types.js'; -function makeContext(hookName = 'TestHook'): HookContext { +function makeContext(hookName = 'TestHook'): LifecycleHookContext { return { signal: new AbortController().signal, hookName, @@ -336,6 +336,70 @@ describe('executeHandlerChain', () => { await Promise.allSettled(result.pending); }); + it('logs a warning when async work rejects (default mode)', async () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + + const work = Promise.reject(new Error('detached boom')); + const entries: HookEntry[] = [ + { + handler: () => ({ + async: true as const, + work, + }), + }, + ]; + + const result = await executeHandlerChain(entries, {}, makeContext(), { + hookName: 'MyHook', + throwOnHandlerError: false, + }); + + expect(result.pending.length).toBe(1); + + // Drain detached work; this should not reject the tracked promise. + await Promise.allSettled(result.pending); + + expect(warnSpy).toHaveBeenCalledTimes(1); + const [message, err] = warnSpy.mock.calls[0] ?? []; + expect(message).toContain('MyHook'); + expect(message).toContain('rejected'); + expect(err).toBeInstanceOf(Error); + if (err instanceof Error) { + expect(err.message).toBe('detached boom'); + } + + warnSpy.mockRestore(); + }); + + it('logs (does not throw) when async work rejects in strict mode', async () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + + const work = Promise.reject(new Error('strict detached boom')); + const entries: HookEntry[] = [ + { + handler: () => ({ + async: true as const, + work, + }), + }, + ]; + + // Strict mode governs sync handler failures only; fire-and-forget rejection + // must still just log, never throw. + const result = await executeHandlerChain(entries, {}, makeContext(), { + hookName: 'StrictHook', + throwOnHandlerError: true, + }); + + await Promise.allSettled(result.pending); + + expect(warnSpy).toHaveBeenCalledTimes(1); + const [message] = warnSpy.mock.calls[0] ?? []; + expect(message).toContain('StrictHook'); + + warnSpy.mockRestore(); + }); + it('logs and continues on handler error in default mode', async () => { const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); const secondHandler = vi.fn(); diff --git a/packages/agent/tests/unit/hooks-manager-adversarial.test.ts b/packages/agent/tests/unit/hooks-manager-adversarial.test.ts index 0adb4ce..621530a 100644 --- a/packages/agent/tests/unit/hooks-manager-adversarial.test.ts +++ b/packages/agent/tests/unit/hooks-manager-adversarial.test.ts @@ -336,7 +336,7 @@ describe('HooksManager (adversarial)', () => { toolInput: {}, toolOutput: null, durationMs: 0, - sessionId: 'ignored', + sessionId: 'session-1', }); manager.setSessionId('session-2'); @@ -345,7 +345,7 @@ describe('HooksManager (adversarial)', () => { toolInput: {}, toolOutput: null, durationMs: 0, - sessionId: 'ignored', + sessionId: 'session-2', }); expect(sessionIds).toEqual([ @@ -353,5 +353,29 @@ describe('HooksManager (adversarial)', () => { 'session-2', ]); }); + + it('payload sessionId overrides the manager sessionId when they disagree', async () => { + const manager = new HooksManager(); + const sessionIds: string[] = []; + + manager.on('PostToolUse', { + handler: (_p, ctx) => { + sessionIds.push(ctx.sessionId); + }, + }); + + manager.setSessionId('stale-manager-id'); + await manager.emit('PostToolUse', { + toolName: 'T', + toolInput: {}, + toolOutput: null, + durationMs: 0, + sessionId: 'fresh-payload-id', + }); + + expect(sessionIds).toEqual([ + 'fresh-payload-id', + ]); + }); }); }); diff --git a/packages/agent/tests/unit/hooks-manager.test.ts b/packages/agent/tests/unit/hooks-manager.test.ts index 9cd4089..e1a6d45 100644 --- a/packages/agent/tests/unit/hooks-manager.test.ts +++ b/packages/agent/tests/unit/hooks-manager.test.ts @@ -262,11 +262,58 @@ describe('HooksManager', () => { toolInput: {}, toolOutput: 'ok', durationMs: 100, - sessionId: 'test', + sessionId: 'my-session', }); expect(receivedSessionId).toBe('my-session'); }); + + it('payload sessionId overrides manager sessionId in hook context', async () => { + const manager = new HooksManager(); + manager.setSessionId('stale-session'); + + let receivedSessionId = ''; + manager.on('PostToolUse', { + handler: (_payload, context) => { + receivedSessionId = context.sessionId; + }, + }); + + await manager.emit('PostToolUse', { + toolName: 'Bash', + toolInput: {}, + toolOutput: 'ok', + durationMs: 100, + sessionId: 'fresh-session', + }); + + expect(receivedSessionId).toBe('fresh-session'); + }); + + it('falls back to manager sessionId for custom hooks without sessionId in payload', async () => { + const manager = new HooksManager({ + AgentThinking: { + payload: z4.object({ + thought: z4.string(), + }), + result: z4.void(), + }, + }); + manager.setSessionId('manager-session'); + + let receivedSessionId = ''; + manager.on('AgentThinking', { + handler: (_payload, context) => { + receivedSessionId = context.sessionId; + }, + }); + + await manager.emit('AgentThinking', { + thought: 'hmm', + }); + + expect(receivedSessionId).toBe('manager-session'); + }); }); describe('schema validation', () => { @@ -397,7 +444,7 @@ describe('HooksManager', () => { }); describe('abortInflight', () => { - it('aborts the signal on the HookContext of in-flight emits', async () => { + it('aborts the signal on the LifecycleHookContext of in-flight emits', async () => { const manager = new HooksManager(); let observedAborted = false; diff --git a/packages/agent/tests/unit/model-result-hooks.test.ts b/packages/agent/tests/unit/model-result-hooks.test.ts index 6261810..d13d6ba 100644 --- a/packages/agent/tests/unit/model-result-hooks.test.ts +++ b/packages/agent/tests/unit/model-result-hooks.test.ts @@ -85,6 +85,9 @@ type InternalModelResult = { stateAccessor: StateAccessor | null; contextStore: unknown; resolvedRequest: Record | null; + finalResponse: OpenResponsesResult | null; + initPromise: Promise | null; + toolExecutionPromise: Promise | null; runToolWithHooks: ( tool: Tool, toolCall: ParsedToolCall, @@ -114,6 +117,7 @@ type InternalModelResult = { | undefined >; injectAppendPromptMessage: (p: string) => Promise; + executeToolsIfNeeded: () => Promise; options: Record; }; @@ -146,7 +150,7 @@ function buildModelResult(opts: { if (opts.requireApproval) { config['requireApproval'] = opts.requireApproval; } - return new ModelResult(config as ConstructorParameters>[0]); + return new ModelResult(config as unknown as ConstructorParameters>[0]); } // --------------------------------------------------------------------------- @@ -353,7 +357,11 @@ describe('ModelResult hooks integration', () => { const approvalTool = { ...tool, function: { - ...tool.function, + ...( + tool as unknown as { + function: Record; + } + ).function, requireApproval: true, }, } as Tool; @@ -416,7 +424,11 @@ describe('ModelResult hooks integration', () => { const approvalTool = { ...tool, function: { - ...tool.function, + ...( + tool as unknown as { + function: Record; + } + ).function, requireApproval: true, }, } as Tool; @@ -670,7 +682,7 @@ describe('ModelResult hooks integration', () => { while (iters < 10) { iters++; const stopEmit = await hooks.emit('Stop', { - reason: 'end_turn', + reason: 'max_turns', sessionId: 'conv_stop_cap', }); const force = stopEmit.results.some( @@ -692,6 +704,245 @@ describe('ModelResult hooks integration', () => { }); }); + describe('SessionEnd lifecycle', () => { + it('fires SessionEnd on approval pause with reason="complete"', async () => { + // Issue 1: approval pauses used to skip SessionEnd because the early + // `return` after `handleApprovalCheck` jumped past the tail emission. + const hooks = new HooksManager(); + const sessionEnd = vi.fn(); + const drainSpy = vi.spyOn(hooks, 'drain'); + hooks.on('SessionEnd', { + handler: sessionEnd, + }); + + const approvalTool = makeApprovalTool('gated'); + const stateAccessor = makeStateAccessor({ + id: 'conv_end_pause', + messages: [], + status: 'in_progress', + createdAt: 0, + updatedAt: 0, + }); + + const m = buildModelResult({ + tools: [ + approvalTool, + ] as const, + hooks, + state: stateAccessor, + }); + const ent = internal(m); + ent.currentState = await stateAccessor.load(); + ent.stateAccessor = stateAccessor; + // Skip initStream: it tries to make a real API call. + ent.initPromise = Promise.resolve(); + // Prime a response carrying a tool call that requires approval so the + // execution loop takes the "pause for approval" branch. + ent.finalResponse = { + ...makeResponse(), + output: [ + { + type: 'function_call', + id: 'out_1', + callId: 'call_pause', + name: 'gated', + arguments: '{}', + status: 'completed', + }, + ], + } as unknown as OpenResponsesResult; + + await ent.executeToolsIfNeeded(); + + expect(sessionEnd).toHaveBeenCalledTimes(1); + expect(sessionEnd.mock.calls[0]?.[0]).toMatchObject({ + reason: 'complete', + }); + expect(drainSpy).toHaveBeenCalledTimes(1); + }); + + it('fires SessionEnd with reason="error" when the loop throws', async () => { + const hooks = new HooksManager(); + const sessionEnd = vi.fn(); + const drainSpy = vi.spyOn(hooks, 'drain'); + hooks.on('SessionEnd', { + handler: sessionEnd, + }); + + const m = buildModelResult({ + hooks, + }); + const ent = internal(m); + ent.currentState = { + id: 'conv_end_err', + messages: [], + status: 'in_progress', + createdAt: 0, + updatedAt: 0, + } as ConversationState; + ent.initPromise = Promise.resolve(); + // No finalResponse or reusableStream => getInitialResponse throws, + // which should still trigger SessionEnd with reason='error'. + ent.finalResponse = null; + + await expect(ent.executeToolsIfNeeded()).rejects.toThrow(); + + expect(sessionEnd).toHaveBeenCalledTimes(1); + expect(sessionEnd.mock.calls[0]?.[0]).toMatchObject({ + reason: 'error', + }); + expect(drainSpy).toHaveBeenCalledTimes(1); + }); + }); + + describe('forceResume counter reset', () => { + it('resets after a tool round with observable progress so later overrides do not trip the cap', async () => { + // Issue 2: counter used to only increment across an entire session. Drive + // the real executeToolsIfNeeded loop so the engine sees + // override -> override -> tool round (reset) -> override -> override + // Under the OLD code this would have tripped the cap at the fourth + // override. Under the fix the tool round resets the counter so the + // engine finishes without emitting the cap warning. + const hooks = new HooksManager(); + let stopCalls = 0; + hooks.on('Stop', { + handler: () => { + stopCalls++; + return { + forceResume: true, + }; + }, + }); + + const tool = makeAutoTool('t'); + const m = buildModelResult({ + tools: [ + tool, + ] as const, + hooks, + }); + const ent = internal(m); + ent.currentState = { + id: 'conv_reset', + messages: [], + status: 'in_progress', + createdAt: 0, + updatedAt: 0, + } as ConversationState; + ent.initPromise = Promise.resolve(); + + const toolCallResponse = { + ...makeResponse(), + output: [ + { + type: 'function_call', + id: 'out_1', + callId: 'c1', + name: 't', + arguments: '{}', + status: 'completed', + }, + ], + } as unknown as OpenResponsesResult; + + let shouldStopCalls = 0; + const mProto = m as unknown as { + getInitialResponse: () => Promise; + shouldStopExecution: () => Promise; + executeToolRound: (...args: unknown[]) => Promise; + makeFollowupRequest: (...args: unknown[]) => Promise; + }; + mProto.getInitialResponse = async () => toolCallResponse; + mProto.shouldStopExecution = async () => { + shouldStopCalls++; + // shouldStop=false at iteration 3 so the loop runs the tool round + // (which triggers the reset). Every other iteration, shouldStop=true, + // which emits Stop and the handler forces a resume. + return shouldStopCalls !== 3; + }; + mProto.executeToolRound = async () => [ + { + type: 'function_call_output', + id: 'output_c1', + callId: 'c1', + output: '{}', + }, + ]; + // Return the same tool-call response so the loop keeps going after + // the reset, giving us more forceResume iterations. + mProto.makeFollowupRequest = async () => toolCallResponse; + + // Track stopCalls at the moment of the cap warning so we can assert + // the cap fired AFTER the reset window (stopCalls > MAX+1 rather than + // stopCalls == MAX+1 under the old code). + let stopCallsAtCap = -1; + const warnSpy = vi.spyOn(console, 'warn').mockImplementation((msg: unknown) => { + if ( + typeof msg === 'string' && + msg.includes('forceResume honored') && + stopCallsAtCap === -1 + ) { + stopCallsAtCap = stopCalls; + } + }); + const timeoutPromise = new Promise((_, reject) => { + setTimeout(() => reject(new Error('loop took too long')), 2000); + }); + try { + await Promise.race([ + ent.executeToolsIfNeeded(), + timeoutPromise, + ]); + } catch { + // Loop eventually breaks (cap trips post-reset too) and + // validateFinalResponse may throw on the stubbed output. Either way, + // we only care about when the cap fired relative to the tool round. + } + warnSpy.mockRestore(); + + // The cap MUST have fired at some point (sanity check that the guard + // still works at all). + expect(stopCallsAtCap).toBeGreaterThan(-1); + // Under the OLD code (no reset), the cap fires on the 4th Stop emission + // (override count hits 3 at that point). That means stopCallsAtCap==4. + // Under the FIX, the reset happens at iteration 3 (shouldStop=false + // branch, tool round runs), so the cap cannot fire until at least 3 + // more overrides accumulate after that -- i.e. stopCallsAtCap >= 5. + expect(stopCallsAtCap).toBeGreaterThanOrEqual(5); + }); + + it('still caps when forceResume happens CONSECUTIVELY without progress', async () => { + const hooks = new HooksManager(); + hooks.on('Stop', { + handler: () => ({ + forceResume: true, + }), + }); + + const MAX = 3; + let overrides = 0; + // Mirror the executeToolsIfNeeded guard: each iteration emits Stop and, + // because no tool round runs in between, the counter never resets. + for (let iter = 0; iter < 10; iter++) { + const emit = await hooks.emit('Stop', { + reason: 'max_turns', + sessionId: 's', + }); + const force = emit.results.some( + (r) => r && typeof r === 'object' && 'forceResume' in r && r.forceResume === true, + ); + if (!force) { + break; + } + if (overrides >= MAX) { + break; + } + overrides++; + } + expect(overrides).toBe(MAX); + }); + }); + describe('SessionStart config', () => { it('passes a populated config object to handlers', async () => { const hooks = new HooksManager(); From d1acead437d84da5713843045c4aa0e8bf6f7b3b Mon Sep 17 00:00:00 2001 From: Matt Apperson Date: Tue, 21 Apr 2026 17:06:56 -0400 Subject: [PATCH 6/7] fix(hooks): tighten engine correctness across review feedback Address seven PR review threads on feat/hooks-manager: - emit() snapshots its entries list before running the chain, so handlers that call off()/unsubscribe mid-chain can't shift indices and silently skip the next handler. - matchesTool resets `matcher.lastIndex = 0` before .test(), making /g and /y RegExps idempotent across successive emits. - isAsyncOutput now requires `async: true` AND no foreign keys. A handler that returns { async: true, mutatedInput: ... } is treated as a result, not fire-and-forget, so mutation piping isn't dropped. - executeToolsIfNeeded guards the SessionEnd emit on a new sessionStartEmitted flag so an initStream throw before SessionStart can't produce a dangling SessionEnd for Start/End-paired handlers. - executeHandlerChain pushes `validation.data` onto results when a schema is present, so custom hooks using .transform/.default/.catch see the parsed output rather than the raw input. - resolveHooks warns and skips non-built-in hook names in inline config to catch typos like `PretoolUse` vs `PreToolUse`; custom hooks still require a HooksManager instance. - Adversarial test for mid-emit on() now asserts the snapshotted behavior (late handler not invoked) instead of the former index-drift footgun. Existing adversarial tests for the old permissive behaviors (/g drift, arbitrary inline hook names, `async: true` + extras treated as fire-and-forget) are updated to pin the tighter invariants. --- packages/agent/src/lib/hooks-emit.ts | 11 +++-- packages/agent/src/lib/hooks-manager.ts | 6 ++- packages/agent/src/lib/hooks-matchers.ts | 4 ++ packages/agent/src/lib/hooks-resolve.ts | 13 ++++++ packages/agent/src/lib/hooks-types.ts | 40 ++++++++++++++----- packages/agent/src/lib/model-result.ts | 11 ++++- .../tests/unit/hooks-emit-adversarial.test.ts | 12 +++--- .../unit/hooks-manager-adversarial.test.ts | 13 +++--- .../unit/hooks-matchers-adversarial.test.ts | 20 +++++----- .../unit/hooks-resolve-adversarial.test.ts | 14 +++++-- .../unit/hooks-types-adversarial.test.ts | 34 +++++++++++++--- .../tests/unit/model-result-hooks.test.ts | 6 ++- 12 files changed, 136 insertions(+), 48 deletions(-) diff --git a/packages/agent/src/lib/hooks-emit.ts b/packages/agent/src/lib/hooks-emit.ts index a8467c9..3353933 100644 --- a/packages/agent/src/lib/hooks-emit.ts +++ b/packages/agent/src/lib/hooks-emit.ts @@ -120,7 +120,10 @@ export async function executeHandlerChain( // Validate the result against the schema if one is supplied. A failure // here is treated like any other handler error: propagated in strict - // mode, logged otherwise. + // mode, logged otherwise. On success, use the parsed output (which may + // differ from the input for schemas with .transform() / .default() / + // .catch() / .coerce) so downstream callers see transformed values. + let result: R; if (options.resultSchema) { const validation = safeParse(options.resultSchema, returnValue); if (!validation.success) { @@ -133,10 +136,10 @@ export async function executeHandlerChain( console.warn(err.message); continue; } + result = validation.data as R; + } else { + result = returnValue as R; } - - // Non-schema-void results pass through unchanged; we narrow via R. - const result = returnValue as R; results.push(result); // Apply mutation piping -- only hooks listed in MUTATION_FIELD_MAP participate. diff --git a/packages/agent/src/lib/hooks-manager.ts b/packages/agent/src/lib/hooks-manager.ts index 3e6a96c..1c8169e 100644 --- a/packages/agent/src/lib/hooks-manager.ts +++ b/packages/agent/src/lib/hooks-manager.ts @@ -156,7 +156,11 @@ export class HooksManager> { toolName?: string; }, ): Promise, K>, PayloadOf, K>>> { - const list = this.entries.get(hookName) ?? []; + // Snapshot the entries list so mutations (on()/off()/unsubscribe) triggered + // by a handler during the chain can't shift indices mid-iteration and + // silently skip the next handler, or splice in new handlers that were + // registered after this emit began. + const list = (this.entries.get(hookName) ?? []).slice(); const definition = this._definitionFor(hookName); if (definition) { diff --git a/packages/agent/src/lib/hooks-matchers.ts b/packages/agent/src/lib/hooks-matchers.ts index 386bc83..f1a196a 100644 --- a/packages/agent/src/lib/hooks-matchers.ts +++ b/packages/agent/src/lib/hooks-matchers.ts @@ -16,6 +16,10 @@ export function matchesTool(matcher: ToolMatcher | undefined, toolName: string): return matcher === toolName; } if (matcher instanceof RegExp) { + // RegExps with the /g or /y flag advance `lastIndex` on every `.test()`, + // so successive emits with the same tool name alternate true/false. + // Reset so the matcher behaves statelessly regardless of flag. + matcher.lastIndex = 0; return matcher.test(toolName); } return matcher(toolName); diff --git a/packages/agent/src/lib/hooks-resolve.ts b/packages/agent/src/lib/hooks-resolve.ts index 4846336..5fb2d9b 100644 --- a/packages/agent/src/lib/hooks-resolve.ts +++ b/packages/agent/src/lib/hooks-resolve.ts @@ -1,4 +1,5 @@ import { getInternalRegistrar, HooksManager } from './hooks-manager.js'; +import { BUILT_IN_HOOK_NAMES } from './hooks-schemas.js'; import type { HookEntry, InlineHookConfig } from './hooks-types.js'; /** @@ -7,6 +8,12 @@ import type { HookEntry, InlineHookConfig } from './hooks-types.js'; * - `undefined` -> `undefined` (no hooks) * - `HooksManager` -> passthrough * - Plain object (InlineHookConfig) -> construct HooksManager, register all entries + * + * The inline config surface is typed to only accept built-in hook names, but + * a generic config coerced through `as` can smuggle typos past the compiler + * (e.g. `PretoolUse` vs `PreToolUse`) which would silently never fire. We log + * a warning for any non-built-in key and skip registration for it. Custom + * hooks must be registered through a `HooksManager` instance via `on()`. */ export function resolveHooks( hooks: InlineHookConfig | HooksManager | undefined, @@ -28,6 +35,12 @@ export function resolveHooks( if (!entries || !Array.isArray(entries)) { continue; } + if (!BUILT_IN_HOOK_NAMES.has(hookName)) { + console.warn( + `[resolveHooks] Ignoring inline hook entry for unknown hook name "${hookName}". Inline config only supports built-in hooks; register custom hooks via a HooksManager instance.`, + ); + continue; + } for (const entry of entries) { register(hookName, entry as HookEntry); } diff --git a/packages/agent/src/lib/hooks-types.ts b/packages/agent/src/lib/hooks-types.ts index 46c8439..ace25c3 100644 --- a/packages/agent/src/lib/hooks-types.ts +++ b/packages/agent/src/lib/hooks-types.ts @@ -263,20 +263,40 @@ export type InlineHookConfig = { //#region Helper Types +/** + * Keys recognized on an {@link AsyncOutput} signal. Used by {@link isAsyncOutput} + * to distinguish a true fire-and-forget marker from a result object that + * happens to carry an `async` field. + */ +const ASYNC_OUTPUT_KEYS = new Set([ + 'async', + 'work', + 'asyncTimeout', +]); + /** * Checks if a value is an AsyncOutput signal. + * + * Requires `async === true` AND no foreign keys. A handler that accidentally + * returns `{ async: true, mutatedInput: {...} }` is treated as a result (so + * the mutation is not silently discarded), not as fire-and-forget. */ export function isAsyncOutput(value: unknown): value is AsyncOutput { - return ( - typeof value === 'object' && - value !== null && - 'async' in value && - ( - value as { - async: unknown; - } - ).async === true - ); + if (typeof value !== 'object' || value === null || !('async' in value)) { + return false; + } + const candidate = value as { + async: unknown; + }; + if (candidate.async !== true) { + return false; + } + for (const key of Object.keys(value)) { + if (!ASYNC_OUTPUT_KEYS.has(key)) { + return false; + } + } + return true; } /** diff --git a/packages/agent/src/lib/model-result.ts b/packages/agent/src/lib/model-result.ts index a7efbd3..da7534b 100644 --- a/packages/agent/src/lib/model-result.ts +++ b/packages/agent/src/lib/model-result.ts @@ -338,6 +338,11 @@ export class ModelResult< // Hook system private readonly hooksManager: HooksManager | undefined; + // Tracks whether SessionStart has already been emitted, so SessionEnd can be + // guarded to fire only when a matching SessionStart actually succeeded. + // Without this, an exception in initStream before SessionStart would lead to + // a dangling SessionEnd (breaking audit-log / resource-pair contracts). + private sessionStartEmitted = false; constructor(options: GetResponseOptions) { this.options = options; @@ -1713,6 +1718,7 @@ export class ModelResult< hasState: !!this.stateAccessor, }, }); + this.sessionStartEmitted = true; } // Emit UserPromptSubmit hook BEFORE the stateful input-wrapping block so @@ -2209,7 +2215,10 @@ export class ModelResult< sessionEndReason = 'error'; throw error; } finally { - if (this.hooksManager) { + // Only emit SessionEnd if SessionStart actually succeeded. Otherwise + // initStream threw before emit, and a dangling SessionEnd would break + // handlers that pair Start/End (audit logs, acquired resources). + if (this.hooksManager && this.sessionStartEmitted) { await this.hooksManager.emit('SessionEnd', { sessionId: this.currentState?.id ?? '', reason: sessionEndReason, diff --git a/packages/agent/tests/unit/hooks-emit-adversarial.test.ts b/packages/agent/tests/unit/hooks-emit-adversarial.test.ts index ea72f26..0d169ea 100644 --- a/packages/agent/tests/unit/hooks-emit-adversarial.test.ts +++ b/packages/agent/tests/unit/hooks-emit-adversarial.test.ts @@ -413,7 +413,10 @@ describe('executeHandlerChain (adversarial)', () => { }); describe('async output edge cases', () => { - it('{ async: true } with block field is treated as async, not block', async () => { + it('{ async: true, block: true, work: ... } is treated as a block result, not async', async () => { + // isAsyncOutput requires `async: true` AND no foreign keys. A `block` + // field is foreign to AsyncOutput, so the value is treated as a result + // (so the block/reject/mutation piping isn't silently discarded). const entries: HookEntry< { toolInput: Record; @@ -445,10 +448,9 @@ describe('executeHandlerChain (adversarial)', () => { }, ); - // isAsyncOutput check comes before block check, so this should be async - expect(result.pending.length).toBe(1); - expect(result.blocked).toBe(false); - expect(result.results).toEqual([]); + expect(result.pending.length).toBe(0); + expect(result.blocked).toBe(true); + expect(result.results).toHaveLength(1); }); it('{ async: true } without `work` does not push a pending promise', async () => { diff --git a/packages/agent/tests/unit/hooks-manager-adversarial.test.ts b/packages/agent/tests/unit/hooks-manager-adversarial.test.ts index 621530a..6832ca2 100644 --- a/packages/agent/tests/unit/hooks-manager-adversarial.test.ts +++ b/packages/agent/tests/unit/hooks-manager-adversarial.test.ts @@ -76,13 +76,12 @@ describe('HooksManager (adversarial)', () => { sessionId: 'test', }); - // The late handler was added to the array during iteration. - // Since entries is a mutable array and we iterate by index, - // the late handler MAY be called (pushed to end of array, i < entries.length). - // This test documents the actual behavior. - // The handler pushes to the list which the for-loop iterates over, - // so it WILL be called since entries.length grows. - expect(lateHandler).toHaveBeenCalledOnce(); + // emit() snapshots the entries list before iterating the chain, so + // handlers registered mid-emit are invisible to the in-flight chain and + // will only fire on subsequent emits. This protects against the + // symmetric removal case as well (off()/unsubscribe mid-chain can no + // longer shift indices and skip the next handler). + expect(lateHandler).not.toHaveBeenCalled(); }); it('handler that calls emit recursively does not deadlock', async () => { diff --git a/packages/agent/tests/unit/hooks-matchers-adversarial.test.ts b/packages/agent/tests/unit/hooks-matchers-adversarial.test.ts index c5b0dc1..d8f8c8d 100644 --- a/packages/agent/tests/unit/hooks-matchers-adversarial.test.ts +++ b/packages/agent/tests/unit/hooks-matchers-adversarial.test.ts @@ -17,18 +17,20 @@ describe('matchesTool (adversarial)', () => { }); describe('RegExp stateful behavior', () => { - it('RegExp with global flag has stateful .test() — may produce inconsistent results', () => { + it('RegExp with global flag is made idempotent by resetting lastIndex', () => { const globalRegex = /Bash/g; - // First call — matches - const first = matchesTool(globalRegex, 'Bash'); - // Second call — global regex has lastIndex set, may NOT match - const second = matchesTool(globalRegex, 'Bash'); + // matchesTool resets lastIndex before each .test(), so /g and /y flags + // no longer cause alternating true/false across successive emits. + expect(matchesTool(globalRegex, 'Bash')).toBe(true); + expect(matchesTool(globalRegex, 'Bash')).toBe(true); + expect(matchesTool(globalRegex, 'Bash')).toBe(true); + }); - // This documents that global flag is dangerous with matchesTool - // First call returns true, second returns false due to lastIndex - expect(first).toBe(true); - expect(second).toBe(false); + it('RegExp with sticky flag is idempotent', () => { + const stickyRegex = /Bash/y; + expect(matchesTool(stickyRegex, 'Bash')).toBe(true); + expect(matchesTool(stickyRegex, 'Bash')).toBe(true); }); it('RegExp without global flag is idempotent', () => { diff --git a/packages/agent/tests/unit/hooks-resolve-adversarial.test.ts b/packages/agent/tests/unit/hooks-resolve-adversarial.test.ts index 9a1b659..56308de 100644 --- a/packages/agent/tests/unit/hooks-resolve-adversarial.test.ts +++ b/packages/agent/tests/unit/hooks-resolve-adversarial.test.ts @@ -93,10 +93,13 @@ describe('resolveHooks (adversarial)', () => { ], } as unknown as InlineHookConfig; + const warn = vi.spyOn(console, 'warn').mockImplementation(() => {}); const result = resolveHooks(config); expect(result).toBeInstanceOf(HooksManager); - // 'constructor' is treated as a hook name — it gets registered - expect(result!.hasHandlers('constructor')).toBe(true); + // 'constructor' is not a built-in hook name — it is warned and skipped. + expect(result!.hasHandlers('constructor')).toBe(false); + expect(warn).toHaveBeenCalled(); + warn.mockRestore(); }); }); @@ -113,7 +116,7 @@ describe('resolveHooks (adversarial)', () => { }); describe('non-standard hook names in inline config', () => { - it('registers handlers for arbitrary hook names (not just built-in)', () => { + it('warns and skips non-built-in hook names (custom hooks require HooksManager)', () => { const config = { CustomHook: [ { @@ -122,9 +125,12 @@ describe('resolveHooks (adversarial)', () => { ], } as unknown as InlineHookConfig; + const warn = vi.spyOn(console, 'warn').mockImplementation(() => {}); const result = resolveHooks(config); expect(result).toBeInstanceOf(HooksManager); - expect(result!.hasHandlers('CustomHook')).toBe(true); + expect(result!.hasHandlers('CustomHook')).toBe(false); + expect(warn).toHaveBeenCalledWith(expect.stringContaining('CustomHook')); + warn.mockRestore(); }); }); }); diff --git a/packages/agent/tests/unit/hooks-types-adversarial.test.ts b/packages/agent/tests/unit/hooks-types-adversarial.test.ts index f16a573..9b60aa1 100644 --- a/packages/agent/tests/unit/hooks-types-adversarial.test.ts +++ b/packages/agent/tests/unit/hooks-types-adversarial.test.ts @@ -80,13 +80,29 @@ describe('isAsyncOutput (adversarial)', () => { ).toBe(true); }); - it('{ async: true, extraField: "ignored" } is still AsyncOutput', () => { + it('{ async: true, extraField: "..." } is NOT AsyncOutput', () => { + // A foreign key means the handler likely returned a result object that + // happens to carry `async` — treating it as fire-and-forget would + // silently drop the result (and any mutation piping). expect( isAsyncOutput({ async: true, extraField: 'ignored', }), - ).toBe(true); + ).toBe(false); + }); + + it('{ async: true, mutatedInput: {...} } is NOT AsyncOutput', () => { + // Common footgun: a PreToolUse handler that builds a result and adds + // `async: true` must not have its mutation dropped. + expect( + isAsyncOutput({ + async: true, + mutatedInput: { + cwd: '/tmp', + }, + }), + ).toBe(false); }); it('frozen object { async: true } is AsyncOutput', () => { @@ -101,11 +117,17 @@ describe('isAsyncOutput (adversarial)', () => { }); describe('array with async property', () => { - it('array with async property set is treated as AsyncOutput', () => { - const arr: unknown[] = []; + it('array with async property set is NOT AsyncOutput', () => { + // Arrays surface their numeric indices through Object.keys once + // anything is pushed; even when empty, any extra named property alongside + // `async` breaks the sole-key requirement. The spread-cloning logic in + // executeHandlerChain already refuses to spread arrays, so arrays are + // never valid AsyncOutput carriers. + const arr: unknown[] = [ + 'item', + ]; (arr as unknown as Record)['async'] = true; - // Arrays are objects and have 'async' in arr, so this should match - expect(isAsyncOutput(arr)).toBe(true); + expect(isAsyncOutput(arr)).toBe(false); }); }); diff --git a/packages/agent/tests/unit/model-result-hooks.test.ts b/packages/agent/tests/unit/model-result-hooks.test.ts index d13d6ba..09a75c9 100644 --- a/packages/agent/tests/unit/model-result-hooks.test.ts +++ b/packages/agent/tests/unit/model-result-hooks.test.ts @@ -88,6 +88,7 @@ type InternalModelResult = { finalResponse: OpenResponsesResult | null; initPromise: Promise | null; toolExecutionPromise: Promise | null; + sessionStartEmitted: boolean; runToolWithHooks: ( tool: Tool, toolCall: ParsedToolCall, @@ -734,8 +735,10 @@ describe('ModelResult hooks integration', () => { const ent = internal(m); ent.currentState = await stateAccessor.load(); ent.stateAccessor = stateAccessor; - // Skip initStream: it tries to make a real API call. + // Skip initStream: it tries to make a real API call. Mark SessionStart + // as emitted so SessionEnd is allowed to fire in the finally block. ent.initPromise = Promise.resolve(); + ent.sessionStartEmitted = true; // Prime a response carrying a tool call that requires approval so the // execution loop takes the "pause for approval" branch. ent.finalResponse = { @@ -781,6 +784,7 @@ describe('ModelResult hooks integration', () => { updatedAt: 0, } as ConversationState; ent.initPromise = Promise.resolve(); + ent.sessionStartEmitted = true; // No finalResponse or reusableStream => getInitialResponse throws, // which should still trigger SessionEnd with reason='error'. ent.finalResponse = null; From fe319a2ed540915b9f3a52c92ace3c487582722c Mon Sep 17 00:00:00 2001 From: Matt Apperson Date: Tue, 21 Apr 2026 17:22:40 -0400 Subject: [PATCH 7/7] fix(hooks): harden engine around review feedback - executeHandlerChain checks context.signal.aborted between handlers so abortInflight() has a deterministic effect on the chain itself rather than relying on each handler to consult the signal. An already-aborted emit skips every handler. - Added a Node-only unref() on the setTimeout handle backing async-work tracking so a leaked fire-and-forget hook whose work never settles can't hold the event loop open for the full 30s timeout window. Guarded for browsers that return a plain number from setTimeout. - Clarified the shallow-clone docstring on executeHandlerChain to make the top-level-only isolation explicit; nested mutations by handlers still reach the caller, so handlers MUST pipe mutations via the documented result fields. - runToolWithHooks rejects raw-string toolCall.arguments (the shape the parser leaves behind on a JSON.parse failure) before any hook fires and returns a 'parse_error' result with a prebuilt synthetic error output. executeToolRound, executeAutoApproveTools, and processApprovalDecisions all route through the same helper, so every execution path fails closed consistently. - runToolWithHooks tracks mutation application explicitly instead of comparing finalPayload.toolInput by reference to the pre-coercion arguments. Tools that distinguish "no args" from "{}" no longer see `{}` when arguments were null/undefined and no handler mutated them. Tests: +5 new cases (2 abort-propagation, 1 parse_error, 2 mutation detection). Full suite 404 pass, lint clean, build clean. --- packages/agent/src/lib/hooks-emit.ts | 49 ++++++- packages/agent/src/lib/model-result.ts | 111 ++++++++++----- .../tests/unit/hooks-emit-adversarial.test.ts | 70 ++++++++++ .../tests/unit/model-result-hooks.test.ts | 128 ++++++++++++++++++ 4 files changed, 325 insertions(+), 33 deletions(-) diff --git a/packages/agent/src/lib/hooks-emit.ts b/packages/agent/src/lib/hooks-emit.ts index 3353933..5cdb30f 100644 --- a/packages/agent/src/lib/hooks-emit.ts +++ b/packages/agent/src/lib/hooks-emit.ts @@ -50,6 +50,18 @@ function isPlainMutableObject(value: unknown): value is Record * responsible for draining/timing out that work * - Per-hook mutation piping (driven by {@link MUTATION_FIELD_MAP}) * - Short-circuit on block/reject fields (non-empty string or `true`) + * - Cooperative abort via `context.signal`: the chain checks + * `signal.aborted` between handlers and bails out when set so + * `abortInflight()` has a deterministic effect on the chain itself (not just + * on handlers that happen to consult the signal). + * + * Payload isolation: the initial payload is shallow-cloned before entering the + * chain so mutation piping (via {@link MUTATION_FIELD_MAP}) doesn't mutate the + * caller's payload at the top level. This is a top-level-only guarantee: + * handlers that directly mutate a nested object (e.g. + * `payload.toolInput.foo = 'bar'`) still reach the caller's original nested + * reference. Handlers MUST return mutations via the documented result fields + * (e.g. `mutatedInput`) rather than mutating nested payload fields in place. */ export async function executeHandlerChain( entries: ReadonlyArray>, @@ -64,7 +76,9 @@ export async function executeHandlerChain( // produce `{}`; spreading an array reindexes it into an object. For those // cases we pass the value through untouched so handlers see the original // typed-`P` value. For plain objects we still clone so the chain can apply - // mutation piping without mutating the caller's payload in place. + // mutation piping without mutating the caller's payload at the top level. + // Nested fields are shared with the caller; see the function-level docstring + // for the invariant handlers are expected to respect. let currentPayload: P = isPlainMutableObject(initialPayload) ? ({ ...initialPayload, @@ -77,6 +91,13 @@ export async function executeHandlerChain( const mutationMap = MUTATION_FIELD_MAP[options.hookName]; for (let i = 0; i < entries.length; i++) { + // Cooperative abort: bail out of the chain when abortInflight() has fired. + // Checked before each handler so synchronous chains stop promptly instead + // of running to completion with only advisory notice to each handler. + if (context.signal.aborted) { + break; + } + const entry = entries[i]; if (!entry) { continue; @@ -195,6 +216,15 @@ function trackAsyncWork(output: AsyncOutput, hookName: string): Promise | }; const timeoutId = setTimeout(finish, timeout); + // In Node.js, Timeout objects expose `.unref()` to remove the reference + // the timer holds on the event loop. Without this, a leaked hook whose + // `work` never settles keeps the process alive for the full + // DEFAULT_ASYNC_TIMEOUT window after the main workload has finished. + // Guarded for browsers / other environments that return a plain number + // from setTimeout and don't expose the method. + if (isUnrefable(timeoutId)) { + timeoutId.unref(); + } output.work?.then(finish, (error: unknown) => { console.warn(`[HooksManager] Async work for hook "${hookName}" rejected:`, error); @@ -203,6 +233,23 @@ function trackAsyncWork(output: AsyncOutput, hookName: string): Promise | }); } +/** + * Typeguard for the Node.js Timeout object that carries a `.unref()` method. + * Browsers and other environments return a number from setTimeout, which has + * no such method. + */ +function isUnrefable(handle: unknown): handle is { + unref: () => void; +} { + if (typeof handle !== 'object' || handle === null || !('unref' in handle)) { + return false; + } + const candidate: { + unref: unknown; + } = handle; + return typeof candidate.unref === 'function'; +} + /** * Apply mutation fields from a result onto the current payload. */ diff --git a/packages/agent/src/lib/model-result.ts b/packages/agent/src/lib/model-result.ts index da7534b..8be201c 100644 --- a/packages/agent/src/lib/model-result.ts +++ b/packages/agent/src/lib/model-result.ts @@ -797,6 +797,9 @@ export class ModelResult< * auto-executed, required approval, or was approved later. * * Return shape: + * - `parse_error`: `toolCall.arguments` was a raw JSON string the model + * failed to produce valid JSON for. The caller should use the prebuilt + * FunctionCallOutputItem and not execute the tool. No hooks fire. * - `hook_blocked`: PreToolUse returned `block` (boolean true or a reason * string). The caller should synthesize a denied result without invoking * the tool. The FunctionCallOutputItem is prebuilt for convenience. @@ -809,6 +812,12 @@ export class ModelResult< turnContext: TurnContext, onPreliminaryResult?: (toolCallId: string, result: unknown) => void, ): Promise< + | { + type: 'parse_error'; + toolCall: ParsedToolCall; + errorMessage: string; + output: models.FunctionCallOutputItem; + } | { type: 'hook_blocked'; toolCall: ParsedToolCall; @@ -821,15 +830,50 @@ export class ModelResult< result: Awaited>; } > { + // Reject raw-string arguments before any hook fires. When the model + // produces invalid JSON, the parser leaves `toolCall.arguments` as the + // raw string; handing that to PreToolUse would either fail payload + // validation (silent no-op in non-strict mode) or deliver a malformed + // `toolInput` to handlers. Fail closed here so every execution path + // (auto-approve, manual approval, approved-on-resume) gets a consistent + // synthetic error without running the tool or firing hooks. + const rawArgs: unknown = toolCall.arguments; + if (typeof rawArgs === 'string') { + const errorMessage = + `Failed to parse tool call arguments for "${toolCall.name}": The model provided invalid JSON. ` + + `Raw arguments received: "${rawArgs}". ` + + 'Please provide valid JSON arguments for this tool call.'; + return { + type: 'parse_error', + toolCall, + errorMessage, + output: { + type: 'function_call_output' as const, + id: `output_${toolCall.id}`, + callId: toolCall.id, + output: JSON.stringify({ + error: errorMessage, + }), + }, + }; + } + let effectiveToolCall = toolCall; // Emit PreToolUse hook -- can block or mutate input. if (this.hooksManager) { + // Sentinel so downstream can tell "handler returned a replacement" apart + // from "handler didn't touch toolInput": a `??` fallback to `{}` when + // `toolCall.arguments` is null/undefined would otherwise defeat a + // reference-equality check against the original arguments, producing + // `{}` for tools that legitimately distinguish "no args" from "empty + // args". `mutationApplied` is the authoritative signal. + const originalToolInput = (toolCall.arguments ?? {}) as Record; const preResult = await this.hooksManager.emit( 'PreToolUse', { toolName: toolCall.name, - toolInput: (toolCall.arguments ?? {}) as Record, + toolInput: originalToolInput, sessionId: this.currentState?.id ?? '', }, { @@ -860,9 +904,12 @@ export class ModelResult< }; } - // Apply mutated input if present. + // Apply mutated input when a handler actually piped a replacement. + // `mutationApplied` is derived from an explicit `mutatedInput` result + // in the handler chain; see executeHandlerChain / applyMutations. const finalInput = preResult.finalPayload.toolInput; - if (finalInput !== toolCall.arguments) { + const mutationApplied = finalInput !== originalToolInput; + if (mutationApplied) { effectiveToolCall = { ...toolCall, arguments: finalInput, @@ -1068,13 +1115,21 @@ export class ModelResult< } // Route through runToolWithHooks so PreToolUse/PostToolUse fire even on - // the auto-approve path. + // the auto-approve path. `runToolWithHooks` also fails closed on raw + // JSON-parse failures so hooks never see a malformed payload. const hookOutcome = await this.runToolWithHooks( tool, tc as ParsedToolCall, turnContext, ); + if (hookOutcome.type === 'parse_error') { + this.broadcastToolResult(tc.id, { + error: hookOutcome.errorMessage, + } as InferToolOutputsUnion); + return createRejectedResult(tc.id, String(tc.name), hookOutcome.errorMessage); + } + if (hookOutcome.type === 'hook_blocked') { return createRejectedResult(tc.id, String(tc.name), hookOutcome.reason); } @@ -1246,33 +1301,6 @@ export class ModelResult< return null; } - // Check if arguments failed to parse (remained as string instead of object) - const args: unknown = toolCall.arguments; - if (typeof args === 'string') { - const rawArgs = args; - const errorMessage = - `Failed to parse tool call arguments for "${toolCall.name}": The model provided invalid JSON. ` + - `Raw arguments received: "${rawArgs}". ` + - 'Please provide valid JSON arguments for this tool call.'; - - this.broadcastToolResult(toolCall.id, { - error: errorMessage, - } as InferToolOutputsUnion); - - return { - type: 'parse_error' as const, - toolCall, - output: { - type: 'function_call_output' as const, - id: `output_${toolCall.id}`, - callId: toolCall.id, - output: JSON.stringify({ - error: errorMessage, - }), - }, - }; - } - const preliminaryResultsForCall: InferToolEventsUnion[] = []; const hasBroadcaster = this.toolEventBroadcaster || this.turnBroadcaster; @@ -1284,13 +1312,22 @@ export class ModelResult< } : undefined; - // Run the tool through the full Pre/Post lifecycle hooks. + // Run the tool through the full Pre/Post lifecycle hooks. The helper + // fails closed on a JSON-parse failure in toolCall.arguments so hooks + // never see a malformed payload; the caller below handles that case + // via the shared `parse_error` / `hook_blocked` branch. const executed = await this.runToolWithHooks( tool, toolCall, turnContext, onPreliminaryResult, ); + if (executed.type === 'parse_error') { + this.broadcastToolResult(toolCall.id, { + error: executed.errorMessage, + } as InferToolOutputsUnion); + return executed; + } if (executed.type === 'hook_blocked') { return executed; } @@ -1847,6 +1884,16 @@ export class ModelResult< turnContext, ); + if (hookOutcome.type === 'parse_error') { + this.broadcastToolResult(callId, { + error: hookOutcome.errorMessage, + } as InferToolOutputsUnion); + unsentResults.push( + createRejectedResult(callId, String(toolCall.name), hookOutcome.errorMessage), + ); + continue; + } + if (hookOutcome.type === 'hook_blocked') { unsentResults.push(createRejectedResult(callId, String(toolCall.name), hookOutcome.reason)); continue; diff --git a/packages/agent/tests/unit/hooks-emit-adversarial.test.ts b/packages/agent/tests/unit/hooks-emit-adversarial.test.ts index 0d169ea..c65578c 100644 --- a/packages/agent/tests/unit/hooks-emit-adversarial.test.ts +++ b/packages/agent/tests/unit/hooks-emit-adversarial.test.ts @@ -610,6 +610,76 @@ describe('executeHandlerChain (adversarial)', () => { }); }); + describe('abort signal propagation', () => { + it('bails out of the chain when context.signal aborts mid-chain', async () => { + const controller = new AbortController(); + const firstHandler = vi.fn(async () => { + controller.abort(); + return undefined; + }); + const secondHandler = vi.fn(); + const thirdHandler = vi.fn(); + + const entries: HookEntry[] = [ + { + handler: firstHandler, + }, + { + handler: secondHandler, + }, + { + handler: thirdHandler, + }, + ]; + + await executeHandlerChain( + entries, + {}, + { + signal: controller.signal, + hookName: 'Test', + sessionId: 's', + }, + { + hookName: 'Test', + throwOnHandlerError: false, + }, + ); + + expect(firstHandler).toHaveBeenCalledOnce(); + expect(secondHandler).not.toHaveBeenCalled(); + expect(thirdHandler).not.toHaveBeenCalled(); + }); + + it('skips every handler when signal is already aborted before the chain', async () => { + const controller = new AbortController(); + controller.abort(); + const handler = vi.fn(); + + const entries: HookEntry[] = [ + { + handler, + }, + ]; + + await executeHandlerChain( + entries, + {}, + { + signal: controller.signal, + hookName: 'Test', + sessionId: 's', + }, + { + hookName: 'Test', + throwOnHandlerError: false, + }, + ); + + expect(handler).not.toHaveBeenCalled(); + }); + }); + describe('large chain stress', () => { it('handles 1000 handlers without stack overflow', async () => { const entries: HookEntry< diff --git a/packages/agent/tests/unit/model-result-hooks.test.ts b/packages/agent/tests/unit/model-result-hooks.test.ts index 09a75c9..a3039e3 100644 --- a/packages/agent/tests/unit/model-result-hooks.test.ts +++ b/packages/agent/tests/unit/model-result-hooks.test.ts @@ -97,6 +97,8 @@ type InternalModelResult = { type: string; result?: unknown; reason?: string; + errorMessage?: string; + effectiveToolCall?: ParsedToolCall; }>; emitPermissionRequest: (toolCall: ParsedToolCall) => Promise<{ decision: 'allow' | 'deny' | 'ask_user'; @@ -293,6 +295,132 @@ describe('ModelResult hooks integration', () => { expect(fail).toHaveBeenCalledTimes(1); expect(post).not.toHaveBeenCalled(); }); + + it('returns parse_error and skips hooks when arguments failed to parse', async () => { + const hooks = new HooksManager(); + const pre = vi.fn(); + const post = vi.fn(); + const fail = vi.fn(); + hooks.on('PreToolUse', { + handler: pre, + }); + hooks.on('PostToolUse', { + handler: post, + }); + hooks.on('PostToolUseFailure', { + handler: fail, + }); + + const execSpy = vi.fn(); + const tool = makeAutoTool('parsebomb', () => { + execSpy(); + return { + ran: true, + }; + }); + const m = buildModelResult({ + tools: [ + tool, + ] as const, + hooks, + }); + + // Simulate what the stream parser leaves when JSON.parse fails on the + // raw arguments: `arguments` stays a string instead of becoming an + // object. The hook chain must not see this payload. + const result = await internal(m).runToolWithHooks( + tool, + { + id: 'call_parse', + name: 'parsebomb', + arguments: '{not valid json' as unknown as Record, + }, + { + numberOfTurns: 1, + }, + ); + + expect(result.type).toBe('parse_error'); + expect(result.errorMessage).toContain('Failed to parse tool call arguments'); + expect(execSpy).not.toHaveBeenCalled(); + expect(pre).not.toHaveBeenCalled(); + expect(post).not.toHaveBeenCalled(); + expect(fail).not.toHaveBeenCalled(); + }); + + it('preserves original null arguments on effectiveToolCall when no PreToolUse mutation fires', async () => { + // Regression: reference-equality against the coerced `{}` payload used + // to look like a mutation had occurred and overwrite `arguments` with + // `{}`, erasing the distinction between "no args" and "empty args". + // The fix tracks mutation application explicitly. + const hooks = new HooksManager(); + const pre = vi.fn(); + hooks.on('PreToolUse', { + handler: pre, + }); + + const tool = makeAutoTool('nopargs'); + const m = buildModelResult({ + tools: [ + tool, + ] as const, + hooks, + }); + + const result = await internal(m).runToolWithHooks( + tool, + { + id: 'call_noargs', + name: 'nopargs', + arguments: null as unknown as Record, + }, + { + numberOfTurns: 1, + }, + ); + + expect(result.type).toBe('execution'); + expect(pre).toHaveBeenCalledTimes(1); + // effectiveToolCall.arguments must pass through unchanged when no + // handler piped a mutation — NOT silently coerced to {}. + expect(result.effectiveToolCall?.arguments).toBeNull(); + }); + + it('applies mutatedInput from PreToolUse even when original arguments were null', async () => { + const hooks = new HooksManager(); + hooks.on('PreToolUse', { + handler: () => ({ + mutatedInput: { + injected: true, + }, + }), + }); + + const tool = makeAutoTool('withmutation'); + const m = buildModelResult({ + tools: [ + tool, + ] as const, + hooks, + }); + + const result = await internal(m).runToolWithHooks( + tool, + { + id: 'call_mut', + name: 'withmutation', + arguments: null as unknown as Record, + }, + { + numberOfTurns: 1, + }, + ); + + expect(result.type).toBe('execution'); + expect(result.effectiveToolCall?.arguments).toEqual({ + injected: true, + }); + }); }); describe('executeAutoApproveTools', () => {