From e0389baaa41f02d18ac550c4c960692261edb7b9 Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Sun, 15 Mar 2026 20:19:04 -0700 Subject: [PATCH 01/12] feat(concurrency): bullmq based queueing system --- apps/sim/app/api/jobs/[jobId]/route.test.ts | 160 +++++ apps/sim/app/api/jobs/[jobId]/route.ts | 56 +- .../app/api/schedules/execute/route.test.ts | 85 ++- apps/sim/app/api/schedules/execute/route.ts | 63 +- .../app/api/webhooks/trigger/[path]/route.ts | 50 +- .../[id]/execute/route.async.test.ts | 35 +- .../app/api/workflows/[id]/execute/route.ts | 279 ++++++++- apps/sim/background/schedule-execution.ts | 1 + apps/sim/background/workflow-execution.ts | 1 + .../workspace-notification-delivery.ts | 218 ++++++- apps/sim/lib/billing/subscriptions/utils.ts | 20 +- apps/sim/lib/billing/types/index.ts | 40 +- apps/sim/lib/billing/webhooks/enterprise.ts | 35 +- .../lib/billing/workspace-concurrency.test.ts | 146 +++++ apps/sim/lib/billing/workspace-concurrency.ts | 170 ++++++ apps/sim/lib/core/admission/gate.ts | 60 ++ .../lib/core/async-jobs/backends/bullmq.ts | 106 ++++ .../sim/lib/core/async-jobs/backends/index.ts | 1 + apps/sim/lib/core/async-jobs/config.ts | 46 +- apps/sim/lib/core/async-jobs/index.ts | 1 + apps/sim/lib/core/async-jobs/types.ts | 6 +- apps/sim/lib/core/bullmq/connection.ts | 29 + apps/sim/lib/core/bullmq/index.ts | 16 + apps/sim/lib/core/bullmq/queues.ts | 196 ++++++ apps/sim/lib/core/config/env.ts | 9 + .../lib/core/workspace-dispatch/adapter.ts | 80 +++ .../workspace-dispatch/dispatcher.test.ts | 175 ++++++ .../lib/core/workspace-dispatch/dispatcher.ts | 156 +++++ .../lib/core/workspace-dispatch/factory.ts | 42 ++ apps/sim/lib/core/workspace-dispatch/index.ts | 32 + .../workspace-dispatch/memory-store.test.ts | 65 ++ .../core/workspace-dispatch/memory-store.ts | 478 +++++++++++++++ .../lib/core/workspace-dispatch/planner.ts | 154 +++++ .../workspace-dispatch/reconciler.test.ts | 225 +++++++ .../lib/core/workspace-dispatch/reconciler.ts | 196 
++++++ .../core/workspace-dispatch/redis-store.ts | 574 ++++++++++++++++++ .../core/workspace-dispatch/status.test.ts | 102 ++++ .../sim/lib/core/workspace-dispatch/status.ts | 110 ++++ apps/sim/lib/core/workspace-dispatch/store.ts | 193 ++++++ apps/sim/lib/core/workspace-dispatch/types.ts | 107 ++++ .../core/workspace-dispatch/worker.test.ts | 98 +++ .../sim/lib/core/workspace-dispatch/worker.ts | 104 ++++ apps/sim/lib/execution/buffered-stream.ts | 111 ++++ .../lib/knowledge/connectors/sync-engine.ts | 83 ++- .../knowledge/documents/document-processor.ts | 14 +- .../documents/parser-extension.test.ts | 27 + .../knowledge/documents/parser-extension.ts | 48 ++ apps/sim/lib/knowledge/documents/queue.ts | 227 ------- apps/sim/lib/knowledge/documents/service.ts | 178 +++--- apps/sim/lib/logs/events.ts | 6 + .../lib/notifications/inactivity-polling.ts | 3 + .../uploads/utils/user-file-base64.server.ts | 8 +- apps/sim/lib/webhooks/processor.test.ts | 26 +- apps/sim/lib/webhooks/processor.ts | 112 ++-- .../workflows/executor/execution-events.ts | 61 +- .../executor/queued-workflow-execution.ts | 339 +++++++++++ apps/sim/lib/workflows/utils.ts | 8 +- apps/sim/package.json | 5 +- apps/sim/worker/health.ts | 77 +++ apps/sim/worker/index.ts | 190 ++++++ .../processors/knowledge-connector-sync.ts | 22 + .../knowledge-document-processing.ts | 26 + .../processors/mothership-job-execution.ts | 20 + apps/sim/worker/processors/schedule.ts | 21 + apps/sim/worker/processors/webhook.ts | 21 + apps/sim/worker/processors/workflow.ts | 51 ++ .../workspace-notification-delivery.ts | 32 + bun.lock | 30 +- docker-compose.prod.yml | 40 ++ helm/sim/values.yaml | 5 + 70 files changed, 5871 insertions(+), 640 deletions(-) create mode 100644 apps/sim/app/api/jobs/[jobId]/route.test.ts create mode 100644 apps/sim/lib/billing/workspace-concurrency.test.ts create mode 100644 apps/sim/lib/billing/workspace-concurrency.ts create mode 100644 apps/sim/lib/core/admission/gate.ts create mode 100644 
apps/sim/lib/core/async-jobs/backends/bullmq.ts create mode 100644 apps/sim/lib/core/bullmq/connection.ts create mode 100644 apps/sim/lib/core/bullmq/index.ts create mode 100644 apps/sim/lib/core/bullmq/queues.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/adapter.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/dispatcher.test.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/dispatcher.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/factory.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/index.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/memory-store.test.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/memory-store.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/planner.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/reconciler.test.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/reconciler.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/redis-store.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/status.test.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/status.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/store.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/types.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/worker.test.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/worker.ts create mode 100644 apps/sim/lib/execution/buffered-stream.ts create mode 100644 apps/sim/lib/knowledge/documents/parser-extension.test.ts create mode 100644 apps/sim/lib/knowledge/documents/parser-extension.ts delete mode 100644 apps/sim/lib/knowledge/documents/queue.ts create mode 100644 apps/sim/lib/workflows/executor/queued-workflow-execution.ts create mode 100644 apps/sim/worker/health.ts create mode 100644 apps/sim/worker/index.ts create mode 100644 apps/sim/worker/processors/knowledge-connector-sync.ts create mode 100644 apps/sim/worker/processors/knowledge-document-processing.ts create mode 
100644 apps/sim/worker/processors/mothership-job-execution.ts create mode 100644 apps/sim/worker/processors/schedule.ts create mode 100644 apps/sim/worker/processors/webhook.ts create mode 100644 apps/sim/worker/processors/workflow.ts create mode 100644 apps/sim/worker/processors/workspace-notification-delivery.ts diff --git a/apps/sim/app/api/jobs/[jobId]/route.test.ts b/apps/sim/app/api/jobs/[jobId]/route.test.ts new file mode 100644 index 00000000000..050c0bee2c5 --- /dev/null +++ b/apps/sim/app/api/jobs/[jobId]/route.test.ts @@ -0,0 +1,160 @@ +/** + * @vitest-environment node + */ +import type { NextRequest } from 'next/server' +import { beforeEach, describe, expect, it, vi } from 'vitest' + +const { + mockCheckHybridAuth, + mockGetDispatchJobRecord, + mockGetJobQueue, + mockVerifyWorkflowAccess, + mockGetWorkflowById, +} = vi.hoisted(() => ({ + mockCheckHybridAuth: vi.fn(), + mockGetDispatchJobRecord: vi.fn(), + mockGetJobQueue: vi.fn(), + mockVerifyWorkflowAccess: vi.fn(), + mockGetWorkflowById: vi.fn(), +})) + +vi.mock('@sim/logger', () => ({ + createLogger: () => ({ + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + debug: vi.fn(), + }), +})) + +vi.mock('@/lib/auth/hybrid', () => ({ + checkHybridAuth: mockCheckHybridAuth, +})) + +vi.mock('@/lib/core/async-jobs', () => ({ + JOB_STATUS: { + PENDING: 'pending', + PROCESSING: 'processing', + COMPLETED: 'completed', + FAILED: 'failed', + }, + getJobQueue: mockGetJobQueue, +})) + +vi.mock('@/lib/core/workspace-dispatch/store', () => ({ + getDispatchJobRecord: mockGetDispatchJobRecord, +})) + +vi.mock('@/lib/core/utils/request', () => ({ + generateRequestId: vi.fn().mockReturnValue('request-1'), +})) + +vi.mock('@/socket/middleware/permissions', () => ({ + verifyWorkflowAccess: mockVerifyWorkflowAccess, +})) + +vi.mock('@/lib/workflows/utils', () => ({ + getWorkflowById: mockGetWorkflowById, +})) + +import { GET } from './route' + +function createMockRequest(): NextRequest { + return { + headers: { + get: () => 
null, + }, + } as NextRequest +} + +describe('GET /api/jobs/[jobId]', () => { + beforeEach(() => { + vi.clearAllMocks() + + mockCheckHybridAuth.mockResolvedValue({ + success: true, + userId: 'user-1', + apiKeyType: undefined, + workspaceId: undefined, + }) + + mockVerifyWorkflowAccess.mockResolvedValue({ hasAccess: true }) + mockGetWorkflowById.mockResolvedValue({ + id: 'workflow-1', + workspaceId: 'workspace-1', + }) + + mockGetJobQueue.mockResolvedValue({ + getJob: vi.fn().mockResolvedValue(null), + }) + }) + + it('returns dispatcher-aware waiting status with metadata', async () => { + mockGetDispatchJobRecord.mockResolvedValue({ + id: 'dispatch-1', + workspaceId: 'workspace-1', + lane: 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', + bullmqPayload: {}, + metadata: { + workflowId: 'workflow-1', + }, + priority: 10, + status: 'waiting', + createdAt: 1000, + admittedAt: 2000, + }) + + const response = await GET(createMockRequest(), { + params: Promise.resolve({ jobId: 'dispatch-1' }), + }) + const body = await response.json() + + expect(response.status).toBe(200) + expect(body.status).toBe('waiting') + expect(body.metadata.queueName).toBe('workflow-execution') + expect(body.metadata.lane).toBe('runtime') + expect(body.metadata.workspaceId).toBe('workspace-1') + }) + + it('returns completed output from dispatch state', async () => { + mockGetDispatchJobRecord.mockResolvedValue({ + id: 'dispatch-2', + workspaceId: 'workspace-1', + lane: 'interactive', + queueName: 'workflow-execution', + bullmqJobName: 'direct-workflow-execution', + bullmqPayload: {}, + metadata: { + workflowId: 'workflow-1', + }, + priority: 1, + status: 'completed', + createdAt: 1000, + startedAt: 2000, + completedAt: 7000, + output: { success: true }, + }) + + const response = await GET(createMockRequest(), { + params: Promise.resolve({ jobId: 'dispatch-2' }), + }) + const body = await response.json() + + expect(response.status).toBe(200) + 
expect(body.status).toBe('completed') + expect(body.output).toEqual({ success: true }) + expect(body.metadata.duration).toBe(5000) + }) + + it('returns 404 when neither dispatch nor BullMQ job exists', async () => { + mockGetDispatchJobRecord.mockResolvedValue(null) + + const response = await GET(createMockRequest(), { + params: Promise.resolve({ jobId: 'missing-job' }), + }) + + expect(response.status).toBe(404) + }) +}) diff --git a/apps/sim/app/api/jobs/[jobId]/route.ts b/apps/sim/app/api/jobs/[jobId]/route.ts index cb8a43a80de..aed0a106f8c 100644 --- a/apps/sim/app/api/jobs/[jobId]/route.ts +++ b/apps/sim/app/api/jobs/[jobId]/route.ts @@ -1,8 +1,10 @@ import { createLogger } from '@sim/logger' import { type NextRequest, NextResponse } from 'next/server' import { checkHybridAuth } from '@/lib/auth/hybrid' -import { getJobQueue, JOB_STATUS } from '@/lib/core/async-jobs' +import { getJobQueue } from '@/lib/core/async-jobs' import { generateRequestId } from '@/lib/core/utils/request' +import { presentDispatchOrJobStatus } from '@/lib/core/workspace-dispatch/status' +import { getDispatchJobRecord } from '@/lib/core/workspace-dispatch/store' import { createErrorResponse } from '@/app/api/workflows/utils' const logger = createLogger('TaskStatusAPI') @@ -23,68 +25,54 @@ export async function GET( const authenticatedUserId = authResult.userId + const dispatchJob = await getDispatchJobRecord(taskId) const jobQueue = await getJobQueue() - const job = await jobQueue.getJob(taskId) + const job = dispatchJob ? await jobQueue.getJob(taskId) : await jobQueue.getJob(taskId) - if (!job) { + if (!job && !dispatchJob) { return createErrorResponse('Task not found', 404) } - if (job.metadata?.workflowId) { + const metadataToCheck = dispatchJob?.metadata ?? 
job?.metadata + + if (metadataToCheck?.workflowId) { const { verifyWorkflowAccess } = await import('@/socket/middleware/permissions') const accessCheck = await verifyWorkflowAccess( authenticatedUserId, - job.metadata.workflowId as string + metadataToCheck.workflowId as string ) if (!accessCheck.hasAccess) { - logger.warn(`[${requestId}] Access denied to workflow ${job.metadata.workflowId}`) + logger.warn(`[${requestId}] Access denied to workflow ${metadataToCheck.workflowId}`) return createErrorResponse('Access denied', 403) } if (authResult.apiKeyType === 'workspace' && authResult.workspaceId) { const { getWorkflowById } = await import('@/lib/workflows/utils') - const workflow = await getWorkflowById(job.metadata.workflowId as string) + const workflow = await getWorkflowById(metadataToCheck.workflowId as string) if (!workflow?.workspaceId || workflow.workspaceId !== authResult.workspaceId) { return createErrorResponse('API key is not authorized for this workspace', 403) } } - } else if (job.metadata?.userId && job.metadata.userId !== authenticatedUserId) { - logger.warn(`[${requestId}] Access denied to user ${job.metadata.userId}`) + } else if (metadataToCheck?.userId && metadataToCheck.userId !== authenticatedUserId) { + logger.warn(`[${requestId}] Access denied to user ${metadataToCheck.userId}`) return createErrorResponse('Access denied', 403) - } else if (!job.metadata?.userId && !job.metadata?.workflowId) { + } else if (!metadataToCheck?.userId && !metadataToCheck?.workflowId) { logger.warn(`[${requestId}] Access denied to job ${taskId}`) return createErrorResponse('Access denied', 403) } - const mappedStatus = job.status === JOB_STATUS.PENDING ? 
'queued' : job.status - + const presented = presentDispatchOrJobStatus(dispatchJob, job) const response: any = { success: true, taskId, - status: mappedStatus, - metadata: { - startedAt: job.startedAt, - }, - } - - if (job.status === JOB_STATUS.COMPLETED) { - response.output = job.output - response.metadata.completedAt = job.completedAt - if (job.startedAt && job.completedAt) { - response.metadata.duration = job.completedAt.getTime() - job.startedAt.getTime() - } - } - - if (job.status === JOB_STATUS.FAILED) { - response.error = job.error - response.metadata.completedAt = job.completedAt - if (job.startedAt && job.completedAt) { - response.metadata.duration = job.completedAt.getTime() - job.startedAt.getTime() - } + status: presented.status, + metadata: presented.metadata, } - if (job.status === JOB_STATUS.PROCESSING || job.status === JOB_STATUS.PENDING) { - response.estimatedDuration = 300000 + if (presented.output !== undefined) response.output = presented.output + if (presented.error !== undefined) response.error = presented.error + if (presented.estimatedDuration !== undefined) { + response.estimatedDuration = presented.estimatedDuration } return NextResponse.json(response) diff --git a/apps/sim/app/api/schedules/execute/route.test.ts b/apps/sim/app/api/schedules/execute/route.test.ts index cfdf6c3877b..80c59e537d1 100644 --- a/apps/sim/app/api/schedules/execute/route.test.ts +++ b/apps/sim/app/api/schedules/execute/route.test.ts @@ -9,10 +9,12 @@ import { beforeEach, describe, expect, it, vi } from 'vitest' const { mockVerifyCronAuth, mockExecuteScheduleJob, + mockExecuteJobInline, mockFeatureFlags, mockDbReturning, mockDbUpdate, mockEnqueue, + mockEnqueueWorkspaceDispatch, mockStartJob, mockCompleteJob, mockMarkJobFailed, @@ -22,6 +24,7 @@ const { const mockDbSet = vi.fn().mockReturnValue({ where: mockDbWhere }) const mockDbUpdate = vi.fn().mockReturnValue({ set: mockDbSet }) const mockEnqueue = vi.fn().mockResolvedValue('job-id-1') + const 
mockEnqueueWorkspaceDispatch = vi.fn().mockResolvedValue('job-id-1') const mockStartJob = vi.fn().mockResolvedValue(undefined) const mockCompleteJob = vi.fn().mockResolvedValue(undefined) const mockMarkJobFailed = vi.fn().mockResolvedValue(undefined) @@ -29,6 +32,7 @@ const { return { mockVerifyCronAuth: vi.fn().mockReturnValue(null), mockExecuteScheduleJob: vi.fn().mockResolvedValue(undefined), + mockExecuteJobInline: vi.fn().mockResolvedValue(undefined), mockFeatureFlags: { isTriggerDevEnabled: false, isHosted: false, @@ -38,6 +42,7 @@ const { mockDbReturning, mockDbUpdate, mockEnqueue, + mockEnqueueWorkspaceDispatch, mockStartJob, mockCompleteJob, mockMarkJobFailed, @@ -50,6 +55,8 @@ vi.mock('@/lib/auth/internal', () => ({ vi.mock('@/background/schedule-execution', () => ({ executeScheduleJob: mockExecuteScheduleJob, + executeJobInline: mockExecuteJobInline, + releaseScheduleLock: vi.fn().mockResolvedValue(undefined), })) vi.mock('@/lib/core/config/feature-flags', () => mockFeatureFlags) @@ -68,6 +75,22 @@ vi.mock('@/lib/core/async-jobs', () => ({ shouldExecuteInline: vi.fn().mockReturnValue(false), })) +vi.mock('@/lib/core/bullmq', () => ({ + isBullMQEnabled: vi.fn().mockReturnValue(true), + createBullMQJobData: vi.fn((payload: unknown) => ({ payload })), +})) + +vi.mock('@/lib/core/workspace-dispatch', () => ({ + enqueueWorkspaceDispatch: mockEnqueueWorkspaceDispatch, +})) + +vi.mock('@/lib/workflows/utils', () => ({ + getWorkflowById: vi.fn().mockResolvedValue({ + id: 'workflow-1', + workspaceId: 'workspace-1', + }), +})) + vi.mock('drizzle-orm', () => ({ and: vi.fn((...conditions: unknown[]) => ({ type: 'and', conditions })), eq: vi.fn((field: unknown, value: unknown) => ({ field, value, type: 'eq' })), @@ -142,6 +165,18 @@ const MULTIPLE_SCHEDULES = [ }, ] +const SINGLE_JOB = [ + { + id: 'job-1', + cronExpression: '0 * * * *', + failedCount: 0, + lastQueuedAt: undefined, + sourceUserId: 'user-1', + sourceWorkspaceId: 'workspace-1', + sourceType: 'job', + }, 
+] + function createMockRequest(): NextRequest { const mockHeaders = new Map([ ['authorization', 'Bearer test-cron-secret'], @@ -211,30 +246,44 @@ describe('Scheduled Workflow Execution API Route', () => { expect(data).toHaveProperty('executedCount', 2) }) + it('should queue mothership jobs to BullMQ when available', async () => { + mockDbReturning.mockReturnValueOnce([]).mockReturnValueOnce(SINGLE_JOB) + + const response = await GET(createMockRequest()) + + expect(response.status).toBe(200) + expect(mockEnqueueWorkspaceDispatch).toHaveBeenCalledWith( + expect.objectContaining({ + workspaceId: 'workspace-1', + lane: 'runtime', + queueName: 'mothership-job-execution', + bullmqJobName: 'mothership-job-execution', + bullmqPayload: { + payload: { + scheduleId: 'job-1', + cronExpression: '0 * * * *', + failedCount: 0, + now: expect.any(String), + }, + }, + }) + ) + expect(mockExecuteJobInline).not.toHaveBeenCalled() + }) + it('should enqueue preassigned correlation metadata for schedules', async () => { mockDbReturning.mockReturnValue(SINGLE_SCHEDULE) const response = await GET(createMockRequest()) expect(response.status).toBe(200) - expect(mockEnqueue).toHaveBeenCalledWith( - 'schedule-execution', + expect(mockEnqueueWorkspaceDispatch).toHaveBeenCalledWith( expect.objectContaining({ - scheduleId: 'schedule-1', - workflowId: 'workflow-1', - executionId: 'schedule-execution-1', - requestId: 'test-request-id', - correlation: { - executionId: 'schedule-execution-1', - requestId: 'test-request-id', - source: 'schedule', - workflowId: 'workflow-1', - scheduleId: 'schedule-1', - triggerType: 'schedule', - scheduledFor: '2025-01-01T00:00:00.000Z', - }, - }), - { + id: 'schedule-execution-1', + workspaceId: 'workspace-1', + lane: 'runtime', + queueName: 'schedule-execution', + bullmqJobName: 'schedule-execution', metadata: { workflowId: 'workflow-1', correlation: { @@ -247,7 +296,7 @@ describe('Scheduled Workflow Execution API Route', () => { scheduledFor: 
'2025-01-01T00:00:00.000Z', }, }, - } + }) ) }) }) diff --git a/apps/sim/app/api/schedules/execute/route.ts b/apps/sim/app/api/schedules/execute/route.ts index cef36bfb25b..1744e5743ff 100644 --- a/apps/sim/app/api/schedules/execute/route.ts +++ b/apps/sim/app/api/schedules/execute/route.ts @@ -5,7 +5,9 @@ import { type NextRequest, NextResponse } from 'next/server' import { v4 as uuidv4 } from 'uuid' import { verifyCronAuth } from '@/lib/auth/internal' import { getJobQueue, shouldExecuteInline } from '@/lib/core/async-jobs' +import { createBullMQJobData, isBullMQEnabled } from '@/lib/core/bullmq' import { generateRequestId } from '@/lib/core/utils/request' +import { enqueueWorkspaceDispatch } from '@/lib/core/workspace-dispatch' import { executeJobInline, executeScheduleJob, @@ -73,6 +75,8 @@ export async function GET(request: NextRequest) { cronExpression: workflowSchedule.cronExpression, failedCount: workflowSchedule.failedCount, lastQueuedAt: workflowSchedule.lastQueuedAt, + sourceWorkspaceId: workflowSchedule.sourceWorkspaceId, + sourceUserId: workflowSchedule.sourceUserId, sourceType: workflowSchedule.sourceType, }) @@ -111,9 +115,41 @@ export async function GET(request: NextRequest) { } try { - const jobId = await jobQueue.enqueue('schedule-execution', payload, { - metadata: { workflowId: schedule.workflowId ?? undefined, correlation }, - }) + const workspaceId = schedule.workflowId + ? (await import('@/lib/workflows/utils')).getWorkflowById + : null + const resolvedWorkflow = + schedule.workflowId && workspaceId ? 
await workspaceId(schedule.workflowId) : null + const resolvedWorkspaceId = resolvedWorkflow?.workspaceId + + let jobId: string + if (isBullMQEnabled()) { + if (!resolvedWorkspaceId) { + throw new Error( + `Missing workspace for scheduled workflow ${schedule.workflowId}; refusing to bypass workspace admission` + ) + } + + jobId = await enqueueWorkspaceDispatch({ + id: executionId, + workspaceId: resolvedWorkspaceId, + lane: 'runtime', + queueName: 'schedule-execution', + bullmqJobName: 'schedule-execution', + bullmqPayload: createBullMQJobData(payload, { + workflowId: schedule.workflowId ?? undefined, + correlation, + }), + metadata: { + workflowId: schedule.workflowId ?? undefined, + correlation, + }, + }) + } else { + jobId = await jobQueue.enqueue('schedule-execution', payload, { + metadata: { workflowId: schedule.workflowId ?? undefined, correlation }, + }) + } logger.info( `[${requestId}] Queued schedule execution task ${jobId} for workflow ${schedule.workflowId}` ) @@ -165,7 +201,7 @@ export async function GET(request: NextRequest) { } }) - // Jobs always execute inline (no TriggerDev) + // Mothership jobs use BullMQ when available, otherwise direct inline execution. const jobPromises = dueJobs.map(async (job) => { const queueTime = job.lastQueuedAt ?? 
queuedAt const payload = { @@ -176,7 +212,24 @@ export async function GET(request: NextRequest) { } try { - await executeJobInline(payload) + if (isBullMQEnabled()) { + if (!job.sourceWorkspaceId || !job.sourceUserId) { + throw new Error(`Mothership job ${job.id} is missing workspace/user ownership`) + } + + await enqueueWorkspaceDispatch({ + workspaceId: job.sourceWorkspaceId!, + lane: 'runtime', + queueName: 'mothership-job-execution', + bullmqJobName: 'mothership-job-execution', + bullmqPayload: createBullMQJobData(payload), + metadata: { + userId: job.sourceUserId, + }, + }) + } else { + await executeJobInline(payload) + } } catch (error) { logger.error(`[${requestId}] Job execution failed for ${job.id}`, { error: error instanceof Error ? error.message : String(error), diff --git a/apps/sim/app/api/webhooks/trigger/[path]/route.ts b/apps/sim/app/api/webhooks/trigger/[path]/route.ts index 56304c3e850..2c283b72fdb 100644 --- a/apps/sim/app/api/webhooks/trigger/[path]/route.ts +++ b/apps/sim/app/api/webhooks/trigger/[path]/route.ts @@ -1,6 +1,8 @@ import { createLogger } from '@sim/logger' import { type NextRequest, NextResponse } from 'next/server' +import { admissionRejectedResponse, tryAdmit } from '@/lib/core/admission/gate' import { generateRequestId } from '@/lib/core/utils/request' +import { DispatchQueueFullError } from '@/lib/core/workspace-dispatch' import { checkWebhookPreprocessing, findAllWebhooksForPath, @@ -41,10 +43,25 @@ export async function POST( request: NextRequest, { params }: { params: Promise<{ path: string }> } ) { + const ticket = tryAdmit() + if (!ticket) { + return admissionRejectedResponse() + } + + try { + return await handleWebhookPost(request, params) + } finally { + ticket.release() + } +} + +async function handleWebhookPost( + request: NextRequest, + params: Promise<{ path: string }> +): Promise { const requestId = generateRequestId() const { path } = await params - // Handle provider challenges before body parsing (Microsoft 
Graph validationToken, etc.) const earlyChallenge = await handleProviderChallenges({}, request, requestId, path) if (earlyChallenge) { return earlyChallenge @@ -140,17 +157,30 @@ export async function POST( continue } - const response = await queueWebhookExecution(foundWebhook, foundWorkflow, body, request, { - requestId, - path, - actorUserId: preprocessResult.actorUserId, - executionId: preprocessResult.executionId, - correlation: preprocessResult.correlation, - }) - responses.push(response) + try { + const response = await queueWebhookExecution(foundWebhook, foundWorkflow, body, request, { + requestId, + path, + actorUserId: preprocessResult.actorUserId, + executionId: preprocessResult.executionId, + correlation: preprocessResult.correlation, + }) + responses.push(response) + } catch (error) { + if (error instanceof DispatchQueueFullError) { + return NextResponse.json( + { + error: 'Service temporarily at capacity', + message: error.message, + retryAfterSeconds: 10, + }, + { status: 503, headers: { 'Retry-After': '10' } } + ) + } + throw error + } } - // Return the last successful response, or a combined response for multiple webhooks if (responses.length === 0) { return new NextResponse('No webhooks processed successfully', { status: 500 }) } diff --git a/apps/sim/app/api/workflows/[id]/execute/route.async.test.ts b/apps/sim/app/api/workflows/[id]/execute/route.async.test.ts index 7d6c599dcfd..1a4e0bd980f 100644 --- a/apps/sim/app/api/workflows/[id]/execute/route.async.test.ts +++ b/apps/sim/app/api/workflows/[id]/execute/route.async.test.ts @@ -10,11 +10,13 @@ const { mockAuthorizeWorkflowByWorkspacePermission, mockPreprocessExecution, mockEnqueue, + mockEnqueueWorkspaceDispatch, } = vi.hoisted(() => ({ mockCheckHybridAuth: vi.fn(), mockAuthorizeWorkflowByWorkspacePermission: vi.fn(), mockPreprocessExecution: vi.fn(), mockEnqueue: vi.fn().mockResolvedValue('job-123'), + mockEnqueueWorkspaceDispatch: vi.fn().mockResolvedValue('job-123'), })) 
vi.mock('@/lib/auth/hybrid', () => ({ @@ -44,6 +46,16 @@ vi.mock('@/lib/core/async-jobs', () => ({ markJobFailed: vi.fn(), }), shouldExecuteInline: vi.fn().mockReturnValue(false), + shouldUseBullMQ: vi.fn().mockReturnValue(true), +})) + +vi.mock('@/lib/core/bullmq', () => ({ + createBullMQJobData: vi.fn((payload: unknown, metadata?: unknown) => ({ payload, metadata })), +})) + +vi.mock('@/lib/core/workspace-dispatch', () => ({ + enqueueWorkspaceDispatch: mockEnqueueWorkspaceDispatch, + waitForDispatchJob: vi.fn(), })) vi.mock('@/lib/core/utils/request', () => ({ @@ -132,22 +144,13 @@ describe('workflow execute async route', () => { expect(response.status).toBe(202) expect(body.executionId).toBe('execution-123') expect(body.jobId).toBe('job-123') - expect(mockEnqueue).toHaveBeenCalledWith( - 'workflow-execution', + expect(mockEnqueueWorkspaceDispatch).toHaveBeenCalledWith( expect.objectContaining({ - workflowId: 'workflow-1', - userId: 'actor-1', - executionId: 'execution-123', - requestId: 'req-12345678', - correlation: { - executionId: 'execution-123', - requestId: 'req-12345678', - source: 'workflow', - workflowId: 'workflow-1', - triggerType: 'manual', - }, - }), - { + id: 'execution-123', + workspaceId: 'workspace-1', + lane: 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', metadata: { workflowId: 'workflow-1', userId: 'actor-1', @@ -159,7 +162,7 @@ describe('workflow execute async route', () => { triggerType: 'manual', }, }, - } + }) ) }) }) diff --git a/apps/sim/app/api/workflows/[id]/execute/route.ts b/apps/sim/app/api/workflows/[id]/execute/route.ts index 8cee947272f..0b92df8aece 100644 --- a/apps/sim/app/api/workflows/[id]/execute/route.ts +++ b/apps/sim/app/api/workflows/[id]/execute/route.ts @@ -3,7 +3,9 @@ import { type NextRequest, NextResponse } from 'next/server' import { validate as uuidValidate, v4 as uuidv4 } from 'uuid' import { z } from 'zod' import { AuthType, checkHybridAuth } from '@/lib/auth/hybrid' 
-import { getJobQueue, shouldExecuteInline } from '@/lib/core/async-jobs' +import { admissionRejectedResponse, tryAdmit } from '@/lib/core/admission/gate' +import { getJobQueue, shouldExecuteInline, shouldUseBullMQ } from '@/lib/core/async-jobs' +import { createBullMQJobData } from '@/lib/core/bullmq' import { createTimeoutAbortController, getTimeoutErrorMessage, @@ -12,6 +14,13 @@ import { import { generateRequestId } from '@/lib/core/utils/request' import { SSE_HEADERS } from '@/lib/core/utils/sse' import { getBaseUrl } from '@/lib/core/utils/urls' +import { + DispatchQueueFullError, + enqueueWorkspaceDispatch, + type WorkspaceDispatchLane, + waitForDispatchJob, +} from '@/lib/core/workspace-dispatch' +import { createBufferedExecutionStream } from '@/lib/execution/buffered-stream' import { buildNextCallChain, parseCallChain, @@ -33,6 +42,11 @@ import { import { executeWorkflowCore } from '@/lib/workflows/executor/execution-core' import { type ExecutionEvent, encodeSSEEvent } from '@/lib/workflows/executor/execution-events' import { PauseResumeManager } from '@/lib/workflows/executor/human-in-the-loop-manager' +import { + DIRECT_WORKFLOW_JOB_NAME, + type QueuedWorkflowExecutionPayload, + type QueuedWorkflowExecutionResult, +} from '@/lib/workflows/executor/queued-workflow-execution' import { loadDeployedWorkflowState, loadWorkflowFromNormalizedTables, @@ -161,6 +175,7 @@ type AsyncExecutionParams = { requestId: string workflowId: string userId: string + workspaceId: string input: any triggerType: CoreTriggerType executionId: string @@ -168,7 +183,8 @@ type AsyncExecutionParams = { } async function handleAsyncExecution(params: AsyncExecutionParams): Promise { - const { requestId, workflowId, userId, input, triggerType, executionId, callChain } = params + const { requestId, workflowId, userId, workspaceId, input, triggerType, executionId, callChain } = + params const correlation = { executionId, @@ -181,6 +197,7 @@ async function handleAsyncExecution(params: 
AsyncExecutionParams): Promise { try { - await jobQueue.startJob(jobId) + await inlineJobQueue.startJob(jobId) const output = await executeWorkflowJob(payload) - await jobQueue.completeJob(jobId, output) + await inlineJobQueue.completeJob(jobId, output) } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error) logger.error(`[${requestId}] Async workflow execution failed`, { @@ -213,7 +249,7 @@ async function handleAsyncExecution(params: AsyncExecutionParams): Promise }) { + const ticket = tryAdmit() + if (!ticket) { + return admissionRejectedResponse() + } + + try { + return await handleExecutePost(req, params) + } finally { + ticket.release() + } +} + +async function handleExecutePost( + req: NextRequest, + params: Promise<{ id: string }> +): Promise { const requestId = generateRequestId() const { id: workflowId } = await params @@ -584,6 +672,7 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id: requestId, workflowId, userId: actorUserId, + workspaceId, input, triggerType: loggingTriggerType, executionId, @@ -676,30 +765,105 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id: if (!enableSSE) { logger.info(`[${requestId}] Using non-SSE execution (direct JSON response)`) + const metadata: ExecutionMetadata = { + requestId, + executionId, + workflowId, + workspaceId, + userId: actorUserId, + sessionUserId: isClientSession ? userId : undefined, + workflowUserId: workflow.userId, + triggerType, + useDraftState: shouldUseDraftState, + startTime: new Date().toISOString(), + isClientSession, + enforceCredentialAccess: useAuthenticatedUserAsActor, + workflowStateOverride: effectiveWorkflowStateOverride, + callChain, + } + + const executionVariables = cachedWorkflowData?.variables ?? workflow.variables ?? 
{} + + if (shouldUseBullMQ()) { + try { + const dispatchJobId = await enqueueDirectWorkflowExecution( + { + workflow, + metadata, + input: processedInput, + variables: executionVariables, + selectedOutputs, + includeFileBase64, + base64MaxBytes, + stopAfterBlockId, + timeoutMs: preprocessResult.executionTimeout?.sync, + runFromBlock: resolvedRunFromBlock, + }, + triggerType === 'manual' ? 1 : 5, + 'interactive' + ) + + const resultRecord = await waitForDispatchJob( + dispatchJobId, + (preprocessResult.executionTimeout?.sync ?? 300000) + 30000 + ) + + const result = resultRecord.output as QueuedWorkflowExecutionResult + + const resultForResponseBlock = { + success: result.success, + logs: result.logs, + output: result.output, + } + + if ( + auth.authType !== AuthType.INTERNAL_JWT && + workflowHasResponseBlock(resultForResponseBlock) + ) { + return createHttpResponseFromBlock(resultForResponseBlock) + } + + return NextResponse.json( + { + success: result.success, + executionId, + output: result.output, + error: result.error, + metadata: result.metadata, + }, + { status: result.statusCode ?? 200 } + ) + } catch (error: unknown) { + if (error instanceof DispatchQueueFullError) { + return NextResponse.json( + { + error: 'Service temporarily at capacity', + message: error.message, + retryAfterSeconds: 10, + }, + { status: 503, headers: { 'Retry-After': '10' } } + ) + } + + const errorMessage = error instanceof Error ? error.message : 'Unknown error' + + logger.error(`[${requestId}] Queued non-SSE execution failed: ${errorMessage}`) + + return NextResponse.json( + { + success: false, + error: errorMessage, + }, + { status: 500 } + ) + } + } + const timeoutController = createTimeoutAbortController( preprocessResult.executionTimeout?.sync ) try { - const metadata: ExecutionMetadata = { - requestId, - executionId, - workflowId, - workspaceId, - userId: actorUserId, - sessionUserId: isClientSession ? 
userId : undefined, - workflowUserId: workflow.userId, - triggerType, - useDraftState: shouldUseDraftState, - startTime: new Date().toISOString(), - isClientSession, - enforceCredentialAccess: useAuthenticatedUserAsActor, - workflowStateOverride: effectiveWorkflowStateOverride, - callChain, - } - - const executionVariables = cachedWorkflowData?.variables ?? workflow.variables ?? {} - const snapshot = new ExecutionSnapshot( metadata, workflow, @@ -809,6 +973,52 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id: } if (shouldUseDraftState) { + if (shouldUseBullMQ()) { + const metadata: ExecutionMetadata = { + requestId, + executionId, + workflowId, + workspaceId, + userId: actorUserId, + sessionUserId: isClientSession ? userId : undefined, + workflowUserId: workflow.userId, + triggerType, + useDraftState: shouldUseDraftState, + startTime: new Date().toISOString(), + isClientSession, + enforceCredentialAccess: useAuthenticatedUserAsActor, + workflowStateOverride: effectiveWorkflowStateOverride, + callChain, + } + + const executionVariables = cachedWorkflowData?.variables ?? workflow.variables ?? 
{} + + await enqueueDirectWorkflowExecution( + { + workflow, + metadata, + input: processedInput, + variables: executionVariables, + selectedOutputs, + includeFileBase64, + base64MaxBytes, + stopAfterBlockId, + timeoutMs: preprocessResult.executionTimeout?.sync, + runFromBlock: resolvedRunFromBlock, + streamEvents: true, + }, + 1, + 'interactive' + ) + + return new NextResponse(createBufferedExecutionStream(executionId), { + headers: { + ...SSE_HEADERS, + 'X-Execution-Id': executionId, + }, + }) + } + logger.info(`[${requestId}] Using SSE console log streaming (manual execution)`) } else { logger.info(`[${requestId}] Using streaming API response`) @@ -1271,6 +1481,17 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id: }, }) } catch (error: any) { + if (error instanceof DispatchQueueFullError) { + return NextResponse.json( + { + error: 'Service temporarily at capacity', + message: error.message, + retryAfterSeconds: 10, + }, + { status: 503, headers: { 'Retry-After': '10' } } + ) + } + logger.error(`[${requestId}] Failed to start workflow execution:`, error) return NextResponse.json( { error: error.message || 'Failed to start workflow execution' }, diff --git a/apps/sim/background/schedule-execution.ts b/apps/sim/background/schedule-execution.ts index d1231e16a61..1136e716d9a 100644 --- a/apps/sim/background/schedule-execution.ts +++ b/apps/sim/background/schedule-execution.ts @@ -303,6 +303,7 @@ async function runWorkflowExecution({ export type ScheduleExecutionPayload = { scheduleId: string workflowId: string + workspaceId?: string executionId?: string requestId?: string correlation?: AsyncExecutionCorrelation diff --git a/apps/sim/background/workflow-execution.ts b/apps/sim/background/workflow-execution.ts index 49756d82c27..aa2411a2580 100644 --- a/apps/sim/background/workflow-execution.ts +++ b/apps/sim/background/workflow-execution.ts @@ -36,6 +36,7 @@ export function buildWorkflowCorrelation( export type 
WorkflowExecutionPayload = { workflowId: string userId: string + workspaceId?: string input?: any triggerType?: CoreTriggerType executionId?: string diff --git a/apps/sim/background/workspace-notification-delivery.ts b/apps/sim/background/workspace-notification-delivery.ts index 1886d5462e3..230d33dae67 100644 --- a/apps/sim/background/workspace-notification-delivery.ts +++ b/apps/sim/background/workspace-notification-delivery.ts @@ -1,5 +1,5 @@ import { createHmac } from 'crypto' -import { db } from '@sim/db' +import { db, workflowExecutionLogs } from '@sim/db' import { account, workspaceNotificationDelivery, @@ -17,11 +17,14 @@ import { import { checkUsageStatus } from '@/lib/billing/calculations/usage-monitor' import { getHighestPrioritySubscription } from '@/lib/billing/core/subscription' import { dollarsToCredits } from '@/lib/billing/credits/conversion' +import { createBullMQJobData, isBullMQEnabled } from '@/lib/core/bullmq' +import { acquireLock } from '@/lib/core/config/redis' import { RateLimiter } from '@/lib/core/rate-limiter' import { decryptSecret } from '@/lib/core/security/encryption' import { secureFetchWithValidation } from '@/lib/core/security/input-validation.server' import { formatDuration } from '@/lib/core/utils/formatting' import { getBaseUrl } from '@/lib/core/utils/urls' +import { enqueueWorkspaceDispatch } from '@/lib/core/workspace-dispatch' import type { TraceSpan, WorkflowExecutionLog } from '@/lib/logs/types' import { sendEmail } from '@/lib/messaging/email/mailer' import type { AlertConfig } from '@/lib/notifications/alert-rules' @@ -32,6 +35,7 @@ const logger = createLogger('WorkspaceNotificationDelivery') const MAX_ATTEMPTS = 5 const RETRY_DELAYS = [5 * 1000, 15 * 1000, 60 * 1000, 3 * 60 * 1000, 10 * 60 * 1000] +const NOTIFICATION_DISPATCH_LOCK_TTL_SECONDS = 3 function getRetryDelayWithJitter(baseDelay: number): number { const jitter = Math.random() * 0.1 * baseDelay @@ -486,12 +490,170 @@ async function updateDeliveryStatus( 
export interface NotificationDeliveryParams { deliveryId: string subscriptionId: string + workspaceId: string notificationType: 'webhook' | 'email' | 'slack' log: WorkflowExecutionLog alertConfig?: AlertConfig } -export async function executeNotificationDelivery(params: NotificationDeliveryParams) { +export type NotificationDeliveryResult = + | { status: 'success' | 'skipped' | 'failed' } + | { status: 'retry'; retryDelayMs: number } + +async function buildRetryLog(params: NotificationDeliveryParams): Promise { + const [storedLog] = await db + .select() + .from(workflowExecutionLogs) + .where( + and( + eq(workflowExecutionLogs.executionId, params.log.executionId), + eq(workflowExecutionLogs.workflowId, params.log.workflowId!) + ) + ) + .limit(1) + + if (storedLog) { + return storedLog as unknown as WorkflowExecutionLog + } + + const now = new Date().toISOString() + return { + id: `retry_log_${params.deliveryId}`, + workflowId: params.log.workflowId, + executionId: params.log.executionId, + stateSnapshotId: '', + level: 'info', + trigger: 'system', + startedAt: now, + endedAt: now, + totalDurationMs: 0, + executionData: {}, + cost: { total: 0 }, + createdAt: now, + } +} + +export async function enqueueNotificationDeliveryDispatch( + params: NotificationDeliveryParams +): Promise { + if (!isBullMQEnabled()) { + return false + } + + const lockAcquired = await acquireLock( + `workspace-notification-dispatch:${params.deliveryId}`, + params.deliveryId, + NOTIFICATION_DISPATCH_LOCK_TTL_SECONDS + ) + if (!lockAcquired) { + return false + } + + await enqueueWorkspaceDispatch({ + workspaceId: params.workspaceId, + lane: 'lightweight', + queueName: 'workspace-notification-delivery', + bullmqJobName: 'workspace-notification-delivery', + bullmqPayload: createBullMQJobData(params), + metadata: { + workflowId: params.log.workflowId ?? 
undefined, + }, + }) + + return true +} + +const STUCK_IN_PROGRESS_THRESHOLD_MS = 5 * 60 * 1000 + +export async function sweepPendingNotificationDeliveries(limit = 50): Promise { + if (!isBullMQEnabled()) { + return 0 + } + + const stuckThreshold = new Date(Date.now() - STUCK_IN_PROGRESS_THRESHOLD_MS) + + await db + .update(workspaceNotificationDelivery) + .set({ + status: 'pending', + updatedAt: new Date(), + }) + .where( + and( + eq(workspaceNotificationDelivery.status, 'in_progress'), + lte(workspaceNotificationDelivery.lastAttemptAt, stuckThreshold) + ) + ) + + const dueDeliveries = await db + .select({ + deliveryId: workspaceNotificationDelivery.id, + subscriptionId: workspaceNotificationDelivery.subscriptionId, + workflowId: workspaceNotificationDelivery.workflowId, + executionId: workspaceNotificationDelivery.executionId, + workspaceId: workspaceNotificationSubscription.workspaceId, + alertConfig: workspaceNotificationSubscription.alertConfig, + notificationType: workspaceNotificationSubscription.notificationType, + }) + .from(workspaceNotificationDelivery) + .innerJoin( + workspaceNotificationSubscription, + eq(workspaceNotificationDelivery.subscriptionId, workspaceNotificationSubscription.id) + ) + .where( + and( + eq(workspaceNotificationDelivery.status, 'pending'), + or( + isNull(workspaceNotificationDelivery.nextAttemptAt), + lte(workspaceNotificationDelivery.nextAttemptAt, new Date()) + ) + ) + ) + .limit(limit) + + let enqueued = 0 + + for (const delivery of dueDeliveries) { + const params: NotificationDeliveryParams = { + deliveryId: delivery.deliveryId, + subscriptionId: delivery.subscriptionId, + workspaceId: delivery.workspaceId, + notificationType: delivery.notificationType, + log: await buildRetryLog({ + deliveryId: delivery.deliveryId, + subscriptionId: delivery.subscriptionId, + workspaceId: delivery.workspaceId, + notificationType: delivery.notificationType, + log: { + id: '', + workflowId: delivery.workflowId, + executionId: 
delivery.executionId, + stateSnapshotId: '', + level: 'info', + trigger: 'system', + startedAt: '', + endedAt: '', + totalDurationMs: 0, + executionData: {}, + cost: { total: 0 }, + createdAt: '', + }, + alertConfig: (delivery.alertConfig as AlertConfig | null) ?? undefined, + }), + alertConfig: (delivery.alertConfig as AlertConfig | null) ?? undefined, + } + + if (await enqueueNotificationDeliveryDispatch(params)) { + enqueued += 1 + } + } + + return enqueued +} + +export async function executeNotificationDelivery( + params: NotificationDeliveryParams +): Promise { const { deliveryId, subscriptionId, notificationType, log, alertConfig } = params try { @@ -504,7 +666,7 @@ export async function executeNotificationDelivery(params: NotificationDeliveryPa if (!subscription || !subscription.active) { logger.warn(`Subscription ${subscriptionId} not found or inactive`) await updateDeliveryStatus(deliveryId, 'failed', 'Subscription not found or inactive') - return + return { status: 'failed' } } const claimed = await db @@ -529,7 +691,7 @@ export async function executeNotificationDelivery(params: NotificationDeliveryPa if (claimed.length === 0) { logger.info(`Delivery ${deliveryId} not claimable`) - return + return { status: 'skipped' } } const attempts = claimed[0].attempts @@ -539,7 +701,7 @@ export async function executeNotificationDelivery(params: NotificationDeliveryPa if (!payload) { await updateDeliveryStatus(deliveryId, 'failed', 'Workflow was archived or deleted') logger.info(`Skipping delivery ${deliveryId} - workflow was archived or deleted`) - return + return { status: 'failed' } } let result: { success: boolean; status?: number; error?: string } @@ -561,39 +723,35 @@ export async function executeNotificationDelivery(params: NotificationDeliveryPa if (result.success) { await updateDeliveryStatus(deliveryId, 'success', undefined, result.status) logger.info(`${notificationType} notification delivered successfully`, { deliveryId }) - } else { - if (attempts < 
MAX_ATTEMPTS) { - const retryDelay = getRetryDelayWithJitter( - RETRY_DELAYS[attempts - 1] || RETRY_DELAYS[RETRY_DELAYS.length - 1] - ) - const nextAttemptAt = new Date(Date.now() + retryDelay) + return { status: 'success' } + } + if (attempts < MAX_ATTEMPTS) { + const retryDelay = getRetryDelayWithJitter( + RETRY_DELAYS[attempts - 1] || RETRY_DELAYS[RETRY_DELAYS.length - 1] + ) + const nextAttemptAt = new Date(Date.now() + retryDelay) - await updateDeliveryStatus( - deliveryId, - 'pending', - result.error, - result.status, - nextAttemptAt - ) + await updateDeliveryStatus(deliveryId, 'pending', result.error, result.status, nextAttemptAt) - logger.info( - `${notificationType} notification failed, scheduled retry ${attempts}/${MAX_ATTEMPTS}`, - { - deliveryId, - error: result.error, - } - ) - } else { - await updateDeliveryStatus(deliveryId, 'failed', result.error, result.status) - logger.error(`${notificationType} notification failed after ${MAX_ATTEMPTS} attempts`, { + logger.info( + `${notificationType} notification failed, scheduled retry ${attempts}/${MAX_ATTEMPTS}`, + { deliveryId, error: result.error, - }) - } + } + ) + return { status: 'retry', retryDelayMs: retryDelay } } + await updateDeliveryStatus(deliveryId, 'failed', result.error, result.status) + logger.error(`${notificationType} notification failed after ${MAX_ATTEMPTS} attempts`, { + deliveryId, + error: result.error, + }) + return { status: 'failed' } } catch (error) { logger.error('Notification delivery failed', { deliveryId, error }) await updateDeliveryStatus(deliveryId, 'failed', 'Internal error') + return { status: 'failed' } } } diff --git a/apps/sim/lib/billing/subscriptions/utils.ts b/apps/sim/lib/billing/subscriptions/utils.ts index d5ddbe33223..b8095cd732a 100644 --- a/apps/sim/lib/billing/subscriptions/utils.ts +++ b/apps/sim/lib/billing/subscriptions/utils.ts @@ -13,7 +13,7 @@ import { isPro, isTeam, } from '@/lib/billing/plan-helpers' -import type { EnterpriseSubscriptionMetadata } from 
'@/lib/billing/types' +import { parseEnterpriseSubscriptionMetadata } from '@/lib/billing/types' import { env } from '@/lib/core/config/env' /** @@ -48,27 +48,15 @@ export function checkEnterprisePlan(subscription: any): boolean { return isEnterprise(subscription?.plan) && subscription?.status === 'active' } -/** - * Type guard to check if metadata is valid EnterpriseSubscriptionMetadata - */ -function isEnterpriseMetadata(metadata: unknown): metadata is EnterpriseSubscriptionMetadata { - return ( - !!metadata && - typeof metadata === 'object' && - 'seats' in metadata && - typeof (metadata as EnterpriseSubscriptionMetadata).seats === 'string' - ) -} - export function getEffectiveSeats(subscription: any): number { if (!subscription) { return 0 } if (isEnterprise(subscription.plan)) { - const metadata = subscription.metadata as EnterpriseSubscriptionMetadata | null - if (isEnterpriseMetadata(metadata)) { - return Number.parseInt(metadata.seats, 10) + const metadata = parseEnterpriseSubscriptionMetadata(subscription.metadata) + if (metadata) { + return metadata.seats } return 0 } diff --git a/apps/sim/lib/billing/types/index.ts b/apps/sim/lib/billing/types/index.ts index e3c3f2de559..cd81abda7f2 100644 --- a/apps/sim/lib/billing/types/index.ts +++ b/apps/sim/lib/billing/types/index.ts @@ -2,18 +2,44 @@ * Billing System Types * Centralized type definitions for the billing system */ +import { z } from 'zod' -export interface EnterpriseSubscriptionMetadata { - plan: 'enterprise' +export const enterpriseSubscriptionMetadataSchema = z.object({ + plan: z.literal('enterprise'), // The referenceId must be provided in Stripe metadata to link to the organization // This gets stored in the subscription.referenceId column - referenceId: string + referenceId: z.string().min(1), // The fixed monthly price for this enterprise customer (as string from Stripe metadata) // This will be used to set the organization's usage limit - monthlyPrice: string - // Number of seats for invitation 
limits (not for billing) (as string from Stripe metadata) - // We set Stripe quantity to 1 and use this for actual seat count - seats: string + monthlyPrice: z.coerce.number().positive(), + // Number of seats for invitation limits (not for billing) + seats: z.coerce.number().int().positive(), + // Optional custom workspace concurrency limit for enterprise workspaces + workspaceConcurrencyLimit: z.coerce.number().int().positive().optional(), +}) + +export type EnterpriseSubscriptionMetadata = z.infer + +const enterpriseWorkspaceConcurrencyMetadataSchema = z.object({ + workspaceConcurrencyLimit: z.coerce.number().int().positive().optional(), +}) + +export type EnterpriseWorkspaceConcurrencyMetadata = z.infer< + typeof enterpriseWorkspaceConcurrencyMetadataSchema +> + +export function parseEnterpriseSubscriptionMetadata( + value: unknown +): EnterpriseSubscriptionMetadata | null { + const result = enterpriseSubscriptionMetadataSchema.safeParse(value) + return result.success ? result.data : null +} + +export function parseEnterpriseWorkspaceConcurrencyMetadata( + value: unknown +): EnterpriseWorkspaceConcurrencyMetadata | null { + const result = enterpriseWorkspaceConcurrencyMetadataSchema.safeParse(value) + return result.success ? 
result.data : null } export interface UsageData { diff --git a/apps/sim/lib/billing/webhooks/enterprise.ts b/apps/sim/lib/billing/webhooks/enterprise.ts index cf20b52b395..c4bc6a19f22 100644 --- a/apps/sim/lib/billing/webhooks/enterprise.ts +++ b/apps/sim/lib/billing/webhooks/enterprise.ts @@ -6,26 +6,10 @@ import type Stripe from 'stripe' import { getEmailSubject, renderEnterpriseSubscriptionEmail } from '@/components/emails' import { sendEmail } from '@/lib/messaging/email/mailer' import { getFromEmailAddress } from '@/lib/messaging/email/utils' -import type { EnterpriseSubscriptionMetadata } from '../types' +import { parseEnterpriseSubscriptionMetadata } from '../types' const logger = createLogger('BillingEnterprise') -function isEnterpriseMetadata(value: unknown): value is EnterpriseSubscriptionMetadata { - return ( - !!value && - typeof value === 'object' && - 'plan' in value && - 'referenceId' in value && - 'monthlyPrice' in value && - 'seats' in value && - typeof value.plan === 'string' && - value.plan.toLowerCase() === 'enterprise' && - typeof value.referenceId === 'string' && - typeof value.monthlyPrice === 'string' && - typeof value.seats === 'string' - ) -} - export async function handleManualEnterpriseSubscription(event: Stripe.Event) { const stripeSubscription = event.data.object as Stripe.Subscription @@ -63,19 +47,24 @@ export async function handleManualEnterpriseSubscription(event: Stripe.Event) { throw new Error('Unable to resolve referenceId for subscription') } - if (!isEnterpriseMetadata(metadata)) { + const enterpriseMetadata = parseEnterpriseSubscriptionMetadata(metadata) + if (!enterpriseMetadata) { logger.error('[subscription.created] Invalid enterprise metadata shape', { subscriptionId: stripeSubscription.id, metadata, }) throw new Error('Invalid enterprise metadata for subscription') } - const enterpriseMetadata = metadata - const metadataJson: Record = { ...enterpriseMetadata } + const metadataJson: Record = { + ...metadata, + 
workspaceConcurrencyLimit: + typeof metadata.workspaceConcurrencyLimit === 'string' + ? Number.parseInt(metadata.workspaceConcurrencyLimit, 10) + : metadata.workspaceConcurrencyLimit, + } - // Extract and parse seats and monthly price from metadata (they come as strings from Stripe) - const seats = Number.parseInt(enterpriseMetadata.seats, 10) - const monthlyPrice = Number.parseFloat(enterpriseMetadata.monthlyPrice) + const seats = enterpriseMetadata.seats + const monthlyPrice = enterpriseMetadata.monthlyPrice if (!seats || seats <= 0 || Number.isNaN(seats)) { logger.error('[subscription.created] Invalid or missing seats in enterprise metadata', { diff --git a/apps/sim/lib/billing/workspace-concurrency.test.ts b/apps/sim/lib/billing/workspace-concurrency.test.ts new file mode 100644 index 00000000000..462e24a8e06 --- /dev/null +++ b/apps/sim/lib/billing/workspace-concurrency.test.ts @@ -0,0 +1,146 @@ +/** + * @vitest-environment node + */ +import { beforeEach, describe, expect, it, vi } from 'vitest' + +const { + mockGetHighestPrioritySubscription, + mockGetWorkspaceBilledAccountUserId, + mockFeatureFlags, + mockRedisGet, + mockRedisSet, + mockRedisDel, + mockRedisKeys, + mockGetRedisClient, +} = vi.hoisted(() => ({ + mockGetHighestPrioritySubscription: vi.fn(), + mockGetWorkspaceBilledAccountUserId: vi.fn(), + mockFeatureFlags: { + isBillingEnabled: true, + }, + mockRedisGet: vi.fn(), + mockRedisSet: vi.fn(), + mockRedisDel: vi.fn(), + mockRedisKeys: vi.fn(), + mockGetRedisClient: vi.fn(), +})) + +vi.mock('@sim/logger', () => ({ + createLogger: () => ({ + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + debug: vi.fn(), + }), +})) + +vi.mock('@/lib/billing/core/plan', () => ({ + getHighestPrioritySubscription: mockGetHighestPrioritySubscription, +})) + +vi.mock('@/lib/workspaces/utils', () => ({ + getWorkspaceBilledAccountUserId: mockGetWorkspaceBilledAccountUserId, +})) + +vi.mock('@/lib/core/config/redis', () => ({ + getRedisClient: mockGetRedisClient, +})) + 
+vi.mock('@/lib/core/config/feature-flags', () => mockFeatureFlags) + +import { + getWorkspaceConcurrencyLimit, + resetWorkspaceConcurrencyLimitCache, +} from '@/lib/billing/workspace-concurrency' + +describe('workspace concurrency billing', () => { + beforeEach(() => { + vi.clearAllMocks() + mockFeatureFlags.isBillingEnabled = true + + mockRedisGet.mockResolvedValue(null) + mockRedisSet.mockResolvedValue('OK') + mockRedisDel.mockResolvedValue(1) + mockRedisKeys.mockResolvedValue([]) + mockGetRedisClient.mockReturnValue({ + get: mockRedisGet, + set: mockRedisSet, + del: mockRedisDel, + keys: mockRedisKeys, + }) + }) + + it('returns free tier when no billed account exists', async () => { + mockGetWorkspaceBilledAccountUserId.mockResolvedValue(null) + + await expect(getWorkspaceConcurrencyLimit('workspace-1')).resolves.toBe(5) + }) + + it('returns pro limit for pro billing accounts', async () => { + mockGetWorkspaceBilledAccountUserId.mockResolvedValue('user-1') + mockGetHighestPrioritySubscription.mockResolvedValue({ + plan: 'pro_6000', + metadata: null, + }) + + await expect(getWorkspaceConcurrencyLimit('workspace-1')).resolves.toBe(50) + }) + + it('returns max limit for max plan tiers', async () => { + mockGetWorkspaceBilledAccountUserId.mockResolvedValue('user-1') + mockGetHighestPrioritySubscription.mockResolvedValue({ + plan: 'pro_25000', + metadata: null, + }) + + await expect(getWorkspaceConcurrencyLimit('workspace-1')).resolves.toBe(200) + }) + + it('returns max limit for legacy team plans', async () => { + mockGetWorkspaceBilledAccountUserId.mockResolvedValue('user-1') + mockGetHighestPrioritySubscription.mockResolvedValue({ + plan: 'team', + metadata: null, + }) + + await expect(getWorkspaceConcurrencyLimit('workspace-1')).resolves.toBe(200) + }) + + it('returns enterprise metadata override when present', async () => { + mockGetWorkspaceBilledAccountUserId.mockResolvedValue('user-1') + mockGetHighestPrioritySubscription.mockResolvedValue({ + plan: 
'enterprise', + metadata: { + workspaceConcurrencyLimit: '350', + }, + }) + + await expect(getWorkspaceConcurrencyLimit('workspace-1')).resolves.toBe(350) + }) + + it('uses free-tier limit when billing is disabled', async () => { + mockFeatureFlags.isBillingEnabled = false + mockGetWorkspaceBilledAccountUserId.mockResolvedValue('user-1') + mockGetHighestPrioritySubscription.mockResolvedValue({ + plan: 'pro_25000', + metadata: { + workspaceConcurrencyLimit: 999, + }, + }) + + await expect(getWorkspaceConcurrencyLimit('workspace-1')).resolves.toBe(5) + }) + + it('uses redis cache when available', async () => { + mockRedisGet.mockResolvedValueOnce('123') + + await expect(getWorkspaceConcurrencyLimit('workspace-1')).resolves.toBe(123) + expect(mockGetWorkspaceBilledAccountUserId).not.toHaveBeenCalled() + }) + + it('can clear a specific workspace cache entry', async () => { + await resetWorkspaceConcurrencyLimitCache('workspace-1') + + expect(mockRedisDel).toHaveBeenCalledWith('workspace-concurrency-limit:workspace-1') + }) +}) diff --git a/apps/sim/lib/billing/workspace-concurrency.ts b/apps/sim/lib/billing/workspace-concurrency.ts new file mode 100644 index 00000000000..e164bdb2ccd --- /dev/null +++ b/apps/sim/lib/billing/workspace-concurrency.ts @@ -0,0 +1,170 @@ +import { createLogger } from '@sim/logger' +import { getHighestPrioritySubscription } from '@/lib/billing/core/plan' +import { getPlanTierCredits, isEnterprise, isPro, isTeam } from '@/lib/billing/plan-helpers' +import { parseEnterpriseWorkspaceConcurrencyMetadata } from '@/lib/billing/types' +import { env } from '@/lib/core/config/env' +import { isBillingEnabled } from '@/lib/core/config/feature-flags' +import { getRedisClient } from '@/lib/core/config/redis' +import { getWorkspaceBilledAccountUserId } from '@/lib/workspaces/utils' + +const logger = createLogger('WorkspaceConcurrencyBilling') + +const CACHE_TTL_MS = 60_000 +const CACHE_TTL_SECONDS = Math.floor(CACHE_TTL_MS / 1000) + +interface CacheEntry { 
+ value: number + expiresAt: number +} + +const inMemoryConcurrencyCache = new Map() + +function cacheKey(workspaceId: string): string { + return `workspace-concurrency-limit:${workspaceId}` +} + +function parsePositiveLimit(value: unknown): number | null { + if (typeof value === 'number' && Number.isFinite(value) && value > 0) { + return Math.floor(value) + } + + if (typeof value === 'string') { + const parsed = Number.parseInt(value, 10) + if (Number.isFinite(parsed) && parsed > 0) { + return parsed + } + } + + return null +} + +function getFreeConcurrencyLimit(): number { + return Number.parseInt(env.WORKSPACE_CONCURRENCY_FREE, 10) || 5 +} + +function getProConcurrencyLimit(): number { + return Number.parseInt(env.WORKSPACE_CONCURRENCY_PRO, 10) || 50 +} + +function getTeamConcurrencyLimit(): number { + return Number.parseInt(env.WORKSPACE_CONCURRENCY_TEAM, 10) || 200 +} + +function getEnterpriseDefaultConcurrencyLimit(): number { + return Number.parseInt(env.WORKSPACE_CONCURRENCY_ENTERPRISE, 10) || 200 +} + +function getEnterpriseConcurrencyLimit(metadata: unknown): number { + const enterpriseMetadata = parseEnterpriseWorkspaceConcurrencyMetadata(metadata) + return enterpriseMetadata?.workspaceConcurrencyLimit ?? 
getEnterpriseDefaultConcurrencyLimit() +} + +function getPlanConcurrencyLimit(plan: string | null | undefined, metadata: unknown): number { + if (!isBillingEnabled) { + return getFreeConcurrencyLimit() + } + + if (!plan) { + return getFreeConcurrencyLimit() + } + + if (isEnterprise(plan)) { + return getEnterpriseConcurrencyLimit(metadata) + } + + if (plan === 'team') { + return getTeamConcurrencyLimit() + } + + const credits = getPlanTierCredits(plan) + if (credits >= 25_000) { + return getTeamConcurrencyLimit() + } + + if (isPro(plan) || isTeam(plan)) { + return getProConcurrencyLimit() + } + + return getFreeConcurrencyLimit() +} + +export async function getWorkspaceConcurrencyLimit(workspaceId: string): Promise { + const redis = getRedisClient() + + if (redis) { + const cached = await redis.get(cacheKey(workspaceId)) + const cachedValue = parsePositiveLimit(cached) + if (cachedValue !== null) { + return cachedValue + } + } else { + const cached = inMemoryConcurrencyCache.get(workspaceId) + if (cached && cached.expiresAt > Date.now()) { + return cached.value + } + } + + try { + const billedAccountUserId = await getWorkspaceBilledAccountUserId(workspaceId) + if (!billedAccountUserId) { + if (redis) { + await redis.set( + cacheKey(workspaceId), + String(getFreeConcurrencyLimit()), + 'EX', + CACHE_TTL_SECONDS + ) + } else { + inMemoryConcurrencyCache.set(workspaceId, { + value: getFreeConcurrencyLimit(), + expiresAt: Date.now() + CACHE_TTL_MS, + }) + } + return getFreeConcurrencyLimit() + } + + const subscription = await getHighestPrioritySubscription(billedAccountUserId) + const limit = getPlanConcurrencyLimit(subscription?.plan, subscription?.metadata) + + if (redis) { + await redis.set(cacheKey(workspaceId), String(limit), 'EX', CACHE_TTL_SECONDS) + } else { + inMemoryConcurrencyCache.set(workspaceId, { + value: limit, + expiresAt: Date.now() + CACHE_TTL_MS, + }) + } + + return limit + } catch (error) { + logger.error('Failed to resolve workspace concurrency 
limit, using free tier', { + workspaceId, + error, + }) + + return getFreeConcurrencyLimit() + } +} + +export async function resetWorkspaceConcurrencyLimitCache(workspaceId?: string): Promise { + if (!workspaceId) { + inMemoryConcurrencyCache.clear() + } else { + inMemoryConcurrencyCache.delete(workspaceId) + } + + const redis = getRedisClient() + if (!redis) { + return + } + + if (workspaceId) { + await redis.del(cacheKey(workspaceId)) + return + } + + const keys = await redis.keys('workspace-concurrency-limit:*') + if (keys.length > 0) { + await redis.del(...keys) + } +} diff --git a/apps/sim/lib/core/admission/gate.ts b/apps/sim/lib/core/admission/gate.ts new file mode 100644 index 00000000000..a1dc7e0dce9 --- /dev/null +++ b/apps/sim/lib/core/admission/gate.ts @@ -0,0 +1,60 @@ +import { createLogger } from '@sim/logger' +import { NextResponse } from 'next/server' +import { env } from '@/lib/core/config/env' + +const logger = createLogger('AdmissionGate') + +const MAX_INFLIGHT = Number.parseInt(env.ADMISSION_GATE_MAX_INFLIGHT ?? '') || 500 + +let inflight = 0 + +export interface AdmissionTicket { + release: () => void +} + +/** + * Attempts to admit a request through the in-process gate. + * Returns a ticket with a release() handle on success, or null if at capacity. + * Zero external calls — purely in-process atomic counter. + */ +export function tryAdmit(): AdmissionTicket | null { + if (inflight >= MAX_INFLIGHT) { + return null + } + + inflight++ + let released = false + + return { + release() { + if (released) return + released = true + inflight-- + }, + } +} + +/** + * Returns a 429 response for requests rejected by the admission gate. + */ +export function admissionRejectedResponse(): NextResponse { + logger.warn('Admission gate rejecting request', { inflight, maxInflight: MAX_INFLIGHT }) + return NextResponse.json( + { + error: 'Too many requests', + message: 'Server is at capacity. 
Please retry shortly.', + retryAfterSeconds: 5, + }, + { + status: 429, + headers: { 'Retry-After': '5' }, + } + ) +} + +/** + * Returns the current gate metrics for observability. + */ +export function getAdmissionGateStatus(): { inflight: number; maxInflight: number } { + return { inflight, maxInflight: MAX_INFLIGHT } +} diff --git a/apps/sim/lib/core/async-jobs/backends/bullmq.ts b/apps/sim/lib/core/async-jobs/backends/bullmq.ts new file mode 100644 index 00000000000..a7bb4647ef4 --- /dev/null +++ b/apps/sim/lib/core/async-jobs/backends/bullmq.ts @@ -0,0 +1,106 @@ +import { createLogger } from '@sim/logger' +import type { Job as BullMQJob } from 'bullmq' +import { + type EnqueueOptions, + JOB_STATUS, + type Job, + type JobQueueBackend, + type JobStatus, + type JobType, +} from '@/lib/core/async-jobs/types' +import { type BullMQJobData, createBullMQJobData, getBullMQQueue } from '@/lib/core/bullmq' + +const logger = createLogger('BullMQJobQueue') + +function mapBullMQStatus(status: string): JobStatus { + switch (status) { + case 'active': + return JOB_STATUS.PROCESSING + case 'completed': + return JOB_STATUS.COMPLETED + case 'failed': + return JOB_STATUS.FAILED + default: + return JOB_STATUS.PENDING + } +} + +async function toJob( + queueType: JobType, + bullJob: BullMQJob> | null +): Promise { + if (!bullJob) { + return null + } + + const status = mapBullMQStatus(await bullJob.getState()) + + return { + id: bullJob.id ?? '', + type: queueType, + payload: bullJob.data.payload, + status, + createdAt: new Date(bullJob.timestamp), + startedAt: bullJob.processedOn ? new Date(bullJob.processedOn) : undefined, + completedAt: bullJob.finishedOn ? new Date(bullJob.finishedOn) : undefined, + attempts: bullJob.attemptsMade, + maxAttempts: bullJob.opts.attempts ?? 1, + error: bullJob.failedReason || undefined, + output: bullJob.returnvalue, + metadata: bullJob.data.metadata ?? 
{}, + } +} + +export class BullMQJobQueue implements JobQueueBackend { + async enqueue( + type: JobType, + payload: TPayload, + options?: EnqueueOptions + ): Promise { + const queue = getBullMQQueue(type) + + const job = await queue.add( + options?.name ?? type, + createBullMQJobData(payload, options?.metadata), + { + jobId: options?.jobId, + attempts: options?.maxAttempts, + priority: options?.priority, + delay: options?.delayMs, + } + ) + + logger.debug('Enqueued job via BullMQ', { + jobId: job.id, + type, + name: options?.name ?? type, + }) + + return String(job.id) + } + + async getJob(jobId: string): Promise { + const workflowJob = await getBullMQQueue('workflow-execution').getJob(jobId) + if (workflowJob) { + return toJob('workflow-execution', workflowJob) + } + + const webhookJob = await getBullMQQueue('webhook-execution').getJob(jobId) + if (webhookJob) { + return toJob('webhook-execution', webhookJob) + } + + const scheduleJob = await getBullMQQueue('schedule-execution').getJob(jobId) + if (scheduleJob) { + return toJob('schedule-execution', scheduleJob) + } + + return null + } + + async startJob(_jobId: string): Promise {} + + async completeJob(_jobId: string, _output: unknown): Promise {} + + async markJobFailed(_jobId: string, _error: string): Promise {} +} diff --git a/apps/sim/lib/core/async-jobs/backends/index.ts b/apps/sim/lib/core/async-jobs/backends/index.ts index 144094e6407..ef84a232233 100644 --- a/apps/sim/lib/core/async-jobs/backends/index.ts +++ b/apps/sim/lib/core/async-jobs/backends/index.ts @@ -1,3 +1,4 @@ +export { BullMQJobQueue } from './bullmq' export { DatabaseJobQueue } from './database' export { RedisJobQueue } from './redis' export { TriggerDevJobQueue } from './trigger-dev' diff --git a/apps/sim/lib/core/async-jobs/config.ts b/apps/sim/lib/core/async-jobs/config.ts index 0537a6a8ef9..c4f0a4dcf83 100644 --- a/apps/sim/lib/core/async-jobs/config.ts +++ b/apps/sim/lib/core/async-jobs/config.ts @@ -1,7 +1,7 @@ import { createLogger } 
from '@sim/logger' import type { AsyncBackendType, JobQueueBackend } from '@/lib/core/async-jobs/types' +import { isBullMQEnabled } from '@/lib/core/bullmq' import { isTriggerDevEnabled } from '@/lib/core/config/feature-flags' -import { getRedisClient } from '@/lib/core/config/redis' const logger = createLogger('AsyncJobsConfig') @@ -11,16 +11,15 @@ let cachedInlineBackend: JobQueueBackend | null = null /** * Determines which async backend to use based on environment configuration. - * Follows the fallback chain: trigger.dev → redis → database + * Follows the fallback chain: trigger.dev → bullmq → database */ export function getAsyncBackendType(): AsyncBackendType { if (isTriggerDevEnabled) { return 'trigger-dev' } - const redis = getRedisClient() - if (redis) { - return 'redis' + if (isBullMQEnabled()) { + return 'bullmq' } return 'database' @@ -43,13 +42,9 @@ export async function getJobQueue(): Promise { cachedBackend = new TriggerDevJobQueue() break } - case 'redis': { - const redis = getRedisClient() - if (!redis) { - throw new Error('Redis client not available but redis backend was selected') - } - const { RedisJobQueue } = await import('@/lib/core/async-jobs/backends/redis') - cachedBackend = new RedisJobQueue(redis) + case 'bullmq': { + const { BullMQJobQueue } = await import('@/lib/core/async-jobs/backends/bullmq') + cachedBackend = new BullMQJobQueue() break } case 'database': { @@ -62,6 +57,10 @@ export async function getJobQueue(): Promise { cachedBackendType = type logger.info(`Async job backend initialized: ${type}`) + if (!cachedBackend) { + throw new Error(`Failed to initialize async backend: ${type}`) + } + return cachedBackend } @@ -73,20 +72,19 @@ export function getCurrentBackendType(): AsyncBackendType | null { } /** - * Gets a job queue backend that bypasses Trigger.dev (Redis -> Database). - * Used for non-polling webhooks that should always execute inline. + * Gets a job queue backend that bypasses Trigger.dev (BullMQ -> Database). 
+ * Used for execution paths that must avoid Trigger.dev cold starts. */ export async function getInlineJobQueue(): Promise { if (cachedInlineBackend) { return cachedInlineBackend } - const redis = getRedisClient() let type: string - if (redis) { - const { RedisJobQueue } = await import('@/lib/core/async-jobs/backends/redis') - cachedInlineBackend = new RedisJobQueue(redis) - type = 'redis' + if (isBullMQEnabled()) { + const { BullMQJobQueue } = await import('@/lib/core/async-jobs/backends/bullmq') + cachedInlineBackend = new BullMQJobQueue() + type = 'bullmq' } else { const { DatabaseJobQueue } = await import('@/lib/core/async-jobs/backends/database') cachedInlineBackend = new DatabaseJobQueue() @@ -98,11 +96,15 @@ export async function getInlineJobQueue(): Promise { } /** - * Checks if jobs should be executed inline (fire-and-forget). - * For Redis/DB backends, we execute inline. Trigger.dev handles execution itself. + * Checks if jobs should be executed inline in-process. + * Database fallback is the only mode that still relies on inline execution. 
*/ export function shouldExecuteInline(): boolean { - return getAsyncBackendType() !== 'trigger-dev' + return getAsyncBackendType() === 'database' +} + +export function shouldUseBullMQ(): boolean { + return isBullMQEnabled() } /** diff --git a/apps/sim/lib/core/async-jobs/index.ts b/apps/sim/lib/core/async-jobs/index.ts index 24e6f1e526f..76ec7072207 100644 --- a/apps/sim/lib/core/async-jobs/index.ts +++ b/apps/sim/lib/core/async-jobs/index.ts @@ -5,6 +5,7 @@ export { getJobQueue, resetJobQueueCache, shouldExecuteInline, + shouldUseBullMQ, } from './config' export type { AsyncBackendType, diff --git a/apps/sim/lib/core/async-jobs/types.ts b/apps/sim/lib/core/async-jobs/types.ts index 27137ddadc3..a2ccf1d680c 100644 --- a/apps/sim/lib/core/async-jobs/types.ts +++ b/apps/sim/lib/core/async-jobs/types.ts @@ -62,6 +62,10 @@ export interface JobMetadata { export interface EnqueueOptions { maxAttempts?: number metadata?: JobMetadata + jobId?: string + priority?: number + name?: string + delayMs?: number } /** @@ -95,4 +99,4 @@ export interface JobQueueBackend { markJobFailed(jobId: string, error: string): Promise } -export type AsyncBackendType = 'trigger-dev' | 'redis' | 'database' +export type AsyncBackendType = 'trigger-dev' | 'bullmq' | 'redis' | 'database' diff --git a/apps/sim/lib/core/bullmq/connection.ts b/apps/sim/lib/core/bullmq/connection.ts new file mode 100644 index 00000000000..80def9d5cb5 --- /dev/null +++ b/apps/sim/lib/core/bullmq/connection.ts @@ -0,0 +1,29 @@ +import type { ConnectionOptions } from 'bullmq' +import { env } from '@/lib/core/config/env' + +export function isBullMQEnabled(): boolean { + return Boolean(env.REDIS_URL) +} + +export function getBullMQConnectionOptions(): ConnectionOptions { + if (!env.REDIS_URL) { + throw new Error('BullMQ requires REDIS_URL') + } + + const redisUrl = new URL(env.REDIS_URL) + const isTls = redisUrl.protocol === 'rediss:' + const port = redisUrl.port ? 
Number.parseInt(redisUrl.port, 10) : 6379 + const dbPath = redisUrl.pathname.replace('/', '') + const db = dbPath ? Number.parseInt(dbPath, 10) : undefined + + return { + host: redisUrl.hostname, + port, + username: redisUrl.username || undefined, + password: redisUrl.password || undefined, + db: Number.isFinite(db) ? db : undefined, + maxRetriesPerRequest: null, + enableReadyCheck: false, + ...(isTls ? { tls: {} } : {}), + } +} diff --git a/apps/sim/lib/core/bullmq/index.ts b/apps/sim/lib/core/bullmq/index.ts new file mode 100644 index 00000000000..efe937aa476 --- /dev/null +++ b/apps/sim/lib/core/bullmq/index.ts @@ -0,0 +1,16 @@ +export { getBullMQConnectionOptions, isBullMQEnabled } from './connection' +export { + type BullMQJobData, + createBullMQJobData, + getBullMQQueue, + getBullMQQueueByName, + getKnowledgeConnectorSyncQueue, + getKnowledgeDocumentProcessingQueue, + getMothershipJobExecutionQueue, + getWorkflowQueueEvents, + getWorkspaceNotificationDeliveryQueue, + KNOWLEDGE_CONNECTOR_SYNC_QUEUE, + KNOWLEDGE_DOCUMENT_PROCESSING_QUEUE, + MOTHERSHIP_JOB_EXECUTION_QUEUE, + WORKSPACE_NOTIFICATION_DELIVERY_QUEUE, +} from './queues' diff --git a/apps/sim/lib/core/bullmq/queues.ts b/apps/sim/lib/core/bullmq/queues.ts new file mode 100644 index 00000000000..0e526030d61 --- /dev/null +++ b/apps/sim/lib/core/bullmq/queues.ts @@ -0,0 +1,196 @@ +import { Queue, QueueEvents } from 'bullmq' +import type { JobMetadata, JobType } from '@/lib/core/async-jobs/types' +import { getBullMQConnectionOptions } from '@/lib/core/bullmq/connection' +import type { WorkspaceDispatchQueueName } from '@/lib/core/workspace-dispatch/types' + +export const KNOWLEDGE_CONNECTOR_SYNC_QUEUE = 'knowledge-connector-sync' as const +export const KNOWLEDGE_DOCUMENT_PROCESSING_QUEUE = 'knowledge-process-document' as const +export const MOTHERSHIP_JOB_EXECUTION_QUEUE = 'mothership-job-execution' as const +export const WORKSPACE_NOTIFICATION_DELIVERY_QUEUE = 'workspace-notification-delivery' as const + 
+export interface BullMQJobData { + payload: TPayload + metadata?: JobMetadata +} + +let workflowQueueInstance: Queue | null = null +let webhookQueueInstance: Queue | null = null +let scheduleQueueInstance: Queue | null = null +let knowledgeConnectorSyncQueueInstance: Queue | null = null +let knowledgeDocumentProcessingQueueInstance: Queue | null = null +let mothershipJobExecutionQueueInstance: Queue | null = null +let workspaceNotificationDeliveryQueueInstance: Queue | null = null +let workflowQueueEventsInstance: QueueEvents | null = null + +function getQueueDefaultOptions(type: JobType) { + switch (type) { + case 'workflow-execution': + return { + attempts: 3, + backoff: { type: 'exponential' as const, delay: 1000 }, + removeOnComplete: { age: 24 * 60 * 60 }, + removeOnFail: { age: 7 * 24 * 60 * 60 }, + } + case 'webhook-execution': + return { + attempts: 2, + backoff: { type: 'exponential' as const, delay: 2000 }, + removeOnComplete: { age: 24 * 60 * 60 }, + removeOnFail: { age: 3 * 24 * 60 * 60 }, + } + case 'schedule-execution': + return { + attempts: 2, + backoff: { type: 'exponential' as const, delay: 5000 }, + removeOnComplete: { age: 24 * 60 * 60 }, + removeOnFail: { age: 3 * 24 * 60 * 60 }, + } + } +} + +function createQueue(type: JobType): Queue { + return new Queue(type, { + connection: getBullMQConnectionOptions(), + defaultJobOptions: getQueueDefaultOptions(type), + }) +} + +function createNamedQueue( + name: + | typeof KNOWLEDGE_CONNECTOR_SYNC_QUEUE + | typeof KNOWLEDGE_DOCUMENT_PROCESSING_QUEUE + | typeof MOTHERSHIP_JOB_EXECUTION_QUEUE + | typeof WORKSPACE_NOTIFICATION_DELIVERY_QUEUE +): Queue { + switch (name) { + case KNOWLEDGE_CONNECTOR_SYNC_QUEUE: + return new Queue(name, { + connection: getBullMQConnectionOptions(), + defaultJobOptions: { + attempts: 3, + backoff: { type: 'exponential', delay: 5000 }, + removeOnComplete: { age: 24 * 60 * 60 }, + removeOnFail: { age: 7 * 24 * 60 * 60 }, + }, + }) + case KNOWLEDGE_DOCUMENT_PROCESSING_QUEUE: + 
return new Queue(name, { + connection: getBullMQConnectionOptions(), + defaultJobOptions: { + attempts: 3, + backoff: { type: 'exponential', delay: 1000 }, + removeOnComplete: { age: 24 * 60 * 60 }, + removeOnFail: { age: 7 * 24 * 60 * 60 }, + }, + }) + case MOTHERSHIP_JOB_EXECUTION_QUEUE: + return new Queue(name, { + connection: getBullMQConnectionOptions(), + defaultJobOptions: { + attempts: 1, + removeOnComplete: { age: 24 * 60 * 60 }, + removeOnFail: { age: 7 * 24 * 60 * 60 }, + }, + }) + case WORKSPACE_NOTIFICATION_DELIVERY_QUEUE: + return new Queue(name, { + connection: getBullMQConnectionOptions(), + defaultJobOptions: { + attempts: 1, + removeOnComplete: { age: 24 * 60 * 60 }, + removeOnFail: { age: 7 * 24 * 60 * 60 }, + }, + }) + } +} + +export function getBullMQQueue(type: JobType): Queue { + switch (type) { + case 'workflow-execution': + if (!workflowQueueInstance) { + workflowQueueInstance = createQueue(type) + } + return workflowQueueInstance + case 'webhook-execution': + if (!webhookQueueInstance) { + webhookQueueInstance = createQueue(type) + } + return webhookQueueInstance + case 'schedule-execution': + if (!scheduleQueueInstance) { + scheduleQueueInstance = createQueue(type) + } + return scheduleQueueInstance + } +} + +export function getBullMQQueueByName(queueName: WorkspaceDispatchQueueName): Queue { + switch (queueName) { + case 'workflow-execution': + case 'webhook-execution': + case 'schedule-execution': + return getBullMQQueue(queueName) + case KNOWLEDGE_CONNECTOR_SYNC_QUEUE: + return getKnowledgeConnectorSyncQueue() + case KNOWLEDGE_DOCUMENT_PROCESSING_QUEUE: + return getKnowledgeDocumentProcessingQueue() + case MOTHERSHIP_JOB_EXECUTION_QUEUE: + return getMothershipJobExecutionQueue() + case WORKSPACE_NOTIFICATION_DELIVERY_QUEUE: + return getWorkspaceNotificationDeliveryQueue() + } +} + +export function getWorkflowQueueEvents(): QueueEvents { + if (!workflowQueueEventsInstance) { + workflowQueueEventsInstance = new 
QueueEvents('workflow-execution', { + connection: getBullMQConnectionOptions(), + }) + } + + return workflowQueueEventsInstance +} + +export function getKnowledgeConnectorSyncQueue(): Queue { + if (!knowledgeConnectorSyncQueueInstance) { + knowledgeConnectorSyncQueueInstance = createNamedQueue(KNOWLEDGE_CONNECTOR_SYNC_QUEUE) + } + + return knowledgeConnectorSyncQueueInstance +} + +export function getKnowledgeDocumentProcessingQueue(): Queue { + if (!knowledgeDocumentProcessingQueueInstance) { + knowledgeDocumentProcessingQueueInstance = createNamedQueue(KNOWLEDGE_DOCUMENT_PROCESSING_QUEUE) + } + + return knowledgeDocumentProcessingQueueInstance +} + +export function getMothershipJobExecutionQueue(): Queue { + if (!mothershipJobExecutionQueueInstance) { + mothershipJobExecutionQueueInstance = createNamedQueue(MOTHERSHIP_JOB_EXECUTION_QUEUE) + } + + return mothershipJobExecutionQueueInstance +} + +export function getWorkspaceNotificationDeliveryQueue(): Queue { + if (!workspaceNotificationDeliveryQueueInstance) { + workspaceNotificationDeliveryQueueInstance = createNamedQueue( + WORKSPACE_NOTIFICATION_DELIVERY_QUEUE + ) + } + + return workspaceNotificationDeliveryQueueInstance +} + +export function createBullMQJobData( + payload: TPayload, + metadata?: JobMetadata +): BullMQJobData { + return { + payload, + metadata, + } +} diff --git a/apps/sim/lib/core/config/env.ts b/apps/sim/lib/core/config/env.ts index 65492cccb67..f858a09905b 100644 --- a/apps/sim/lib/core/config/env.ts +++ b/apps/sim/lib/core/config/env.ts @@ -180,6 +180,11 @@ export const env = createEnv({ // Data Retention FREE_PLAN_LOG_RETENTION_DAYS: z.string().optional(), // Log retention days for free plan users + // Admission & Burst Protection + ADMISSION_GATE_MAX_INFLIGHT: z.string().optional().default('500'), // Max concurrent in-flight execution requests per pod + DISPATCH_MAX_QUEUE_PER_WORKSPACE: z.string().optional().default('1000'), // Max queued dispatch jobs per workspace + 
DISPATCH_MAX_QUEUE_GLOBAL: z.string().optional().default('50000'), // Max queued dispatch jobs globally + // Rate Limiting Configuration RATE_LIMIT_WINDOW_MS: z.string().optional().default('60000'), // Rate limit window duration in milliseconds (default: 1 minute) MANUAL_EXECUTION_LIMIT: z.string().optional().default('999999'),// Manual execution bypass value (effectively unlimited) @@ -191,6 +196,10 @@ export const env = createEnv({ RATE_LIMIT_TEAM_ASYNC: z.string().optional().default('2500'), // Team tier async API executions per minute RATE_LIMIT_ENTERPRISE_SYNC: z.string().optional().default('600'), // Enterprise tier sync API executions per minute RATE_LIMIT_ENTERPRISE_ASYNC: z.string().optional().default('5000'), // Enterprise tier async API executions per minute + WORKSPACE_CONCURRENCY_FREE: z.string().optional().default('5'), // Free tier concurrent workspace executions + WORKSPACE_CONCURRENCY_PRO: z.string().optional().default('50'), // Pro tier concurrent workspace executions + WORKSPACE_CONCURRENCY_TEAM: z.string().optional().default('200'), // Team/Max tier concurrent workspace executions + WORKSPACE_CONCURRENCY_ENTERPRISE: z.string().optional().default('200'), // Enterprise default concurrent workspace executions // Timeout Configuration EXECUTION_TIMEOUT_FREE: z.string().optional().default('300'), // 5 minutes diff --git a/apps/sim/lib/core/workspace-dispatch/adapter.ts b/apps/sim/lib/core/workspace-dispatch/adapter.ts new file mode 100644 index 00000000000..637688d1117 --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/adapter.ts @@ -0,0 +1,80 @@ +import type { + WorkspaceDispatchClaimResult, + WorkspaceDispatchEnqueueInput, + WorkspaceDispatchJobRecord, + WorkspaceDispatchLane, +} from '@/lib/core/workspace-dispatch/types' + +export interface WorkspaceDispatchStorageAdapter { + saveDispatchJob(record: WorkspaceDispatchJobRecord): Promise + getDispatchJobRecord(jobId: string): Promise + listDispatchJobsByStatuses( + statuses: readonly 
WorkspaceDispatchJobRecord['status'][] + ): Promise + updateDispatchJobRecord( + jobId: string, + updater: (record: WorkspaceDispatchJobRecord) => WorkspaceDispatchJobRecord + ): Promise + enqueueWorkspaceDispatchJob( + input: WorkspaceDispatchEnqueueInput + ): Promise + restoreWorkspaceDispatchJob(record: WorkspaceDispatchJobRecord): Promise + claimWorkspaceJob( + workspaceId: string, + options: { + lanes: readonly WorkspaceDispatchLane[] + concurrencyLimit: number + leaseId: string + now: number + leaseTtlMs: number + } + ): Promise + getWorkspaceQueueDepth( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] + ): Promise + getGlobalQueueDepth(): Promise + reconcileGlobalQueueDepth(): Promise + popNextWorkspaceId(): Promise + getQueuedWorkspaceCount(): Promise + hasActiveWorkspace(workspaceId: string): Promise + ensureWorkspaceActive(workspaceId: string, readyAt?: number): Promise + requeueWorkspaceId(workspaceId: string): Promise + workspaceHasPendingJobs( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] + ): Promise + getNextWorkspaceJob( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] + ): Promise + removeWorkspaceJobFromLane( + workspaceId: string, + lane: WorkspaceDispatchLane, + jobId: string + ): Promise + cleanupExpiredWorkspaceLeases(workspaceId: string): Promise + countActiveWorkspaceLeases(workspaceId: string): Promise + hasWorkspaceLease(workspaceId: string, leaseId: string): Promise + createWorkspaceLease(workspaceId: string, leaseId: string, ttlMs: number): Promise + refreshWorkspaceLease(workspaceId: string, leaseId: string, ttlMs: number): Promise + releaseWorkspaceLease(workspaceId: string, leaseId: string): Promise + removeWorkspaceIfIdle(workspaceId: string, lanes: readonly WorkspaceDispatchLane[]): Promise + markDispatchJobAdmitted( + jobId: string, + workspaceId: string, + leaseId: string, + leaseExpiresAt: number + ): Promise + markDispatchJobAdmitting( + jobId: string, + workspaceId: string, + 
leaseId: string, + leaseExpiresAt: number + ): Promise + markDispatchJobRunning(jobId: string): Promise + markDispatchJobCompleted(jobId: string, output: unknown): Promise + markDispatchJobFailed(jobId: string, error: string): Promise + clear(): Promise + dispose(): void +} diff --git a/apps/sim/lib/core/workspace-dispatch/dispatcher.test.ts b/apps/sim/lib/core/workspace-dispatch/dispatcher.test.ts new file mode 100644 index 00000000000..6daa485f918 --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/dispatcher.test.ts @@ -0,0 +1,175 @@ +/** + * @vitest-environment node + */ +import { beforeEach, describe, expect, it, vi } from 'vitest' + +const { mockGetWorkspaceConcurrencyLimit, mockAcquireLock, mockReleaseLock } = vi.hoisted(() => ({ + mockGetWorkspaceConcurrencyLimit: vi.fn(), + mockAcquireLock: vi.fn(), + mockReleaseLock: vi.fn(), +})) + +vi.mock('@/lib/billing/workspace-concurrency', () => ({ + getWorkspaceConcurrencyLimit: mockGetWorkspaceConcurrencyLimit, +})) + +vi.mock('@/lib/core/config/redis', () => ({ + acquireLock: mockAcquireLock, + releaseLock: mockReleaseLock, + getRedisClient: vi.fn().mockReturnValue(null), +})) + +vi.mock('@/lib/core/bullmq', () => ({ + getBullMQQueueByName: vi.fn().mockReturnValue({ + add: vi.fn().mockResolvedValue({ id: 'bullmq-1' }), + }), +})) + +import { MemoryWorkspaceDispatchStorage } from '@/lib/core/workspace-dispatch/memory-store' +import { + DISPATCH_SCAN_RESULTS, + dispatchNextAdmissibleWorkspaceJob, +} from '@/lib/core/workspace-dispatch/planner' +import { + enqueueWorkspaceDispatchJob, + setWorkspaceDispatchStorageAdapter, +} from '@/lib/core/workspace-dispatch/store' + +describe('workspace dispatch integration (memory-backed)', () => { + let store: MemoryWorkspaceDispatchStorage + + beforeEach(async () => { + vi.clearAllMocks() + store = new MemoryWorkspaceDispatchStorage() + setWorkspaceDispatchStorageAdapter(store) + + mockGetWorkspaceConcurrencyLimit.mockResolvedValue(5) + 
mockAcquireLock.mockResolvedValue(true) + mockReleaseLock.mockResolvedValue(true) + }) + + async function enqueue( + workspaceId: string, + overrides: { lane?: string; delayMs?: number; priority?: number } = {} + ) { + return enqueueWorkspaceDispatchJob({ + workspaceId, + lane: (overrides.lane ?? 'runtime') as 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', + bullmqPayload: { payload: { workflowId: 'wf-1' } }, + metadata: { workflowId: 'wf-1' }, + delayMs: overrides.delayMs, + priority: overrides.priority, + }) + } + + it('admits jobs round-robin across workspaces', async () => { + await enqueue('ws-a') + await enqueue('ws-b') + await enqueue('ws-a') + + const r1 = await dispatchNextAdmissibleWorkspaceJob() + const r2 = await dispatchNextAdmissibleWorkspaceJob() + const r3 = await dispatchNextAdmissibleWorkspaceJob() + + expect(r1).toBe(DISPATCH_SCAN_RESULTS.ADMITTED) + expect(r2).toBe(DISPATCH_SCAN_RESULTS.ADMITTED) + expect(r3).toBe(DISPATCH_SCAN_RESULTS.ADMITTED) + }) + + it('respects workspace concurrency limits', async () => { + mockGetWorkspaceConcurrencyLimit.mockResolvedValue(1) + + await enqueue('ws-a') + await enqueue('ws-a') + + const r1 = await dispatchNextAdmissibleWorkspaceJob() + expect(r1).toBe(DISPATCH_SCAN_RESULTS.ADMITTED) + + const r2 = await dispatchNextAdmissibleWorkspaceJob() + expect(r2).toBe(DISPATCH_SCAN_RESULTS.NO_PROGRESS) + }) + + it('skips delayed jobs and admits ready ones in same lane', async () => { + await enqueue('ws-a', { delayMs: 60_000 }) + await enqueue('ws-a', { delayMs: 0 }) + + const r1 = await dispatchNextAdmissibleWorkspaceJob() + expect(r1).toBe(DISPATCH_SCAN_RESULTS.ADMITTED) + }) + + it('returns delayed when all jobs are delayed', async () => { + await enqueue('ws-a', { delayMs: 60_000 }) + + const r1 = await dispatchNextAdmissibleWorkspaceJob() + expect(r1).toBe(DISPATCH_SCAN_RESULTS.NO_PROGRESS) + }) + + it('returns no_workspace when queue is empty', async () => { + const result = 
await dispatchNextAdmissibleWorkspaceJob() + expect(result).toBe(DISPATCH_SCAN_RESULTS.NO_WORKSPACE) + }) + + it('lease cleanup frees capacity for new admissions', async () => { + mockGetWorkspaceConcurrencyLimit.mockResolvedValue(1) + + const record = await enqueue('ws-a') + await enqueue('ws-a') + + const r1 = await dispatchNextAdmissibleWorkspaceJob() + expect(r1).toBe(DISPATCH_SCAN_RESULTS.ADMITTED) + + const updated = await store.getDispatchJobRecord(record.id) + if (updated?.lease) { + await store.releaseWorkspaceLease('ws-a', updated.lease.leaseId) + } + + const r2 = await dispatchNextAdmissibleWorkspaceJob() + expect(r2).toBe(DISPATCH_SCAN_RESULTS.ADMITTED) + }) + + it('expired leases are cleaned up during claim', async () => { + mockGetWorkspaceConcurrencyLimit.mockResolvedValue(1) + + await enqueue('ws-a') + await enqueue('ws-a') + + const claimResult = await store.claimWorkspaceJob('ws-a', { + lanes: ['runtime'], + concurrencyLimit: 1, + leaseId: 'old-lease', + now: Date.now(), + leaseTtlMs: 1, + }) + expect(claimResult.type).toBe('admitted') + + await new Promise((resolve) => setTimeout(resolve, 10)) + + const r2 = await dispatchNextAdmissibleWorkspaceJob() + expect(r2).toBe(DISPATCH_SCAN_RESULTS.ADMITTED) + }) + + it('recovers job to waiting via restoreWorkspaceDispatchJob', async () => { + const record = await enqueue('ws-a') + + await store.claimWorkspaceJob('ws-a', { + lanes: ['runtime'], + concurrencyLimit: 1, + leaseId: 'lease-1', + now: Date.now(), + leaseTtlMs: 1000, + }) + + await store.markDispatchJobAdmitted(record.id, 'ws-a', 'lease-1', Date.now() + 10000) + + const admitted = await store.getDispatchJobRecord(record.id) + expect(admitted).toBeDefined() + const resetRecord = { ...admitted!, status: 'waiting' as const, lease: undefined } + await store.restoreWorkspaceDispatchJob(resetRecord) + + const restored = await store.getDispatchJobRecord(record.id) + expect(restored?.status).toBe('waiting') + expect(restored?.lease).toBeUndefined() + }) 
+}) diff --git a/apps/sim/lib/core/workspace-dispatch/dispatcher.ts b/apps/sim/lib/core/workspace-dispatch/dispatcher.ts new file mode 100644 index 00000000000..1122107ea4b --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/dispatcher.ts @@ -0,0 +1,156 @@ +import { createLogger } from '@sim/logger' +import { env } from '@/lib/core/config/env' +import { + enqueueWorkspaceDispatchJob, + getDispatchJobRecord, + getGlobalQueueDepth, + getQueuedWorkspaceCount, + getWorkspaceQueueDepth, +} from '@/lib/core/workspace-dispatch/store' +import { + WORKSPACE_DISPATCH_LANES, + type WorkspaceDispatchEnqueueInput, + type WorkspaceDispatchJobRecord, +} from '@/lib/core/workspace-dispatch/types' +import { DISPATCH_SCAN_RESULTS, dispatchNextAdmissibleWorkspaceJob } from './planner' +import { reconcileWorkspaceDispatchState } from './reconciler' + +const logger = createLogger('WorkspaceDispatcher') +const WAIT_POLL_INTERVAL_MS = 250 +const RECONCILE_INTERVAL_MS = 30_000 +const MAX_QUEUE_PER_WORKSPACE = Number.parseInt(env.DISPATCH_MAX_QUEUE_PER_WORKSPACE ?? '') || 1000 +const MAX_QUEUE_GLOBAL = Number.parseInt(env.DISPATCH_MAX_QUEUE_GLOBAL ?? 
'') || 50_000 + +let dispatcherRunning = false +let dispatcherWakePending = false +let lastReconcileAt = 0 + +async function runDispatcherLoop(): Promise { + if (dispatcherRunning) { + dispatcherWakePending = true + return + } + + dispatcherRunning = true + + try { + const now = Date.now() + if (now - lastReconcileAt >= RECONCILE_INTERVAL_MS) { + await reconcileWorkspaceDispatchState() + lastReconcileAt = now + } + + do { + dispatcherWakePending = false + const queuedWorkspaces = await getQueuedWorkspaceCount() + if (queuedWorkspaces === 0) { + continue + } + + let admitted = 0 + let scanned = 0 + const loopStartMs = Date.now() + + for (let index = 0; index < queuedWorkspaces; index++) { + scanned++ + const result = await dispatchNextAdmissibleWorkspaceJob() + if (result === DISPATCH_SCAN_RESULTS.ADMITTED) { + admitted++ + } + if (result === DISPATCH_SCAN_RESULTS.NO_WORKSPACE) { + break + } + } + + if (admitted > 0) { + dispatcherWakePending = true + } + + if (admitted > 0 || scanned > 0) { + logger.info('Dispatcher pass', { + admitted, + scanned, + queuedWorkspaces, + durationMs: Date.now() - loopStartMs, + }) + } + } while (dispatcherWakePending) + } catch (error) { + logger.error('Workspace dispatcher loop failed', { error }) + } finally { + dispatcherRunning = false + } +} + +export class DispatchQueueFullError extends Error { + readonly statusCode = 503 + + constructor( + readonly scope: 'workspace' | 'global', + readonly depth: number, + readonly limit: number + ) { + super( + scope === 'workspace' + ? 
`Workspace queue is at capacity (${depth}/${limit})` + : `Global dispatch queue is at capacity (${depth}/${limit})` + ) + this.name = 'DispatchQueueFullError' + } +} + +export async function enqueueWorkspaceDispatch( + input: WorkspaceDispatchEnqueueInput +): Promise { + const [workspaceDepth, globalDepth] = await Promise.all([ + getWorkspaceQueueDepth(input.workspaceId, WORKSPACE_DISPATCH_LANES), + getGlobalQueueDepth(), + ]) + + if (workspaceDepth >= MAX_QUEUE_PER_WORKSPACE) { + logger.warn('Workspace dispatch queue at capacity', { + workspaceId: input.workspaceId, + depth: workspaceDepth, + limit: MAX_QUEUE_PER_WORKSPACE, + }) + throw new DispatchQueueFullError('workspace', workspaceDepth, MAX_QUEUE_PER_WORKSPACE) + } + + if (globalDepth >= MAX_QUEUE_GLOBAL) { + logger.warn('Global dispatch queue at capacity', { + depth: globalDepth, + limit: MAX_QUEUE_GLOBAL, + }) + throw new DispatchQueueFullError('global', globalDepth, MAX_QUEUE_GLOBAL) + } + + const record = await enqueueWorkspaceDispatchJob(input) + void runDispatcherLoop() + return record.id +} + +export async function wakeWorkspaceDispatcher(): Promise { + await runDispatcherLoop() +} + +export async function waitForDispatchJob( + dispatchJobId: string, + timeoutMs: number +): Promise { + const deadline = Date.now() + timeoutMs + + while (Date.now() < deadline) { + const record = await getDispatchJobRecord(dispatchJobId) + if (!record) { + throw new Error(`Dispatch job not found: ${dispatchJobId}`) + } + + if (record.status === 'completed' || record.status === 'failed') { + return record + } + + await new Promise((resolve) => setTimeout(resolve, WAIT_POLL_INTERVAL_MS)) + } + + throw new Error(`Timed out waiting for dispatch job ${dispatchJobId}`) +} diff --git a/apps/sim/lib/core/workspace-dispatch/factory.ts b/apps/sim/lib/core/workspace-dispatch/factory.ts new file mode 100644 index 00000000000..3a07c68cf01 --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/factory.ts @@ -0,0 +1,42 @@ +import { 
createLogger } from '@sim/logger' +import { getRedisClient } from '@/lib/core/config/redis' +import type { WorkspaceDispatchStorageAdapter } from '@/lib/core/workspace-dispatch/adapter' +import { MemoryWorkspaceDispatchStorage } from '@/lib/core/workspace-dispatch/memory-store' +import { RedisWorkspaceDispatchStorage } from '@/lib/core/workspace-dispatch/redis-store' + +const logger = createLogger('WorkspaceDispatchFactory') + +let cachedAdapter: WorkspaceDispatchStorageAdapter | null = null + +export function createWorkspaceDispatchStorageAdapter(): WorkspaceDispatchStorageAdapter { + if (cachedAdapter) { + return cachedAdapter + } + + const redis = getRedisClient() + + if (redis) { + logger.info('Workspace dispatcher: Using Redis storage') + const adapter = new RedisWorkspaceDispatchStorage(redis) + cachedAdapter = adapter + return adapter + } + + logger.warn( + 'Workspace dispatcher: Using in-memory storage; distributed fairness is disabled in multi-process deployments' + ) + const adapter = new MemoryWorkspaceDispatchStorage() + cachedAdapter = adapter + return adapter +} + +export function setWorkspaceDispatchStorageAdapter(adapter: WorkspaceDispatchStorageAdapter): void { + cachedAdapter = adapter +} + +export function resetWorkspaceDispatchStorageAdapter(): void { + if (cachedAdapter) { + cachedAdapter.dispose() + cachedAdapter = null + } +} diff --git a/apps/sim/lib/core/workspace-dispatch/index.ts b/apps/sim/lib/core/workspace-dispatch/index.ts new file mode 100644 index 00000000000..74645372c9f --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/index.ts @@ -0,0 +1,32 @@ +export type { WorkspaceDispatchStorageAdapter } from './adapter' +export { + DispatchQueueFullError, + enqueueWorkspaceDispatch, + waitForDispatchJob, + wakeWorkspaceDispatcher, +} from './dispatcher' +export { + createWorkspaceDispatchStorageAdapter, + resetWorkspaceDispatchStorageAdapter, +} from './factory' +export { + markDispatchJobAdmitted, + markDispatchJobAdmitting, + 
markDispatchJobCompleted, + markDispatchJobFailed, + markDispatchJobRunning, + refreshWorkspaceLease, + releaseWorkspaceLease, +} from './store' +export { + WORKSPACE_DISPATCH_LANES, + WORKSPACE_DISPATCH_STATUSES, + type WorkspaceDispatchEnqueueInput, + type WorkspaceDispatchJobContext, + type WorkspaceDispatchJobRecord, + type WorkspaceDispatchLane, + type WorkspaceDispatchLeaseInfo, + type WorkspaceDispatchQueueName, + type WorkspaceDispatchStatus, +} from './types' +export { getDispatchRuntimeMetadata, runDispatchedJob } from './worker' diff --git a/apps/sim/lib/core/workspace-dispatch/memory-store.test.ts b/apps/sim/lib/core/workspace-dispatch/memory-store.test.ts new file mode 100644 index 00000000000..87a54de26d1 --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/memory-store.test.ts @@ -0,0 +1,65 @@ +/** + * @vitest-environment node + */ +import { afterEach, describe, expect, it } from 'vitest' +import { MemoryWorkspaceDispatchStorage } from '@/lib/core/workspace-dispatch/memory-store' + +describe('memory workspace dispatch storage', () => { + const store = new MemoryWorkspaceDispatchStorage() + + afterEach(async () => { + await store.clear() + }) + + it('claims a runnable job and marks it admitting with a lease', async () => { + const record = await store.enqueueWorkspaceDispatchJob({ + workspaceId: 'workspace-1', + lane: 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', + bullmqPayload: { payload: { workflowId: 'workflow-1' } }, + metadata: { + workflowId: 'workflow-1', + }, + }) + + const result = await store.claimWorkspaceJob('workspace-1', { + lanes: ['runtime'], + concurrencyLimit: 1, + leaseId: 'lease-1', + now: Date.now(), + leaseTtlMs: 1000, + }) + + expect(result.type).toBe('admitted') + if (result.type === 'admitted') { + expect(result.record.id).toBe(record.id) + expect(result.record.status).toBe('admitting') + expect(result.record.lease?.leaseId).toBe('lease-1') + } + }) + + it('returns delayed when only 
delayed jobs exist', async () => { + await store.enqueueWorkspaceDispatchJob({ + workspaceId: 'workspace-1', + lane: 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', + bullmqPayload: { payload: { workflowId: 'workflow-1' } }, + metadata: { + workflowId: 'workflow-1', + }, + delayMs: 5000, + }) + + const result = await store.claimWorkspaceJob('workspace-1', { + lanes: ['runtime'], + concurrencyLimit: 1, + leaseId: 'lease-2', + now: Date.now(), + leaseTtlMs: 1000, + }) + + expect(result.type).toBe('delayed') + }) +}) diff --git a/apps/sim/lib/core/workspace-dispatch/memory-store.ts b/apps/sim/lib/core/workspace-dispatch/memory-store.ts new file mode 100644 index 00000000000..2f4e0966bbd --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/memory-store.ts @@ -0,0 +1,478 @@ +import { createLogger } from '@sim/logger' +import type { WorkspaceDispatchStorageAdapter } from '@/lib/core/workspace-dispatch/adapter' +import { + WORKSPACE_DISPATCH_CLAIM_RESULTS, + type WorkspaceDispatchClaimResult, + type WorkspaceDispatchEnqueueInput, + type WorkspaceDispatchJobRecord, + type WorkspaceDispatchLane, +} from '@/lib/core/workspace-dispatch/types' + +const logger = createLogger('WorkspaceDispatchMemoryStore') +const JOB_TTL_MS = 48 * 60 * 60 * 1000 + +export class MemoryWorkspaceDispatchStorage implements WorkspaceDispatchStorageAdapter { + private jobs = new Map() + private workspaceOrder: string[] = [] + private laneQueues = new Map() + private leases = new Map>() + private sequence = 0 + private cleanupInterval: NodeJS.Timeout | null = null + + constructor() { + this.cleanupInterval = setInterval(() => { + void this.clearExpiredState() + }, 60_000) + this.cleanupInterval.unref() + } + + private queueKey(workspaceId: string, lane: WorkspaceDispatchLane): string { + return `${workspaceId}:${lane}` + } + + private ensureWorkspaceQueued(workspaceId: string): void { + if (!this.workspaceOrder.includes(workspaceId)) { + 
this.workspaceOrder.push(workspaceId) + } + } + + private getLaneQueue(workspaceId: string, lane: WorkspaceDispatchLane): string[] { + const key = this.queueKey(workspaceId, lane) + const existing = this.laneQueues.get(key) + if (existing) { + return existing + } + + const queue: string[] = [] + this.laneQueues.set(key, queue) + return queue + } + + private sortQueue(queue: string[]): void { + queue.sort((leftId, rightId) => { + const left = this.jobs.get(leftId) + const right = this.jobs.get(rightId) + if (!left || !right) { + return 0 + } + + if (left.priority !== right.priority) { + return left.priority - right.priority + } + + return left.createdAt - right.createdAt + }) + } + + private getLeaseMap(workspaceId: string): Map { + const existing = this.leases.get(workspaceId) + if (existing) { + return existing + } + + const leaseMap = new Map() + this.leases.set(workspaceId, leaseMap) + return leaseMap + } + + private async clearExpiredState(): Promise { + const now = Date.now() + + for (const [jobId, record] of this.jobs.entries()) { + if ( + (record.status === 'completed' || record.status === 'failed') && + record.completedAt && + now - record.completedAt > JOB_TTL_MS + ) { + this.jobs.delete(jobId) + } + } + + for (const [workspaceId, leaseMap] of this.leases.entries()) { + for (const [leaseId, expiresAt] of leaseMap.entries()) { + if (expiresAt <= now) { + leaseMap.delete(leaseId) + } + } + if (leaseMap.size === 0) { + this.leases.delete(workspaceId) + } + } + } + + async saveDispatchJob(record: WorkspaceDispatchJobRecord): Promise { + this.jobs.set(record.id, record) + } + + async getDispatchJobRecord(jobId: string): Promise { + return this.jobs.get(jobId) ?? 
null + } + + async listDispatchJobsByStatuses( + statuses: readonly WorkspaceDispatchJobRecord['status'][] + ): Promise { + return Array.from(this.jobs.values()).filter((record) => statuses.includes(record.status)) + } + + async updateDispatchJobRecord( + jobId: string, + updater: (record: WorkspaceDispatchJobRecord) => WorkspaceDispatchJobRecord + ): Promise { + const current = this.jobs.get(jobId) + if (!current) { + return null + } + + const updated = updater(current) + this.jobs.set(jobId, updated) + return updated + } + + async enqueueWorkspaceDispatchJob( + input: WorkspaceDispatchEnqueueInput + ): Promise { + const id = input.id ?? `dispatch_${crypto.randomUUID().replace(/-/g, '').slice(0, 20)}` + const createdAt = Date.now() + + const record: WorkspaceDispatchJobRecord = { + id, + workspaceId: input.workspaceId, + lane: input.lane, + queueName: input.queueName, + bullmqJobName: input.bullmqJobName, + bullmqPayload: input.bullmqPayload, + metadata: input.metadata, + priority: input.priority ?? 
100, + maxAttempts: input.maxAttempts, + delayMs: input.delayMs, + status: 'waiting', + createdAt, + } + + this.jobs.set(id, record) + const queue = this.getLaneQueue(record.workspaceId, record.lane) + queue.push(id) + this.sortQueue(queue) + this.ensureWorkspaceQueued(record.workspaceId) + return record + } + + async restoreWorkspaceDispatchJob(record: WorkspaceDispatchJobRecord): Promise { + this.jobs.set(record.id, record) + const queue = this.getLaneQueue(record.workspaceId, record.lane) + if (!queue.includes(record.id)) { + queue.push(record.id) + this.sortQueue(queue) + } + this.ensureWorkspaceQueued(record.workspaceId) + } + + async claimWorkspaceJob( + workspaceId: string, + options: { + lanes: readonly WorkspaceDispatchLane[] + concurrencyLimit: number + leaseId: string + now: number + leaseTtlMs: number + } + ): Promise { + await this.cleanupExpiredWorkspaceLeases(workspaceId) + if (this.getLeaseMap(workspaceId).size >= options.concurrencyLimit) { + this.ensureWorkspaceQueued(workspaceId) + return { type: WORKSPACE_DISPATCH_CLAIM_RESULTS.LIMIT_REACHED } + } + + let selectedRecord: WorkspaceDispatchJobRecord | null = null + let selectedLane: WorkspaceDispatchLane | null = null + let nextReadyAt: number | null = null + + for (const lane of options.lanes) { + const queue = this.getLaneQueue(workspaceId, lane) + for (let scanIndex = 0; scanIndex < queue.length && scanIndex < 20; ) { + const jobId = queue[scanIndex] + const record = this.jobs.get(jobId) + if (!record) { + queue.splice(scanIndex, 1) + continue + } + + const readyAt = record.createdAt + (record.delayMs ?? 0) + if (readyAt <= options.now) { + selectedRecord = record + selectedLane = lane + queue.splice(scanIndex, 1) + break + } + + nextReadyAt = nextReadyAt ? 
Math.min(nextReadyAt, readyAt) : readyAt + scanIndex++ + } + + if (selectedRecord) { + break + } + } + + if (!selectedRecord || !selectedLane) { + const hasPending = await this.workspaceHasPendingJobs(workspaceId, options.lanes) + if (!hasPending) { + this.workspaceOrder = this.workspaceOrder.filter((value) => value !== workspaceId) + return { type: WORKSPACE_DISPATCH_CLAIM_RESULTS.EMPTY } + } + + this.ensureWorkspaceQueued(workspaceId) + return { + type: WORKSPACE_DISPATCH_CLAIM_RESULTS.DELAYED, + nextReadyAt: nextReadyAt ?? options.now, + } + } + + const leaseExpiresAt = options.now + options.leaseTtlMs + this.getLeaseMap(workspaceId).set(options.leaseId, leaseExpiresAt) + + const updatedRecord: WorkspaceDispatchJobRecord = { + ...selectedRecord, + status: 'admitting', + lease: { + workspaceId, + leaseId: options.leaseId, + }, + metadata: { + ...selectedRecord.metadata, + dispatchLeaseExpiresAt: leaseExpiresAt, + }, + } + this.jobs.set(updatedRecord.id, updatedRecord) + + const hasPending = await this.workspaceHasPendingJobs(workspaceId, options.lanes) + if (hasPending) { + this.ensureWorkspaceQueued(workspaceId) + } else { + this.workspaceOrder = this.workspaceOrder.filter((value) => value !== workspaceId) + } + + return { + type: WORKSPACE_DISPATCH_CLAIM_RESULTS.ADMITTED, + record: updatedRecord, + leaseId: options.leaseId, + leaseExpiresAt, + } + } + + async getWorkspaceQueueDepth( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] + ): Promise { + let depth = 0 + for (const lane of lanes) { + depth += this.getLaneQueue(workspaceId, lane).length + } + return depth + } + + async getGlobalQueueDepth(): Promise { + const terminalStatuses = new Set(['completed', 'failed']) + let count = 0 + for (const job of this.jobs.values()) { + if (!terminalStatuses.has(job.status)) { + count++ + } + } + return count + } + + async reconcileGlobalQueueDepth(): Promise { + // no-op: memory store computes depth on the fly + } + + async popNextWorkspaceId(): Promise 
{ + return this.workspaceOrder.shift() ?? null + } + + async getQueuedWorkspaceCount(): Promise { + return this.workspaceOrder.length + } + + async hasActiveWorkspace(workspaceId: string): Promise { + return this.workspaceOrder.includes(workspaceId) + } + + async ensureWorkspaceActive(workspaceId: string): Promise { + this.ensureWorkspaceQueued(workspaceId) + } + + async requeueWorkspaceId(workspaceId: string): Promise { + this.ensureWorkspaceQueued(workspaceId) + } + + async workspaceHasPendingJobs( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] + ): Promise { + return lanes.some((lane) => this.getLaneQueue(workspaceId, lane).length > 0) + } + + async getNextWorkspaceJob( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] + ): Promise { + for (const lane of lanes) { + const queue = this.getLaneQueue(workspaceId, lane) + while (queue.length > 0) { + const jobId = queue[0] + const job = this.jobs.get(jobId) + if (job) { + return job + } + queue.shift() + } + } + + return null + } + + async removeWorkspaceJobFromLane( + workspaceId: string, + lane: WorkspaceDispatchLane, + jobId: string + ): Promise { + const queue = this.getLaneQueue(workspaceId, lane) + const index = queue.indexOf(jobId) + if (index >= 0) { + queue.splice(index, 1) + } + } + + async cleanupExpiredWorkspaceLeases(workspaceId: string): Promise { + const leaseMap = this.getLeaseMap(workspaceId) + const now = Date.now() + for (const [leaseId, expiresAt] of leaseMap.entries()) { + if (expiresAt <= now) { + leaseMap.delete(leaseId) + } + } + } + + async countActiveWorkspaceLeases(workspaceId: string): Promise { + await this.cleanupExpiredWorkspaceLeases(workspaceId) + return this.getLeaseMap(workspaceId).size + } + + async hasWorkspaceLease(workspaceId: string, leaseId: string): Promise { + await this.cleanupExpiredWorkspaceLeases(workspaceId) + return this.getLeaseMap(workspaceId).has(leaseId) + } + + async createWorkspaceLease(workspaceId: string, leaseId: string, ttlMs: 
number): Promise { + const expiresAt = Date.now() + ttlMs + this.getLeaseMap(workspaceId).set(leaseId, expiresAt) + return expiresAt + } + + async refreshWorkspaceLease( + workspaceId: string, + leaseId: string, + ttlMs: number + ): Promise { + return this.createWorkspaceLease(workspaceId, leaseId, ttlMs) + } + + async releaseWorkspaceLease(workspaceId: string, leaseId: string): Promise { + this.getLeaseMap(workspaceId).delete(leaseId) + } + + async removeWorkspaceIfIdle( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] + ): Promise { + const hasPending = await this.workspaceHasPendingJobs(workspaceId, lanes) + if (!hasPending) { + this.workspaceOrder = this.workspaceOrder.filter((value) => value !== workspaceId) + } + } + + async markDispatchJobAdmitted( + jobId: string, + workspaceId: string, + leaseId: string, + leaseExpiresAt: number + ): Promise { + await this.updateDispatchJobRecord(jobId, (record) => ({ + ...record, + status: 'admitted', + admittedAt: Date.now(), + lease: { + workspaceId, + leaseId, + }, + metadata: { + ...record.metadata, + dispatchLeaseExpiresAt: leaseExpiresAt, + }, + })) + } + + async markDispatchJobAdmitting( + jobId: string, + workspaceId: string, + leaseId: string, + leaseExpiresAt: number + ): Promise { + await this.updateDispatchJobRecord(jobId, (record) => ({ + ...record, + status: 'admitting', + lease: { + workspaceId, + leaseId, + }, + metadata: { + ...record.metadata, + dispatchLeaseExpiresAt: leaseExpiresAt, + }, + })) + } + + async markDispatchJobRunning(jobId: string): Promise { + await this.updateDispatchJobRecord(jobId, (record) => ({ + ...record, + status: 'running', + startedAt: record.startedAt ?? 
Date.now(), + })) + } + + async markDispatchJobCompleted(jobId: string, output: unknown): Promise { + await this.updateDispatchJobRecord(jobId, (record) => ({ + ...record, + status: 'completed', + completedAt: Date.now(), + output, + })) + } + + async markDispatchJobFailed(jobId: string, error: string): Promise { + await this.updateDispatchJobRecord(jobId, (record) => ({ + ...record, + status: 'failed', + completedAt: Date.now(), + error, + })) + } + + async clear(): Promise { + this.jobs.clear() + this.workspaceOrder = [] + this.laneQueues.clear() + this.leases.clear() + } + + dispose(): void { + if (this.cleanupInterval) { + clearInterval(this.cleanupInterval) + this.cleanupInterval = null + } + void this.clear().catch((error) => { + logger.error('Failed to clear memory workspace dispatch storage', { error }) + }) + } +} diff --git a/apps/sim/lib/core/workspace-dispatch/planner.ts b/apps/sim/lib/core/workspace-dispatch/planner.ts new file mode 100644 index 00000000000..8ba42f83a0a --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/planner.ts @@ -0,0 +1,154 @@ +import { createLogger } from '@sim/logger' +import { getWorkspaceConcurrencyLimit } from '@/lib/billing/workspace-concurrency' +import { type BullMQJobData, getBullMQQueueByName } from '@/lib/core/bullmq' +import { acquireLock, releaseLock } from '@/lib/core/config/redis' +import { + claimWorkspaceJob, + markDispatchJobAdmitted, + popNextWorkspaceId, + releaseWorkspaceLease, + removeWorkspaceIfIdle, + requeueWorkspaceId, +} from '@/lib/core/workspace-dispatch/store' +import { + WORKSPACE_DISPATCH_CLAIM_RESULTS, + WORKSPACE_DISPATCH_LANES, + type WorkspaceDispatchJobRecord, +} from '@/lib/core/workspace-dispatch/types' + +const logger = createLogger('WorkspaceDispatchPlanner') + +const LEASE_TTL_MS = 15 * 60 * 1000 +const WORKSPACE_CLAIM_LOCK_TTL_SECONDS = 10 + +export const DISPATCH_SCAN_RESULTS = { + NO_WORKSPACE: 'no_workspace', + NO_PROGRESS: 'no_progress', + ADMITTED: 'admitted', +} as const + 
+export type DispatchScanResult = (typeof DISPATCH_SCAN_RESULTS)[keyof typeof DISPATCH_SCAN_RESULTS] + +function attachDispatchMetadata( + bullmqPayload: unknown, + record: WorkspaceDispatchJobRecord, + leaseId: string, + leaseExpiresAt: number +): BullMQJobData { + if ( + bullmqPayload && + typeof bullmqPayload === 'object' && + 'payload' in bullmqPayload && + 'metadata' in bullmqPayload + ) { + const data = bullmqPayload as BullMQJobData + return { + payload: data.payload, + metadata: { + ...(data.metadata ?? {}), + dispatchJobId: record.id, + dispatchWorkspaceId: record.workspaceId, + dispatchLeaseId: leaseId, + dispatchLeaseExpiresAt: leaseExpiresAt, + }, + } + } + + return { + payload: bullmqPayload, + metadata: { + ...record.metadata, + dispatchJobId: record.id, + dispatchWorkspaceId: record.workspaceId, + dispatchLeaseId: leaseId, + dispatchLeaseExpiresAt: leaseExpiresAt, + }, + } +} + +async function finalizeAdmittedJob( + record: WorkspaceDispatchJobRecord, + leaseId: string, + leaseExpiresAt: number +): Promise { + try { + await getBullMQQueueByName(record.queueName).add( + record.bullmqJobName, + attachDispatchMetadata(record.bullmqPayload, record, leaseId, leaseExpiresAt), + { + jobId: record.id, + attempts: record.maxAttempts, + priority: record.priority, + } + ) + + await markDispatchJobAdmitted(record.id, record.workspaceId, leaseId, leaseExpiresAt) + } catch (error) { + await releaseWorkspaceLease(record.workspaceId, leaseId).catch(() => undefined) + throw error + } +} + +export async function dispatchNextAdmissibleWorkspaceJob(): Promise { + const workspaceId = await popNextWorkspaceId() + if (!workspaceId) { + return DISPATCH_SCAN_RESULTS.NO_WORKSPACE + } + + const lockValue = `lock_${crypto.randomUUID()}` + try { + const lockKey = `workspace-dispatch:claim-lock:${workspaceId}` + const acquired = await acquireLock(lockKey, lockValue, WORKSPACE_CLAIM_LOCK_TTL_SECONDS) + if (!acquired) { + await requeueWorkspaceId(workspaceId) + return 
DISPATCH_SCAN_RESULTS.NO_PROGRESS + } + + const limit = await getWorkspaceConcurrencyLimit(workspaceId) + const leaseId = `lease_${crypto.randomUUID()}` + const claimResult = await claimWorkspaceJob(workspaceId, { + lanes: WORKSPACE_DISPATCH_LANES, + concurrencyLimit: limit, + leaseId, + now: Date.now(), + leaseTtlMs: LEASE_TTL_MS, + }) + + switch (claimResult.type) { + case WORKSPACE_DISPATCH_CLAIM_RESULTS.LIMIT_REACHED: + logger.debug('Workspace concurrency limit reached', { workspaceId, limit }) + await requeueWorkspaceId(workspaceId) + return DISPATCH_SCAN_RESULTS.NO_PROGRESS + case WORKSPACE_DISPATCH_CLAIM_RESULTS.DELAYED: + logger.debug('Workspace has only delayed jobs', { + workspaceId, + nextReadyAt: claimResult.nextReadyAt, + }) + return DISPATCH_SCAN_RESULTS.NO_PROGRESS + case WORKSPACE_DISPATCH_CLAIM_RESULTS.EMPTY: + await removeWorkspaceIfIdle(workspaceId, WORKSPACE_DISPATCH_LANES) + return DISPATCH_SCAN_RESULTS.NO_PROGRESS + case WORKSPACE_DISPATCH_CLAIM_RESULTS.ADMITTED: + logger.info('Admitting workspace job', { + workspaceId, + dispatchJobId: claimResult.record.id, + lane: claimResult.record.lane, + queueName: claimResult.record.queueName, + }) + await finalizeAdmittedJob( + claimResult.record, + claimResult.leaseId, + claimResult.leaseExpiresAt + ) + return DISPATCH_SCAN_RESULTS.ADMITTED + } + } catch (error) { + logger.error('Failed to dispatch workspace job', { workspaceId, error }) + await requeueWorkspaceId(workspaceId) + return DISPATCH_SCAN_RESULTS.NO_PROGRESS + } finally { + await releaseLock(`workspace-dispatch:claim-lock:${workspaceId}`, lockValue).catch( + () => undefined + ) + } +} diff --git a/apps/sim/lib/core/workspace-dispatch/reconciler.test.ts b/apps/sim/lib/core/workspace-dispatch/reconciler.test.ts new file mode 100644 index 00000000000..a61d0dc4d1b --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/reconciler.test.ts @@ -0,0 +1,225 @@ +/** + * @vitest-environment node + */ +import { beforeEach, describe, expect, it, vi } 
from 'vitest' + +const { + mockGetBullMQQueueByName, + mockHasActiveWorkspace, + mockEnsureWorkspaceActive, + mockHasWorkspaceLease, + mockListDispatchJobsByStatuses, + mockMarkDispatchJobAdmitted, + mockMarkDispatchJobCompleted, + mockMarkDispatchJobFailed, + mockRefreshWorkspaceLease, + mockReleaseWorkspaceLease, + mockRemoveWorkspaceJobFromLane, + mockRestoreWorkspaceDispatchJob, + mockWakeWorkspaceDispatcher, +} = vi.hoisted(() => ({ + mockGetBullMQQueueByName: vi.fn(), + mockHasActiveWorkspace: vi.fn(), + mockEnsureWorkspaceActive: vi.fn(), + mockHasWorkspaceLease: vi.fn(), + mockListDispatchJobsByStatuses: vi.fn(), + mockMarkDispatchJobAdmitted: vi.fn(), + mockMarkDispatchJobCompleted: vi.fn(), + mockMarkDispatchJobFailed: vi.fn(), + mockRefreshWorkspaceLease: vi.fn(), + mockReleaseWorkspaceLease: vi.fn(), + mockRemoveWorkspaceJobFromLane: vi.fn(), + mockRestoreWorkspaceDispatchJob: vi.fn(), + mockWakeWorkspaceDispatcher: vi.fn(), +})) + +vi.mock('@sim/logger', () => ({ + createLogger: () => ({ + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + debug: vi.fn(), + }), +})) + +vi.mock('@/lib/core/bullmq', () => ({ + getBullMQQueueByName: mockGetBullMQQueueByName, +})) + +vi.mock('@/lib/core/workspace-dispatch/store', () => ({ + ensureWorkspaceActive: mockEnsureWorkspaceActive, + hasActiveWorkspace: mockHasActiveWorkspace, + hasWorkspaceLease: mockHasWorkspaceLease, + listDispatchJobsByStatuses: mockListDispatchJobsByStatuses, + markDispatchJobAdmitted: mockMarkDispatchJobAdmitted, + markDispatchJobCompleted: mockMarkDispatchJobCompleted, + markDispatchJobFailed: mockMarkDispatchJobFailed, + reconcileGlobalQueueDepth: vi.fn().mockResolvedValue(undefined), + refreshWorkspaceLease: mockRefreshWorkspaceLease, + releaseWorkspaceLease: mockReleaseWorkspaceLease, + removeWorkspaceJobFromLane: mockRemoveWorkspaceJobFromLane, + restoreWorkspaceDispatchJob: mockRestoreWorkspaceDispatchJob, +})) + +vi.mock('@/lib/core/workspace-dispatch/dispatcher', () => ({ + 
wakeWorkspaceDispatcher: mockWakeWorkspaceDispatcher, +})) + +import { reconcileWorkspaceDispatchState } from '@/lib/core/workspace-dispatch/reconciler' + +describe('workspace dispatch reconciler', () => { + beforeEach(() => { + vi.clearAllMocks() + mockHasActiveWorkspace.mockResolvedValue(true) + mockRemoveWorkspaceJobFromLane.mockResolvedValue(undefined) + }) + + it('marks dispatch job completed when BullMQ job is completed', async () => { + mockListDispatchJobsByStatuses.mockResolvedValue([ + { + id: 'dispatch-1', + workspaceId: 'workspace-1', + lane: 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', + bullmqPayload: {}, + metadata: {}, + priority: 10, + status: 'running', + createdAt: 1, + lease: { + workspaceId: 'workspace-1', + leaseId: 'lease-1', + }, + }, + ]) + mockGetBullMQQueueByName.mockReturnValue({ + getJob: vi.fn().mockResolvedValue({ + getState: vi.fn().mockResolvedValue('completed'), + returnvalue: { ok: true }, + }), + }) + + await reconcileWorkspaceDispatchState() + + expect(mockMarkDispatchJobCompleted).toHaveBeenCalledWith('dispatch-1', { ok: true }) + expect(mockReleaseWorkspaceLease).toHaveBeenCalledWith('workspace-1', 'lease-1') + expect(mockWakeWorkspaceDispatcher).toHaveBeenCalled() + }) + + it('restores admitted jobs to waiting when lease and BullMQ job are gone', async () => { + mockListDispatchJobsByStatuses.mockResolvedValue([ + { + id: 'dispatch-2', + workspaceId: 'workspace-2', + lane: 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', + bullmqPayload: {}, + metadata: {}, + priority: 10, + status: 'admitted', + createdAt: 1, + admittedAt: 2, + lease: { + workspaceId: 'workspace-2', + leaseId: 'lease-2', + }, + }, + ]) + mockGetBullMQQueueByName.mockReturnValue({ + getJob: vi.fn().mockResolvedValue(null), + }) + mockHasWorkspaceLease.mockResolvedValue(false) + + await reconcileWorkspaceDispatchState() + + expect(mockRestoreWorkspaceDispatchJob).toHaveBeenCalledWith( 
+ expect.objectContaining({ + id: 'dispatch-2', + status: 'waiting', + lease: undefined, + }) + ) + expect(mockWakeWorkspaceDispatcher).toHaveBeenCalled() + }) + + it('reacquires the lease for a live admitting BullMQ job', async () => { + mockListDispatchJobsByStatuses.mockResolvedValue([ + { + id: 'dispatch-3', + workspaceId: 'workspace-3', + lane: 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', + bullmqPayload: {}, + metadata: { + dispatchLeaseExpiresAt: 12345, + }, + priority: 10, + status: 'admitting', + createdAt: 1, + lease: { + workspaceId: 'workspace-3', + leaseId: 'lease-3', + }, + }, + ]) + mockGetBullMQQueueByName.mockReturnValue({ + getJob: vi.fn().mockResolvedValue({ + getState: vi.fn().mockResolvedValue('active'), + }), + }) + mockHasWorkspaceLease.mockResolvedValue(false) + + await reconcileWorkspaceDispatchState() + + expect(mockRefreshWorkspaceLease).toHaveBeenCalledWith('workspace-3', 'lease-3', 15 * 60 * 1000) + expect(mockMarkDispatchJobAdmitted).toHaveBeenCalledWith( + 'dispatch-3', + 'workspace-3', + 'lease-3', + 12345 + ) + expect(mockRemoveWorkspaceJobFromLane).toHaveBeenCalledWith( + 'workspace-3', + 'runtime', + 'dispatch-3' + ) + }) + + it('releases leaked lease and restores waiting when BullMQ job is gone but lease remains', async () => { + mockListDispatchJobsByStatuses.mockResolvedValue([ + { + id: 'dispatch-4', + workspaceId: 'workspace-4', + lane: 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', + bullmqPayload: {}, + metadata: {}, + priority: 10, + status: 'running', + createdAt: 1, + lease: { + workspaceId: 'workspace-4', + leaseId: 'lease-4', + }, + }, + ]) + mockGetBullMQQueueByName.mockReturnValue({ + getJob: vi.fn().mockResolvedValue(null), + }) + mockHasWorkspaceLease.mockResolvedValue(true) + + await reconcileWorkspaceDispatchState() + + expect(mockReleaseWorkspaceLease).toHaveBeenCalledWith('workspace-4', 'lease-4') + 
expect(mockRestoreWorkspaceDispatchJob).toHaveBeenCalledWith( + expect.objectContaining({ + id: 'dispatch-4', + status: 'waiting', + }) + ) + }) +}) diff --git a/apps/sim/lib/core/workspace-dispatch/reconciler.ts b/apps/sim/lib/core/workspace-dispatch/reconciler.ts new file mode 100644 index 00000000000..af67edb522a --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/reconciler.ts @@ -0,0 +1,196 @@ +import { createLogger } from '@sim/logger' +import { getBullMQQueueByName } from '@/lib/core/bullmq' +import { + ensureWorkspaceActive, + hasActiveWorkspace, + hasWorkspaceLease, + listDispatchJobsByStatuses, + markDispatchJobAdmitted, + markDispatchJobCompleted, + markDispatchJobFailed, + markDispatchJobRunning, + reconcileGlobalQueueDepth, + refreshWorkspaceLease, + releaseWorkspaceLease, + removeWorkspaceJobFromLane, + restoreWorkspaceDispatchJob, +} from '@/lib/core/workspace-dispatch/store' +import type { WorkspaceDispatchJobRecord } from '@/lib/core/workspace-dispatch/types' +import { wakeWorkspaceDispatcher } from './dispatcher' + +const logger = createLogger('WorkspaceDispatchReconciler') +const LEASE_TTL_MS = 15 * 60 * 1000 + +function resetToWaiting(record: WorkspaceDispatchJobRecord): WorkspaceDispatchJobRecord { + return { + ...record, + status: 'waiting', + admittedAt: undefined, + startedAt: undefined, + completedAt: undefined, + output: undefined, + error: undefined, + lease: undefined, + } +} + +async function reconcileTerminalBullMQState(record: WorkspaceDispatchJobRecord): Promise { + const queue = getBullMQQueueByName(record.queueName) + const job = await queue.getJob(record.id) + if (!job) { + return false + } + + const state = await job.getState() + if (state === 'completed') { + await markDispatchJobCompleted(record.id, job.returnvalue) + if (record.lease) { + await releaseWorkspaceLease(record.lease.workspaceId, record.lease.leaseId) + } + return true + } + + if (state === 'failed' && job.attemptsMade >= (job.opts.attempts ?? 
1)) { + await markDispatchJobFailed(record.id, job.failedReason || 'Job failed') + if (record.lease) { + await releaseWorkspaceLease(record.lease.workspaceId, record.lease.leaseId) + } + return true + } + + return false +} + +async function reconcileStrandedDispatchJob(record: WorkspaceDispatchJobRecord): Promise { + if (!record.lease && record.status !== 'waiting') { + await restoreWorkspaceDispatchJob(resetToWaiting(record)) + return true + } + + if (!record.lease) { + return false + } + + const hasLease = await hasWorkspaceLease(record.lease.workspaceId, record.lease.leaseId) + const queue = getBullMQQueueByName(record.queueName) + const job = await queue.getJob(record.id) + if (hasLease) { + if (!job) { + await releaseWorkspaceLease(record.lease.workspaceId, record.lease.leaseId) + await restoreWorkspaceDispatchJob(resetToWaiting(record)) + return true + } + + return false + } + + if (job) { + if (record.status === 'admitting') { + await refreshWorkspaceLease(record.lease.workspaceId, record.lease.leaseId, LEASE_TTL_MS) + await markDispatchJobAdmitted( + record.id, + record.lease.workspaceId, + record.lease.leaseId, + (record.metadata as { dispatchLeaseExpiresAt?: number }).dispatchLeaseExpiresAt ?? 
+ Date.now() + ) + await removeWorkspaceJobFromLane(record.workspaceId, record.lane, record.id).catch( + () => undefined + ) + return true + } + await refreshWorkspaceLease(record.lease.workspaceId, record.lease.leaseId, LEASE_TTL_MS) + if (record.status === 'admitted') { + await markDispatchJobRunning(record.id) + return true + } + return false + } + + await restoreWorkspaceDispatchJob(resetToWaiting(record)) + return true +} + +async function reconcileTerminalDispatchLease( + record: WorkspaceDispatchJobRecord +): Promise { + if ((record.status !== 'completed' && record.status !== 'failed') || !record.lease) { + return false + } + + const hasLease = await hasWorkspaceLease(record.lease.workspaceId, record.lease.leaseId) + if (!hasLease) { + return false + } + + await releaseWorkspaceLease(record.lease.workspaceId, record.lease.leaseId) + return true +} + +async function reconcileWaitingWorkspaceTracking( + waitingJobs: WorkspaceDispatchJobRecord[] +): Promise { + let changed = false + const earliestByWorkspace = new Map() + + for (const record of waitingJobs) { + const readyAt = record.createdAt + (record.delayMs ?? 
0) + const current = earliestByWorkspace.get(record.workspaceId) + if (current === undefined || readyAt < current) { + earliestByWorkspace.set(record.workspaceId, readyAt) + } + } + + for (const [workspaceId, nextReadyAt] of earliestByWorkspace.entries()) { + const active = await hasActiveWorkspace(workspaceId) + if (!active) { + await ensureWorkspaceActive(workspaceId, nextReadyAt) + changed = true + } + } + + return changed +} + +export async function reconcileWorkspaceDispatchState(): Promise { + const activeJobs = await listDispatchJobsByStatuses(['admitting', 'admitted', 'running']) + const waitingJobs = await listDispatchJobsByStatuses(['waiting']) + const terminalJobs = await listDispatchJobsByStatuses(['completed', 'failed']) + let changed = false + + for (const record of activeJobs) { + const terminal = await reconcileTerminalBullMQState(record) + if (terminal) { + changed = true + continue + } + + const restored = await reconcileStrandedDispatchJob(record) + if (restored) { + changed = true + } + } + + if (await reconcileWaitingWorkspaceTracking(waitingJobs)) { + changed = true + } + + for (const record of terminalJobs) { + if (await reconcileTerminalDispatchLease(record)) { + changed = true + } + } + + await reconcileGlobalQueueDepth().catch((error) => { + logger.error('Failed to reconcile global queue depth', { error }) + }) + + if (changed) { + logger.info('Workspace dispatch reconciliation updated state', { + activeJobsInspected: activeJobs.length, + waitingJobsInspected: waitingJobs.length, + terminalJobsInspected: terminalJobs.length, + }) + await wakeWorkspaceDispatcher() + } +} diff --git a/apps/sim/lib/core/workspace-dispatch/redis-store.ts b/apps/sim/lib/core/workspace-dispatch/redis-store.ts new file mode 100644 index 00000000000..82ac3202803 --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/redis-store.ts @@ -0,0 +1,574 @@ +import { createLogger } from '@sim/logger' +import type Redis from 'ioredis' +import type { 
WorkspaceDispatchStorageAdapter } from '@/lib/core/workspace-dispatch/adapter' +import { + WORKSPACE_DISPATCH_CLAIM_RESULTS, + type WorkspaceDispatchClaimResult, + type WorkspaceDispatchEnqueueInput, + type WorkspaceDispatchJobRecord, + type WorkspaceDispatchLane, +} from '@/lib/core/workspace-dispatch/types' + +const logger = createLogger('WorkspaceDispatchRedisStore') + +const DISPATCH_PREFIX = 'workspace-dispatch:v1' +const JOB_TTL_SECONDS = 48 * 60 * 60 +const SEQUENCE_KEY = `${DISPATCH_PREFIX}:sequence` +const ACTIVE_WORKSPACES_KEY = `${DISPATCH_PREFIX}:workspaces` +const GLOBAL_DEPTH_KEY = `${DISPATCH_PREFIX}:global-depth` +const CLAIM_JOB_SCRIPT = ` +local workspaceId = ARGV[1] +local now = tonumber(ARGV[2]) +local concurrencyLimit = tonumber(ARGV[3]) +local leaseId = ARGV[4] +local leaseExpiresAt = tonumber(ARGV[5]) +local lanes = cjson.decode(ARGV[6]) +local sequenceKey = ARGV[7] +local activeWorkspacesKey = ARGV[8] +local jobPrefix = ARGV[9] +local workspacePrefix = ARGV[10] +local jobTtlSeconds = tonumber(ARGV[11]) + +local function laneKey(lane) + return workspacePrefix .. workspaceId .. ':lane:' .. lane +end + +local function leaseKey() + return workspacePrefix .. workspaceId .. ':leases' +end + +local function workspaceHasPending() + local minReadyAt = nil + local hasPending = false + + for _, lane in ipairs(lanes) do + local ids = redis.call('ZRANGE', laneKey(lane), 0, 0) + if #ids > 0 then + local raw = redis.call('GET', jobPrefix .. 
ids[1]) + if raw then + hasPending = true + local record = cjson.decode(raw) + local readyAt = (record.createdAt or 0) + (record.delayMs or 0) + if (minReadyAt == nil) or (readyAt < minReadyAt) then + minReadyAt = readyAt + end + else + redis.call('ZREM', laneKey(lane), ids[1]) + end + end + end + + return hasPending, minReadyAt +end + +redis.call('ZREMRANGEBYSCORE', leaseKey(), 0, now) +local activeLeaseCount = redis.call('ZCARD', leaseKey()) +if activeLeaseCount >= concurrencyLimit then + return cjson.encode({ type = 'limit_reached' }) +end + +local selectedId = nil +local selectedLane = nil +local selectedRecord = nil +local delayedNextReadyAt = nil + +local maxScanPerLane = 20 + +for _, lane in ipairs(lanes) do + local ids = redis.call('ZRANGE', laneKey(lane), 0, maxScanPerLane - 1) + for _, candidateId in ipairs(ids) do + local raw = redis.call('GET', jobPrefix .. candidateId) + if raw then + local record = cjson.decode(raw) + local readyAt = (record.createdAt or 0) + (record.delayMs or 0) + if readyAt <= now then + selectedId = candidateId + selectedLane = lane + selectedRecord = record + break + end + + if (delayedNextReadyAt == nil) or (readyAt < delayedNextReadyAt) then + delayedNextReadyAt = readyAt + end + else + redis.call('ZREM', laneKey(lane), candidateId) + end + end + + if selectedRecord then + break + end +end + +if selectedRecord == nil then + local hasPending, minReadyAt = workspaceHasPending() + if not hasPending then + return cjson.encode({ type = 'empty' }) + end + + local sequence = redis.call('INCR', sequenceKey) + local score = sequence + if minReadyAt ~= nil and minReadyAt > now then + score = minReadyAt * 1000000 + sequence + end + redis.call('ZADD', activeWorkspacesKey, score, workspaceId) + + return cjson.encode({ + type = 'delayed', + nextReadyAt = delayedNextReadyAt or minReadyAt or now + }) +end + +redis.call('ZADD', leaseKey(), leaseExpiresAt, leaseId) +selectedRecord.status = 'admitting' +selectedRecord.lease = { + workspaceId = 
workspaceId, + leaseId = leaseId +} +if selectedRecord.metadata == nil then + selectedRecord.metadata = {} +end +selectedRecord.metadata.dispatchLeaseExpiresAt = leaseExpiresAt + +redis.call('SET', jobPrefix .. selectedId, cjson.encode(selectedRecord), 'EX', jobTtlSeconds) +redis.call('ZREM', laneKey(selectedLane), selectedId) + +local hasPending, minReadyAt = workspaceHasPending() +if hasPending then + local sequence = redis.call('INCR', sequenceKey) + local score = sequence + if minReadyAt ~= nil and minReadyAt > now then + score = minReadyAt * 1000000 + sequence + end + redis.call('ZADD', activeWorkspacesKey, score, workspaceId) +end + +return cjson.encode({ + type = 'admitted', + record = selectedRecord, + leaseId = leaseId, + leaseExpiresAt = leaseExpiresAt +}) +` + +function jobKey(jobId: string): string { + return `${DISPATCH_PREFIX}:job:${jobId}` +} + +function workspaceLaneKey(workspaceId: string, lane: WorkspaceDispatchLane): string { + return `${DISPATCH_PREFIX}:workspace:${workspaceId}:lane:${lane}` +} + +function workspaceLeaseKey(workspaceId: string): string { + return `${DISPATCH_PREFIX}:workspace:${workspaceId}:leases` +} + +function createPriorityScore(priority: number, sequence: number): number { + return priority * 1_000_000_000_000 + sequence +} + +export class RedisWorkspaceDispatchStorage implements WorkspaceDispatchStorageAdapter { + constructor(private redis: Redis) {} + + private async nextSequence(): Promise { + return this.redis.incr(SEQUENCE_KEY) + } + + async saveDispatchJob(record: WorkspaceDispatchJobRecord): Promise { + await this.redis.set(jobKey(record.id), JSON.stringify(record), 'EX', JOB_TTL_SECONDS) + } + + async getDispatchJobRecord(jobId: string): Promise { + const raw = await this.redis.get(jobKey(jobId)) + if (!raw) { + return null + } + + try { + return JSON.parse(raw) as WorkspaceDispatchJobRecord + } catch (error) { + logger.warn('Corrupted dispatch job record, deleting', { jobId, error }) + await 
this.redis.del(jobKey(jobId)) + return null + } + } + /** Scans every dispatch job key and returns the parsed records whose status is listed in `statuses`, at most one entry per job id; values that fail to parse are skipped. */ + async listDispatchJobsByStatuses( + statuses: readonly WorkspaceDispatchJobRecord['status'][] + ): Promise { + let cursor = '0' + const jobs = new Map<string, WorkspaceDispatchJobRecord>() /* keyed by job id because SCAN may hand back the same key more than once */ + + do { + const [nextCursor, keys] = await this.redis.scan( + cursor, + 'MATCH', + `${DISPATCH_PREFIX}:job:*`, + 'COUNT', + 100 + ) + cursor = nextCursor + + if (keys.length === 0) { + continue + } + + const values = await this.redis.mget(...keys) + for (const value of values) { + if (!value) { + continue + } + try { + const record = JSON.parse(value) as WorkspaceDispatchJobRecord + if (statuses.includes(record.status)) { + jobs.set(record.id, record) + } + } catch { + // Best effort during reconciliation scans. + } + } + } while (cursor !== '0') + + return [...jobs.values()] + } + + async updateDispatchJobRecord( + jobId: string, + updater: (record: WorkspaceDispatchJobRecord) => WorkspaceDispatchJobRecord + ): Promise { + const current = await this.getDispatchJobRecord(jobId) + if (!current) { + return null + } + + const updated = updater(current) + await this.saveDispatchJob(updated) + return updated + } + + async enqueueWorkspaceDispatchJob( + input: WorkspaceDispatchEnqueueInput + ): Promise { + const id = input.id ?? `dispatch_${crypto.randomUUID().replace(/-/g, '').slice(0, 20)}` + const createdAt = Date.now() + const sequence = await this.nextSequence() + + const record: WorkspaceDispatchJobRecord = { + id, + workspaceId: input.workspaceId, + lane: input.lane, + queueName: input.queueName, + bullmqJobName: input.bullmqJobName, + bullmqPayload: input.bullmqPayload, + metadata: input.metadata, + priority: input.priority ?? 
100, + maxAttempts: input.maxAttempts, + delayMs: input.delayMs, + status: 'waiting', + createdAt, + } + + const score = createPriorityScore(record.priority, sequence) + const pipeline = this.redis.pipeline() + pipeline.set(jobKey(id), JSON.stringify(record), 'EX', JOB_TTL_SECONDS) + pipeline.zadd(workspaceLaneKey(record.workspaceId, record.lane), score, id) + pipeline.zadd(ACTIVE_WORKSPACES_KEY, 'NX', sequence, record.workspaceId) + pipeline.incr(GLOBAL_DEPTH_KEY) + await pipeline.exec() + + return record + } + + async restoreWorkspaceDispatchJob(record: WorkspaceDispatchJobRecord): Promise { + const sequence = await this.nextSequence() + const score = createPriorityScore(record.priority, sequence) + const pipeline = this.redis.pipeline() + pipeline.set(jobKey(record.id), JSON.stringify(record), 'EX', JOB_TTL_SECONDS) + pipeline.zadd(workspaceLaneKey(record.workspaceId, record.lane), score, record.id) + pipeline.zadd(ACTIVE_WORKSPACES_KEY, 'NX', sequence, record.workspaceId) + await pipeline.exec() + } + + async claimWorkspaceJob( + workspaceId: string, + options: { + lanes: readonly WorkspaceDispatchLane[] + concurrencyLimit: number + leaseId: string + now: number + leaseTtlMs: number + } + ): Promise { + const raw = await this.redis.eval( + CLAIM_JOB_SCRIPT, + 0, + workspaceId, + String(options.now), + String(options.concurrencyLimit), + options.leaseId, + String(options.now + options.leaseTtlMs), + JSON.stringify(options.lanes), + SEQUENCE_KEY, + ACTIVE_WORKSPACES_KEY, + `${DISPATCH_PREFIX}:job:`, + `${DISPATCH_PREFIX}:workspace:`, + String(JOB_TTL_SECONDS) + ) + + const parsed = JSON.parse(String(raw)) as WorkspaceDispatchClaimResult + switch (parsed.type) { + case WORKSPACE_DISPATCH_CLAIM_RESULTS.ADMITTED: + case WORKSPACE_DISPATCH_CLAIM_RESULTS.DELAYED: + case WORKSPACE_DISPATCH_CLAIM_RESULTS.LIMIT_REACHED: + case WORKSPACE_DISPATCH_CLAIM_RESULTS.EMPTY: + return parsed + default: + throw new Error( + `Unknown dispatch claim result: ${String((parsed as { 
type?: string }).type)}` + ) + } + } + + async getWorkspaceQueueDepth( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] + ): Promise { + if (lanes.length === 0) return 0 + const pipeline = this.redis.pipeline() + for (const lane of lanes) { + pipeline.zcard(workspaceLaneKey(workspaceId, lane)) + } + const results = await pipeline.exec() + let depth = 0 + for (const result of results ?? []) { + if (result && !result[0]) { + depth += (result[1] as number) ?? 0 + } + } + return depth + } + + async getGlobalQueueDepth(): Promise { + const count = await this.redis.get(GLOBAL_DEPTH_KEY) + return count ? Math.max(0, Number.parseInt(count, 10)) : 0 + } + + async reconcileGlobalQueueDepth(): Promise { + const allJobs = await this.listDispatchJobsByStatuses([ + 'waiting', + 'admitting', + 'admitted', + 'running', + ]) + await this.redis.set(GLOBAL_DEPTH_KEY, allJobs.length) + } + + async popNextWorkspaceId(): Promise { + const result = await this.redis.zpopmin(ACTIVE_WORKSPACES_KEY) + if (!result || result.length === 0) { + return null + } + + return result[0] ?? null + } + + async getQueuedWorkspaceCount(): Promise { + return this.redis.zcard(ACTIVE_WORKSPACES_KEY) + } + + async hasActiveWorkspace(workspaceId: string): Promise { + return (await this.redis.zscore(ACTIVE_WORKSPACES_KEY, workspaceId)) !== null + } + + async ensureWorkspaceActive(workspaceId: string, readyAt?: number): Promise { + const sequence = await this.nextSequence() + const score = readyAt && readyAt > Date.now() ? 
readyAt * 1_000_000 + sequence : sequence + await this.redis.zadd(ACTIVE_WORKSPACES_KEY, 'NX', score, workspaceId) + } + + async requeueWorkspaceId(workspaceId: string): Promise { + const sequence = await this.nextSequence() + await this.redis.zadd(ACTIVE_WORKSPACES_KEY, sequence, workspaceId) + } + + async workspaceHasPendingJobs( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] + ): Promise { + for (const lane of lanes) { + const count = await this.redis.zcard(workspaceLaneKey(workspaceId, lane)) + if (count > 0) { + return true + } + } + + return false + } + + async getNextWorkspaceJob( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] + ): Promise { + for (const lane of lanes) { + const ids = await this.redis.zrange(workspaceLaneKey(workspaceId, lane), 0, 0) + if (ids.length === 0) { + continue + } + + const record = await this.getDispatchJobRecord(ids[0]) + if (!record) { + await this.redis.zrem(workspaceLaneKey(workspaceId, lane), ids[0]) + continue + } + + return record + } + + return null + } + + async removeWorkspaceJobFromLane( + workspaceId: string, + lane: WorkspaceDispatchLane, + jobId: string + ): Promise { + await this.redis.zrem(workspaceLaneKey(workspaceId, lane), jobId) + } + + async cleanupExpiredWorkspaceLeases(workspaceId: string): Promise { + await this.redis.zremrangebyscore(workspaceLeaseKey(workspaceId), 0, Date.now()) + } + + async countActiveWorkspaceLeases(workspaceId: string): Promise { + await this.cleanupExpiredWorkspaceLeases(workspaceId) + return this.redis.zcard(workspaceLeaseKey(workspaceId)) + } + + async hasWorkspaceLease(workspaceId: string, leaseId: string): Promise { + await this.cleanupExpiredWorkspaceLeases(workspaceId) + return (await this.redis.zscore(workspaceLeaseKey(workspaceId), leaseId)) !== null + } + + async createWorkspaceLease(workspaceId: string, leaseId: string, ttlMs: number): Promise { + const expiresAt = Date.now() + ttlMs + await this.redis.zadd(workspaceLeaseKey(workspaceId), 
expiresAt, leaseId) + return expiresAt + } + + async refreshWorkspaceLease( + workspaceId: string, + leaseId: string, + ttlMs: number + ): Promise { + return this.createWorkspaceLease(workspaceId, leaseId, ttlMs) + } + + async releaseWorkspaceLease(workspaceId: string, leaseId: string): Promise { + await this.redis.zrem(workspaceLeaseKey(workspaceId), leaseId) + } + + async removeWorkspaceIfIdle( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] + ): Promise { + const hasPendingJobs = await this.workspaceHasPendingJobs(workspaceId, lanes) + if (!hasPendingJobs) { + await this.redis.zrem(ACTIVE_WORKSPACES_KEY, workspaceId) + } + } + + async markDispatchJobAdmitted( + jobId: string, + workspaceId: string, + leaseId: string, + leaseExpiresAt: number + ): Promise { + await this.updateDispatchJobRecord(jobId, (record) => ({ + ...record, + status: 'admitted', + admittedAt: Date.now(), + lease: { + workspaceId, + leaseId, + }, + metadata: { + ...record.metadata, + dispatchLeaseExpiresAt: leaseExpiresAt, + }, + })) + } + + async markDispatchJobAdmitting( + jobId: string, + workspaceId: string, + leaseId: string, + leaseExpiresAt: number + ): Promise { + await this.updateDispatchJobRecord(jobId, (record) => ({ + ...record, + status: 'admitting', + lease: { + workspaceId, + leaseId, + }, + metadata: { + ...record.metadata, + dispatchLeaseExpiresAt: leaseExpiresAt, + }, + })) + } + + async markDispatchJobRunning(jobId: string): Promise { + await this.updateDispatchJobRecord(jobId, (record) => ({ + ...record, + status: 'running', + startedAt: record.startedAt ?? 
Date.now(), + })) + } + + async markDispatchJobCompleted(jobId: string, output: unknown): Promise { + await this.updateDispatchJobRecord(jobId, (record) => ({ + ...record, + status: 'completed', + completedAt: Date.now(), + output, + })) + await this.redis.decr(GLOBAL_DEPTH_KEY).catch(() => undefined) + } + + async markDispatchJobFailed(jobId: string, error: string): Promise { + await this.updateDispatchJobRecord(jobId, (record) => ({ + ...record, + status: 'failed', + completedAt: Date.now(), + error, + })) + await this.redis.decr(GLOBAL_DEPTH_KEY).catch(() => undefined) + } + + async clear(): Promise { + let cursor = '0' + const keys: string[] = [] + + do { + const [nextCursor, foundKeys] = await this.redis.scan( + cursor, + 'MATCH', + `${DISPATCH_PREFIX}:*`, + 'COUNT', + 100 + ) + cursor = nextCursor + keys.push(...foundKeys) + } while (cursor !== '0') + + if (keys.length > 0) { + await this.redis.del(...keys) + } + } + + dispose(): void { + logger.info('Redis workspace dispatch storage disposed') + } +} diff --git a/apps/sim/lib/core/workspace-dispatch/status.test.ts b/apps/sim/lib/core/workspace-dispatch/status.test.ts new file mode 100644 index 00000000000..e72e210b18d --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/status.test.ts @@ -0,0 +1,102 @@ +/** + * @vitest-environment node + */ +import { describe, expect, it } from 'vitest' +import { presentDispatchOrJobStatus } from '@/lib/core/workspace-dispatch/status' + +describe('workspace dispatch status presentation', () => { + it('presents waiting dispatch jobs with queue metadata', () => { + const result = presentDispatchOrJobStatus( + { + id: 'dispatch-1', + workspaceId: 'workspace-1', + lane: 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', + bullmqPayload: {}, + metadata: { workflowId: 'workflow-1' }, + priority: 10, + status: 'waiting', + createdAt: 1000, + }, + null + ) + + expect(result).toEqual({ + status: 'waiting', + metadata: { + createdAt: new 
Date(1000), + admittedAt: undefined, + startedAt: undefined, + completedAt: undefined, + queueName: 'workflow-execution', + lane: 'runtime', + workspaceId: 'workspace-1', + }, + estimatedDuration: 300000, + }) + }) + + it('presents admitting dispatch jobs distinctly', () => { + const result = presentDispatchOrJobStatus( + { + id: 'dispatch-1a', + workspaceId: 'workspace-1', + lane: 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', + bullmqPayload: {}, + metadata: { workflowId: 'workflow-1' }, + priority: 10, + status: 'admitting', + createdAt: 1000, + }, + null + ) + + expect(result.status).toBe('admitting') + expect(result.estimatedDuration).toBe(300000) + }) + + it('presents completed dispatch jobs with output and duration', () => { + const result = presentDispatchOrJobStatus( + { + id: 'dispatch-2', + workspaceId: 'workspace-1', + lane: 'interactive', + queueName: 'workflow-execution', + bullmqJobName: 'direct-workflow-execution', + bullmqPayload: {}, + metadata: { workflowId: 'workflow-1' }, + priority: 1, + status: 'completed', + createdAt: 1000, + admittedAt: 1500, + startedAt: 2000, + completedAt: 7000, + output: { success: true }, + }, + null + ) + + expect(result.status).toBe('completed') + expect(result.output).toEqual({ success: true }) + expect(result.metadata.duration).toBe(5000) + }) + + it('falls back to legacy job status when no dispatch record exists', () => { + const result = presentDispatchOrJobStatus(null, { + id: 'job-1', + type: 'workflow-execution', + payload: {}, + status: 'pending', + createdAt: new Date(1000), + attempts: 0, + maxAttempts: 3, + metadata: {}, + }) + + expect(result.status).toBe('queued') + expect(result.estimatedDuration).toBe(300000) + }) +}) diff --git a/apps/sim/lib/core/workspace-dispatch/status.ts b/apps/sim/lib/core/workspace-dispatch/status.ts new file mode 100644 index 00000000000..fc5d934434c --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/status.ts @@ -0,0 +1,110 @@ 
+import type { Job, JobStatus } from '@/lib/core/async-jobs/types' +import type { WorkspaceDispatchJobRecord } from '@/lib/core/workspace-dispatch/types' + +export type DispatchPresentedStatus = + | 'waiting' + | 'admitting' + | 'admitted' + | 'running' + | 'completed' + | 'failed' + | 'queued' + | JobStatus + +export interface DispatchStatusPresentation { + status: DispatchPresentedStatus + metadata: { + createdAt?: Date + admittedAt?: Date + startedAt?: Date + completedAt?: Date + queueName?: string + lane?: string + workspaceId?: string + duration?: number + } + output?: unknown + error?: string + estimatedDuration?: number +} + +export function presentDispatchOrJobStatus( + dispatchJob: WorkspaceDispatchJobRecord | null, + job: Job | null +): DispatchStatusPresentation { + if (dispatchJob) { + const startedAt = dispatchJob.startedAt ? new Date(dispatchJob.startedAt) : undefined + const completedAt = dispatchJob.completedAt ? new Date(dispatchJob.completedAt) : undefined + + const response: DispatchStatusPresentation = { + status: dispatchJob.status, + metadata: { + createdAt: new Date(dispatchJob.createdAt), + admittedAt: dispatchJob.admittedAt ? new Date(dispatchJob.admittedAt) : undefined, + startedAt, + completedAt, + queueName: dispatchJob.queueName, + lane: dispatchJob.lane, + workspaceId: dispatchJob.workspaceId, + }, + } + + if (startedAt && completedAt) { + response.metadata.duration = completedAt.getTime() - startedAt.getTime() + } + + if (dispatchJob.status === 'completed') { + response.output = dispatchJob.output + } + + if (dispatchJob.status === 'failed') { + response.error = dispatchJob.error + } + + if ( + dispatchJob.status === 'waiting' || + dispatchJob.status === 'admitting' || + dispatchJob.status === 'admitted' || + dispatchJob.status === 'running' + ) { + response.estimatedDuration = 300000 + } + + return response + } + + if (!job) { + return { + status: 'queued', + metadata: {}, + } + } + + const mappedStatus = job.status === 'pending' ? 
'queued' : job.status + const response: DispatchStatusPresentation = { + status: mappedStatus, + metadata: { + createdAt: job.createdAt, + startedAt: job.startedAt, + completedAt: job.completedAt, + }, + } + + if (job.startedAt && job.completedAt) { + response.metadata.duration = job.completedAt.getTime() - job.startedAt.getTime() + } + + if (job.status === 'completed') { + response.output = job.output + } + + if (job.status === 'failed') { + response.error = job.error + } + + if (job.status === 'processing' || job.status === 'pending') { + response.estimatedDuration = 300000 + } + + return response +} diff --git a/apps/sim/lib/core/workspace-dispatch/store.ts b/apps/sim/lib/core/workspace-dispatch/store.ts new file mode 100644 index 00000000000..9bc7f0bebe9 --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/store.ts @@ -0,0 +1,193 @@ +import type { WorkspaceDispatchStorageAdapter } from '@/lib/core/workspace-dispatch/adapter' +import { + setWorkspaceDispatchStorageAdapter as _setAdapter, + createWorkspaceDispatchStorageAdapter, +} from '@/lib/core/workspace-dispatch/factory' +import type { + WorkspaceDispatchClaimResult, + WorkspaceDispatchEnqueueInput, + WorkspaceDispatchJobRecord, + WorkspaceDispatchLane, +} from '@/lib/core/workspace-dispatch/types' + +function getAdapter() { + return createWorkspaceDispatchStorageAdapter() +} + +export function setWorkspaceDispatchStorageAdapter(adapter: WorkspaceDispatchStorageAdapter): void { + _setAdapter(adapter) +} + +export async function saveDispatchJob(record: WorkspaceDispatchJobRecord): Promise { + return getAdapter().saveDispatchJob(record) +} + +export async function getDispatchJobRecord( + jobId: string +): Promise { + return getAdapter().getDispatchJobRecord(jobId) +} + +export async function listDispatchJobsByStatuses( + statuses: readonly WorkspaceDispatchJobRecord['status'][] +): Promise { + return getAdapter().listDispatchJobsByStatuses(statuses) +} + +export async function updateDispatchJobRecord( + 
jobId: string, + updater: (record: WorkspaceDispatchJobRecord) => WorkspaceDispatchJobRecord +): Promise { + return getAdapter().updateDispatchJobRecord(jobId, updater) +} + +export async function enqueueWorkspaceDispatchJob( + input: WorkspaceDispatchEnqueueInput +): Promise { + return getAdapter().enqueueWorkspaceDispatchJob(input) +} + +export async function restoreWorkspaceDispatchJob( + record: WorkspaceDispatchJobRecord +): Promise { + return getAdapter().restoreWorkspaceDispatchJob(record) +} + +export async function claimWorkspaceJob( + workspaceId: string, + options: { + lanes: readonly WorkspaceDispatchLane[] + concurrencyLimit: number + leaseId: string + now: number + leaseTtlMs: number + } +): Promise { + return getAdapter().claimWorkspaceJob(workspaceId, options) +} + +export async function getWorkspaceQueueDepth( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] +): Promise { + return getAdapter().getWorkspaceQueueDepth(workspaceId, lanes) +} + +export async function getGlobalQueueDepth(): Promise { + return getAdapter().getGlobalQueueDepth() +} + +export async function reconcileGlobalQueueDepth(): Promise { + return getAdapter().reconcileGlobalQueueDepth() +} + +export async function popNextWorkspaceId(): Promise { + return getAdapter().popNextWorkspaceId() +} + +export async function getQueuedWorkspaceCount(): Promise { + return getAdapter().getQueuedWorkspaceCount() +} + +export async function hasActiveWorkspace(workspaceId: string): Promise { + return getAdapter().hasActiveWorkspace(workspaceId) +} + +export async function ensureWorkspaceActive(workspaceId: string, readyAt?: number): Promise { + return getAdapter().ensureWorkspaceActive(workspaceId, readyAt) +} + +export async function requeueWorkspaceId(workspaceId: string): Promise { + return getAdapter().requeueWorkspaceId(workspaceId) +} + +export async function workspaceHasPendingJobs( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] +): Promise { + return 
getAdapter().workspaceHasPendingJobs(workspaceId, lanes) +} + +export async function getNextWorkspaceJob( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] +): Promise { + return getAdapter().getNextWorkspaceJob(workspaceId, lanes) +} + +export async function removeWorkspaceJobFromLane( + workspaceId: string, + lane: WorkspaceDispatchLane, + jobId: string +): Promise { + return getAdapter().removeWorkspaceJobFromLane(workspaceId, lane, jobId) +} + +export async function cleanupExpiredWorkspaceLeases(workspaceId: string): Promise { + return getAdapter().cleanupExpiredWorkspaceLeases(workspaceId) +} + +export async function countActiveWorkspaceLeases(workspaceId: string): Promise { + return getAdapter().countActiveWorkspaceLeases(workspaceId) +} + +export async function hasWorkspaceLease(workspaceId: string, leaseId: string): Promise { + return getAdapter().hasWorkspaceLease(workspaceId, leaseId) +} + +export async function createWorkspaceLease( + workspaceId: string, + leaseId: string, + ttlMs: number +): Promise { + return getAdapter().createWorkspaceLease(workspaceId, leaseId, ttlMs) +} + +export async function refreshWorkspaceLease( + workspaceId: string, + leaseId: string, + ttlMs: number +): Promise { + return getAdapter().refreshWorkspaceLease(workspaceId, leaseId, ttlMs) +} + +export async function releaseWorkspaceLease(workspaceId: string, leaseId: string): Promise { + return getAdapter().releaseWorkspaceLease(workspaceId, leaseId) +} + +export async function removeWorkspaceIfIdle( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] +): Promise { + return getAdapter().removeWorkspaceIfIdle(workspaceId, lanes) +} + +export async function markDispatchJobAdmitted( + jobId: string, + workspaceId: string, + leaseId: string, + leaseExpiresAt: number +): Promise { + return getAdapter().markDispatchJobAdmitted(jobId, workspaceId, leaseId, leaseExpiresAt) +} + +export async function markDispatchJobAdmitting( + jobId: string, + workspaceId: 
string, + leaseId: string, + leaseExpiresAt: number +): Promise { + return getAdapter().markDispatchJobAdmitting(jobId, workspaceId, leaseId, leaseExpiresAt) +} + +export async function markDispatchJobRunning(jobId: string): Promise { + return getAdapter().markDispatchJobRunning(jobId) +} + +export async function markDispatchJobCompleted(jobId: string, output: unknown): Promise { + return getAdapter().markDispatchJobCompleted(jobId, output) +} + +export async function markDispatchJobFailed(jobId: string, error: string): Promise { + return getAdapter().markDispatchJobFailed(jobId, error) +} diff --git a/apps/sim/lib/core/workspace-dispatch/types.ts b/apps/sim/lib/core/workspace-dispatch/types.ts new file mode 100644 index 00000000000..87218956644 --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/types.ts @@ -0,0 +1,107 @@ +import type { JobMetadata, JobType } from '@/lib/core/async-jobs/types' +import type { + KNOWLEDGE_CONNECTOR_SYNC_QUEUE, + KNOWLEDGE_DOCUMENT_PROCESSING_QUEUE, + MOTHERSHIP_JOB_EXECUTION_QUEUE, + WORKSPACE_NOTIFICATION_DELIVERY_QUEUE, +} from '@/lib/core/bullmq/queues' + +export const WORKSPACE_DISPATCH_LANES = [ + 'interactive', + 'runtime', + 'knowledge', + 'lightweight', +] as const + +export type WorkspaceDispatchLane = (typeof WORKSPACE_DISPATCH_LANES)[number] + +export type WorkspaceDispatchQueueName = + | JobType + | typeof KNOWLEDGE_CONNECTOR_SYNC_QUEUE + | typeof KNOWLEDGE_DOCUMENT_PROCESSING_QUEUE + | typeof MOTHERSHIP_JOB_EXECUTION_QUEUE + | typeof WORKSPACE_NOTIFICATION_DELIVERY_QUEUE + +export const WORKSPACE_DISPATCH_STATUSES = { + WAITING: 'waiting', + ADMITTING: 'admitting', + ADMITTED: 'admitted', + RUNNING: 'running', + COMPLETED: 'completed', + FAILED: 'failed', +} as const + +export type WorkspaceDispatchStatus = + (typeof WORKSPACE_DISPATCH_STATUSES)[keyof typeof WORKSPACE_DISPATCH_STATUSES] + +export interface WorkspaceDispatchLeaseInfo { + workspaceId: string + leaseId: string +} + +export interface 
WorkspaceDispatchJobContext { + dispatchJobId: string + workspaceId: string + lane: WorkspaceDispatchLane + queueName: WorkspaceDispatchQueueName + bullmqJobName: string + priority: number +} + +export interface WorkspaceDispatchJobRecord { + id: string + workspaceId: string + lane: WorkspaceDispatchLane + queueName: WorkspaceDispatchQueueName + bullmqJobName: string + bullmqPayload: unknown + metadata: JobMetadata + priority: number + maxAttempts?: number + delayMs?: number + status: WorkspaceDispatchStatus + createdAt: number + admittedAt?: number + startedAt?: number + completedAt?: number + output?: unknown + error?: string + lease?: WorkspaceDispatchLeaseInfo +} + +export interface WorkspaceDispatchEnqueueInput { + id?: string + workspaceId: string + lane: WorkspaceDispatchLane + queueName: WorkspaceDispatchQueueName + bullmqJobName: string + bullmqPayload: unknown + metadata: JobMetadata + priority?: number + maxAttempts?: number + delayMs?: number +} + +export const WORKSPACE_DISPATCH_CLAIM_RESULTS = { + ADMITTED: 'admitted', + LIMIT_REACHED: 'limit_reached', + DELAYED: 'delayed', + EMPTY: 'empty', +} as const + +export type WorkspaceDispatchClaimResult = + | { + type: typeof WORKSPACE_DISPATCH_CLAIM_RESULTS.ADMITTED + record: WorkspaceDispatchJobRecord + leaseId: string + leaseExpiresAt: number + } + | { + type: + | typeof WORKSPACE_DISPATCH_CLAIM_RESULTS.LIMIT_REACHED + | typeof WORKSPACE_DISPATCH_CLAIM_RESULTS.EMPTY + } + | { + type: typeof WORKSPACE_DISPATCH_CLAIM_RESULTS.DELAYED + nextReadyAt: number + } diff --git a/apps/sim/lib/core/workspace-dispatch/worker.test.ts b/apps/sim/lib/core/workspace-dispatch/worker.test.ts new file mode 100644 index 00000000000..1833b128cdd --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/worker.test.ts @@ -0,0 +1,98 @@ +/** + * @vitest-environment node + */ +import { beforeEach, describe, expect, it, vi } from 'vitest' + +const { + mockMarkDispatchJobCompleted, + mockMarkDispatchJobFailed, + 
mockMarkDispatchJobRunning, + mockReleaseWorkspaceLease, + mockWakeWorkspaceDispatcher, +} = vi.hoisted(() => ({ + mockMarkDispatchJobCompleted: vi.fn(), + mockMarkDispatchJobFailed: vi.fn(), + mockMarkDispatchJobRunning: vi.fn(), + mockReleaseWorkspaceLease: vi.fn(), + mockWakeWorkspaceDispatcher: vi.fn(), +})) + +vi.mock('@sim/logger', () => ({ + createLogger: () => ({ + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + debug: vi.fn(), + }), +})) + +vi.mock('@/lib/core/workspace-dispatch', () => ({ + markDispatchJobCompleted: mockMarkDispatchJobCompleted, + markDispatchJobFailed: mockMarkDispatchJobFailed, + markDispatchJobRunning: mockMarkDispatchJobRunning, + releaseWorkspaceLease: mockReleaseWorkspaceLease, + wakeWorkspaceDispatcher: mockWakeWorkspaceDispatcher, +})) + +import { getDispatchRuntimeMetadata, runDispatchedJob } from '@/lib/core/workspace-dispatch/worker' + +describe('workspace dispatch worker lifecycle', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it('returns null for missing metadata', () => { + expect(getDispatchRuntimeMetadata(undefined)).toBeNull() + }) + + it('extracts dispatch runtime metadata when all fields are present', () => { + expect( + getDispatchRuntimeMetadata({ + dispatchJobId: 'dispatch-1', + dispatchWorkspaceId: 'workspace-1', + dispatchLeaseId: 'lease-1', + }) + ).toEqual({ + dispatchJobId: 'dispatch-1', + dispatchWorkspaceId: 'workspace-1', + dispatchLeaseId: 'lease-1', + }) + }) + + it('marks running, completed, releases lease, and wakes dispatcher on success', async () => { + const result = await runDispatchedJob( + { + dispatchJobId: 'dispatch-1', + dispatchWorkspaceId: 'workspace-1', + dispatchLeaseId: 'lease-1', + }, + async () => ({ success: true }) + ) + + expect(result).toEqual({ success: true }) + expect(mockMarkDispatchJobRunning).toHaveBeenCalledWith('dispatch-1') + expect(mockMarkDispatchJobCompleted).toHaveBeenCalledWith('dispatch-1', { success: true }) + 
expect(mockReleaseWorkspaceLease).toHaveBeenCalledWith('workspace-1', 'lease-1') + expect(mockWakeWorkspaceDispatcher).toHaveBeenCalled() + }) + + it('marks failed and still releases lease on error', async () => { + await expect( + runDispatchedJob( + { + dispatchJobId: 'dispatch-2', + dispatchWorkspaceId: 'workspace-2', + dispatchLeaseId: 'lease-2', + }, + async () => { + throw new Error('boom') + } + ) + ).rejects.toThrow('boom') + + expect(mockMarkDispatchJobRunning).toHaveBeenCalledWith('dispatch-2') + expect(mockMarkDispatchJobFailed).toHaveBeenCalledWith('dispatch-2', 'boom') + expect(mockReleaseWorkspaceLease).toHaveBeenCalledWith('workspace-2', 'lease-2') + expect(mockWakeWorkspaceDispatcher).toHaveBeenCalled() + }) +}) diff --git a/apps/sim/lib/core/workspace-dispatch/worker.ts b/apps/sim/lib/core/workspace-dispatch/worker.ts new file mode 100644 index 00000000000..ced31a599a3 --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/worker.ts @@ -0,0 +1,104 @@ +import { createLogger } from '@sim/logger' +import { + markDispatchJobCompleted, + markDispatchJobFailed, + markDispatchJobRunning, + refreshWorkspaceLease, + releaseWorkspaceLease, + wakeWorkspaceDispatcher, +} from '@/lib/core/workspace-dispatch' + +const logger = createLogger('WorkspaceDispatchWorker') + +interface DispatchRuntimeMetadata { + dispatchJobId: string + dispatchWorkspaceId: string + dispatchLeaseId: string +} + +interface RunDispatchedJobOptions { + isFinalAttempt?: boolean + leaseTtlMs?: number +} + +const DEFAULT_LEASE_TTL_MS = 15 * 60 * 1000 +const LEASE_HEARTBEAT_INTERVAL_MS = 60_000 + +export function getDispatchRuntimeMetadata(metadata: unknown): DispatchRuntimeMetadata | null { + if (!metadata || typeof metadata !== 'object') { + return null + } + + const value = metadata as Partial + if (!value.dispatchJobId || !value.dispatchWorkspaceId || !value.dispatchLeaseId) { + return null + } + + return { + dispatchJobId: value.dispatchJobId, + dispatchWorkspaceId: 
value.dispatchWorkspaceId, + dispatchLeaseId: value.dispatchLeaseId, + } +} +/** Runs `run` under dispatch bookkeeping. Without dispatch ids in `metadata` it simply calls `run`; otherwise it keeps the lease refreshed on an interval, marks the job running, then completed or (on the final attempt) failed, and rethrows any error. */ +export async function runDispatchedJob( + metadata: unknown, + run: () => Promise, + options: RunDispatchedJobOptions = {} +): Promise { + const dispatchMetadata = getDispatchRuntimeMetadata(metadata) + + if (!dispatchMetadata) { + return run() + } + + const leaseTtlMs = options.leaseTtlMs ?? DEFAULT_LEASE_TTL_MS + const isFinalAttempt = options.isFinalAttempt ?? true + + let heartbeatTimer: NodeJS.Timeout | null = setInterval(() => { + void refreshWorkspaceLease( + dispatchMetadata.dispatchWorkspaceId, + dispatchMetadata.dispatchLeaseId, + leaseTtlMs + ).catch((error) => { + logger.error('Failed to refresh dispatch lease', { error, dispatchMetadata }) + }) + }, LEASE_HEARTBEAT_INTERVAL_MS) + heartbeatTimer.unref() + + let succeeded = false + try { + /* Marked inside the try so that a failure here still reaches the catch/finally cleanup instead of escaping before it. */ + await markDispatchJobRunning(dispatchMetadata.dispatchJobId) + const result = await run() + succeeded = true + await markDispatchJobCompleted(dispatchMetadata.dispatchJobId, result) + return result + } catch (error) { + if (isFinalAttempt && !succeeded) { + await markDispatchJobFailed( + dispatchMetadata.dispatchJobId, + error instanceof Error ? 
error.message : String(error) + ) + } + throw error + } finally { + if (heartbeatTimer) { + clearInterval(heartbeatTimer) + heartbeatTimer = null + } + + const shouldReleaseLease = succeeded || isFinalAttempt + if (shouldReleaseLease) { + try { + await releaseWorkspaceLease( + dispatchMetadata.dispatchWorkspaceId, + dispatchMetadata.dispatchLeaseId + ) + await wakeWorkspaceDispatcher() + } catch (error) { + logger.error('Failed to release dispatch lease', { error, dispatchMetadata }) + } + } + } +} diff --git a/apps/sim/lib/execution/buffered-stream.ts b/apps/sim/lib/execution/buffered-stream.ts new file mode 100644 index 00000000000..f1b413b6f96 --- /dev/null +++ b/apps/sim/lib/execution/buffered-stream.ts @@ -0,0 +1,111 @@ +import { createLogger } from '@sim/logger' +import { + type ExecutionStreamStatus, + getExecutionMeta, + readExecutionEvents, +} from '@/lib/execution/event-buffer' +import { formatSSEEvent } from '@/lib/workflows/executor/execution-events' + +const logger = createLogger('BufferedExecutionStream') + +const POLL_INTERVAL_MS = 500 +const MAX_POLL_DURATION_MS = 10 * 60 * 1000 + +function isTerminalStatus(status: ExecutionStreamStatus): boolean { + return status === 'complete' || status === 'error' || status === 'cancelled' +} + +export function createBufferedExecutionStream( + executionId: string, + initialEventId = 0 +): ReadableStream { + const encoder = new TextEncoder() + let closed = false + + return new ReadableStream({ + async start(controller) { + let lastEventId = initialEventId + const pollDeadline = Date.now() + MAX_POLL_DURATION_MS + + const enqueue = (text: string) => { + if (closed) { + return + } + + try { + controller.enqueue(encoder.encode(text)) + } catch { + closed = true + } + } + + try { + const initialEvents = await readExecutionEvents(executionId, lastEventId) + for (const entry of initialEvents) { + if (closed) { + return + } + + enqueue(formatSSEEvent(entry.event)) + lastEventId = entry.eventId + } + + while (!closed && 
Date.now() < pollDeadline) { + const meta = await getExecutionMeta(executionId) + + if (meta && isTerminalStatus(meta.status)) { + const finalEvents = await readExecutionEvents(executionId, lastEventId) + for (const entry of finalEvents) { + if (closed) { + return + } + + enqueue(formatSSEEvent(entry.event)) + lastEventId = entry.eventId + } + /* cancel() or a failed enqueue may have set `closed` by now; close() throws on a stream that is no longer readable, so it is guarded. */ + enqueue('data: [DONE]\n\n') + if (!closed) controller.close() + return + } + + await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS)) + if (closed) { + return + } + + const newEvents = await readExecutionEvents(executionId, lastEventId) + for (const entry of newEvents) { + if (closed) { + return + } + + enqueue(formatSSEEvent(entry.event)) + lastEventId = entry.eventId + } + } + + if (!closed) { + logger.warn('Buffered execution stream deadline reached', { executionId }) + enqueue('data: [DONE]\n\n') + if (!closed) controller.close() + } + } catch (error) { + logger.error('Buffered execution stream failed', { + executionId, + error: error instanceof Error ? 
error.message : String(error), + }) + + if (!closed) { + try { + controller.close() + } catch {} + } + } + }, + cancel() { + closed = true + logger.info('Client disconnected from buffered execution stream', { executionId }) + }, + }) +} diff --git a/apps/sim/lib/knowledge/connectors/sync-engine.ts b/apps/sim/lib/knowledge/connectors/sync-engine.ts index 3ec619e723a..b388f4066de 100644 --- a/apps/sim/lib/knowledge/connectors/sync-engine.ts +++ b/apps/sim/lib/knowledge/connectors/sync-engine.ts @@ -8,11 +8,13 @@ import { import { createLogger } from '@sim/logger' import { and, eq, inArray, isNull, ne, sql } from 'drizzle-orm' import { decryptApiKey } from '@/lib/api-key/crypto' +import { createBullMQJobData, isBullMQEnabled } from '@/lib/core/bullmq' import { getInternalApiBaseUrl } from '@/lib/core/utils/urls' +import { enqueueWorkspaceDispatch } from '@/lib/core/workspace-dispatch' import { + dispatchDocumentProcessingJob, hardDeleteDocuments, isTriggerAvailable, - processDocumentAsync, } from '@/lib/knowledge/documents/service' import { StorageService } from '@/lib/uploads' import { deleteFile } from '@/lib/uploads/core/storage-service' @@ -131,8 +133,7 @@ export function resolveTagMapping( } /** - * Dispatch a connector sync — uses Trigger.dev when available, - * otherwise falls back to direct executeSync. + * Dispatch a connector sync using the configured background execution backend. 
*/ export async function dispatchSync( connectorId: string, @@ -147,6 +148,38 @@ export async function dispatchSync( requestId, }) logger.info(`Dispatched connector sync to Trigger.dev`, { connectorId, requestId }) + } else if (isBullMQEnabled()) { + const connectorRows = await db + .select({ + workspaceId: knowledgeBase.workspaceId, + userId: knowledgeBase.userId, + }) + .from(knowledgeConnector) + .innerJoin(knowledgeBase, eq(knowledgeBase.id, knowledgeConnector.knowledgeBaseId)) + .where(eq(knowledgeConnector.id, connectorId)) + .limit(1) + + const workspaceId = connectorRows[0]?.workspaceId + const userId = connectorRows[0]?.userId + if (!workspaceId || !userId) { + throw new Error(`No workspace found for connector ${connectorId}`) + } + + await enqueueWorkspaceDispatch({ + workspaceId, + lane: 'knowledge', + queueName: 'knowledge-connector-sync', + bullmqJobName: 'knowledge-connector-sync', + bullmqPayload: createBullMQJobData({ + connectorId, + fullSync: options?.fullSync, + requestId, + }), + metadata: { + userId, + }, + }) + logger.info(`Dispatched connector sync to BullMQ`, { connectorId, requestId }) } else { executeSync(connectorId, { fullSync: options?.fullSync }).catch((error) => { logger.error(`Sync failed for connector ${connectorId}`, { @@ -498,21 +531,17 @@ export async function executeSync( if (stuckDocs.length > 0) { logger.info(`Retrying ${stuckDocs.length} stuck documents`, { connectorId }) for (const doc of stuckDocs) { - processDocumentAsync( - connector.knowledgeBaseId, - doc.id, - { + await dispatchDocumentProcessingJob({ + knowledgeBaseId: connector.knowledgeBaseId, + documentId: doc.id, + docData: { filename: doc.filename ?? 'document.txt', fileUrl: doc.fileUrl ?? '', fileSize: doc.fileSize ?? 0, mimeType: 'text/plain', }, - {} - ).catch((error) => { - logger.warn('Failed to retry stuck document', { - documentId: doc.id, - error: error instanceof Error ? 
error.message : String(error), - }) + processingOptions: {}, + requestId: `connector-retry-${connectorId}`, }) } } @@ -686,22 +715,17 @@ async function addDocument( throw error } - processDocumentAsync( + await dispatchDocumentProcessingJob({ knowledgeBaseId, documentId, - { + docData: { filename: processingFilename, fileUrl, fileSize: contentBuffer.length, mimeType: 'text/plain', }, - {} - ).catch((error) => { - logger.error('Failed to process connector document', { - documentId, - connectorId, - error: error instanceof Error ? error.message : String(error), - }) + processingOptions: {}, + requestId: `connector-sync-${connectorId}`, }) } @@ -807,21 +831,16 @@ async function updateDocument( } } - processDocumentAsync( + await dispatchDocumentProcessingJob({ knowledgeBaseId, - existingDocId, - { + documentId: existingDocId, + docData: { filename: processingFilename, fileUrl, fileSize: contentBuffer.length, mimeType: 'text/plain', }, - {} - ).catch((error) => { - logger.error('Failed to re-process updated connector document', { - documentId: existingDocId, - connectorId, - error: error instanceof Error ? 
error.message : String(error), - }) + processingOptions: {}, + requestId: `connector-sync-${connectorId}`, }) } diff --git a/apps/sim/lib/knowledge/documents/document-processor.ts b/apps/sim/lib/knowledge/documents/document-processor.ts index 0185de495b1..0a64dbf547c 100644 --- a/apps/sim/lib/knowledge/documents/document-processor.ts +++ b/apps/sim/lib/knowledge/documents/document-processor.ts @@ -5,6 +5,7 @@ import { type Chunk, JsonYamlChunker, StructuredDataChunker, TextChunker } from import { env } from '@/lib/core/config/env' import { parseBuffer, parseFile } from '@/lib/file-parsers' import type { FileParseMetadata } from '@/lib/file-parsers/types' +import { resolveParserExtension } from '@/lib/knowledge/documents/parser-extension' import { retryWithExponentialBackoff } from '@/lib/knowledge/documents/utils' import { StorageService } from '@/lib/uploads' import { isInternalFileUrl } from '@/lib/uploads/utils/file-utils' @@ -727,7 +728,7 @@ async function parseWithFileParser(fileUrl: string, filename: string, mimeType: if (fileUrl.startsWith('data:')) { content = await parseDataURI(fileUrl, filename, mimeType) } else if (fileUrl.startsWith('http')) { - const result = await parseHttpFile(fileUrl, filename) + const result = await parseHttpFile(fileUrl, filename, mimeType) content = result.content metadata = result.metadata || {} } else { @@ -759,7 +760,7 @@ async function parseDataURI(fileUrl: string, filename: string, mimeType: string) : decodeURIComponent(base64Data) } - const extension = filename.split('.').pop()?.toLowerCase() || 'txt' + const extension = resolveParserExtension(filename, mimeType) const buffer = Buffer.from(base64Data, 'base64') const result = await parseBuffer(buffer, extension) return result.content @@ -767,15 +768,12 @@ async function parseDataURI(fileUrl: string, filename: string, mimeType: string) async function parseHttpFile( fileUrl: string, - filename: string + filename: string, + mimeType: string ): Promise<{ content: string; 
metadata?: FileParseMetadata }> { const buffer = await downloadFileWithTimeout(fileUrl) - const extension = filename.split('.').pop()?.toLowerCase() - if (!extension) { - throw new Error(`Could not determine file extension: ${filename}`) - } - + const extension = resolveParserExtension(filename, mimeType) const result = await parseBuffer(buffer, extension) return result } diff --git a/apps/sim/lib/knowledge/documents/parser-extension.test.ts b/apps/sim/lib/knowledge/documents/parser-extension.test.ts new file mode 100644 index 00000000000..4d65abdfef5 --- /dev/null +++ b/apps/sim/lib/knowledge/documents/parser-extension.test.ts @@ -0,0 +1,27 @@ +/** + * @vitest-environment node + */ +import { describe, expect, it } from 'vitest' +import { resolveParserExtension } from '@/lib/knowledge/documents/parser-extension' + +describe('resolveParserExtension', () => { + it('uses a supported filename extension when present', () => { + expect(resolveParserExtension('report.pdf', 'application/pdf')).toBe('pdf') + }) + + it('falls back to mime type when filename has no extension', () => { + expect( + resolveParserExtension('[Business] Your Thursday morning trip with Uber', 'text/plain') + ).toBe('txt') + }) + + it('falls back to mime type when filename extension is unsupported', () => { + expect(resolveParserExtension('uber-message.business', 'text/plain')).toBe('txt') + }) + + it('throws when neither filename nor mime type resolves to a supported parser', () => { + expect(() => + resolveParserExtension('uber-message.unknown', 'application/octet-stream') + ).toThrow('Unsupported file type') + }) +}) diff --git a/apps/sim/lib/knowledge/documents/parser-extension.ts b/apps/sim/lib/knowledge/documents/parser-extension.ts new file mode 100644 index 00000000000..7260ee318c0 --- /dev/null +++ b/apps/sim/lib/knowledge/documents/parser-extension.ts @@ -0,0 +1,48 @@ +import { getExtensionFromMimeType } from '@/lib/uploads/utils/file-utils' + +const SUPPORTED_FILE_TYPES = [ + 'pdf', + 
'csv', + 'docx', + 'doc', + 'txt', + 'md', + 'xlsx', + 'xls', + 'pptx', + 'ppt', + 'html', + 'htm', + 'json', + 'yaml', + 'yml', +] as const + +const SUPPORTED_FILE_TYPES_TEXT = SUPPORTED_FILE_TYPES.join(', ') + +function isSupportedParserExtension(extension: string): boolean { + return SUPPORTED_FILE_TYPES.includes(extension as (typeof SUPPORTED_FILE_TYPES)[number]) +} + +export function resolveParserExtension(filename: string, mimeType: string): string { + const filenameExtension = filename.includes('.') + ? filename.split('.').pop()?.toLowerCase() + : undefined + + if (filenameExtension && isSupportedParserExtension(filenameExtension)) { + return filenameExtension + } + + const mimeExtension = getExtensionFromMimeType(mimeType) + if (mimeExtension && isSupportedParserExtension(mimeExtension)) { + return mimeExtension + } + + if (filenameExtension) { + throw new Error( + `Unsupported file type: ${filenameExtension}. Supported types are: ${SUPPORTED_FILE_TYPES_TEXT}` + ) + } + + throw new Error(`Could not determine file type for ${filename || 'document'}`) +} diff --git a/apps/sim/lib/knowledge/documents/queue.ts b/apps/sim/lib/knowledge/documents/queue.ts deleted file mode 100644 index 31dd0879c70..00000000000 --- a/apps/sim/lib/knowledge/documents/queue.ts +++ /dev/null @@ -1,227 +0,0 @@ -import { createLogger } from '@sim/logger' -import { getRedisClient } from '@/lib/core/config/redis' -import { getStorageMethod, type StorageMethod } from '@/lib/core/storage' - -const logger = createLogger('DocumentQueue') - -interface QueueJob { - id: string - type: string - data: T - timestamp: number - attempts: number - maxAttempts: number -} - -interface QueueConfig { - maxConcurrent: number - retryDelay: number - maxRetries: number -} - -/** - * Document processing queue that uses either Redis or in-memory storage. - * Storage method is determined once at construction based on configuration. - * No switching on transient errors. 
- */ -export class DocumentProcessingQueue { - private config: QueueConfig - private storageMethod: StorageMethod - private processing = new Map>() - private inMemoryQueue: QueueJob[] = [] - private inMemoryProcessing = 0 - private processingStarted = false - - constructor(config: QueueConfig) { - this.config = config - this.storageMethod = getStorageMethod() - logger.info(`DocumentProcessingQueue using ${this.storageMethod} storage`) - } - - async addJob(type: string, data: T, options: { maxAttempts?: number } = {}): Promise { - const job: QueueJob = { - id: `${type}-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`, - type, - data, - timestamp: Date.now(), - attempts: 0, - maxAttempts: options.maxAttempts || this.config.maxRetries, - } - - if (this.storageMethod === 'redis') { - const redis = getRedisClient() - if (!redis) { - throw new Error('Redis configured but client unavailable') - } - await redis.lpush('document-queue', JSON.stringify(job)) - logger.info(`Job ${job.id} added to Redis queue`) - } else { - this.inMemoryQueue.push(job) - logger.info(`Job ${job.id} added to in-memory queue`) - } - - return job.id - } - - async processJobs(processor: (job: QueueJob) => Promise): Promise { - if (this.processingStarted) { - logger.info('Queue processing already started, skipping') - return - } - - this.processingStarted = true - logger.info(`Starting queue processing (${this.storageMethod})`) - - if (this.storageMethod === 'redis') { - await this.processRedisJobs(processor) - } else { - await this.processInMemoryJobs(processor) - } - } - - private async processRedisJobs(processor: (job: QueueJob) => Promise) { - const redis = getRedisClient() - if (!redis) { - throw new Error('Redis configured but client unavailable') - } - - const processJobsContinuously = async () => { - while (true) { - if (this.processing.size >= this.config.maxConcurrent) { - await new Promise((resolve) => setTimeout(resolve, 100)) - continue - } - - try { - const result = await 
redis.rpop('document-queue') - if (!result) { - await new Promise((resolve) => setTimeout(resolve, 500)) - continue - } - - const job: QueueJob = JSON.parse(result) - const promise = this.executeJob(job, processor) - this.processing.set(job.id, promise) - - promise.finally(() => { - this.processing.delete(job.id) - }) - } catch (error: any) { - logger.error('Error processing Redis job:', error) - await new Promise((resolve) => setTimeout(resolve, 1000)) - } - } - } - - const processors = Array(this.config.maxConcurrent) - .fill(null) - .map(() => processJobsContinuously()) - - Promise.allSettled(processors).catch((error) => { - logger.error('Error in Redis queue processors:', error) - }) - } - - private async processInMemoryJobs(processor: (job: QueueJob) => Promise) { - const processInMemoryContinuously = async () => { - while (true) { - if (this.inMemoryProcessing >= this.config.maxConcurrent) { - await new Promise((resolve) => setTimeout(resolve, 100)) - continue - } - - const job = this.inMemoryQueue.shift() - if (!job) { - await new Promise((resolve) => setTimeout(resolve, 500)) - continue - } - - this.inMemoryProcessing++ - - this.executeJob(job, processor).finally(() => { - this.inMemoryProcessing-- - }) - } - } - - const processors = Array(this.config.maxConcurrent) - .fill(null) - .map(() => processInMemoryContinuously()) - - Promise.allSettled(processors).catch((error) => { - logger.error('Error in in-memory queue processors:', error) - }) - } - - private async executeJob( - job: QueueJob, - processor: (job: QueueJob) => Promise - ): Promise { - try { - job.attempts++ - logger.info(`Processing job ${job.id} (attempt ${job.attempts}/${job.maxAttempts})`) - - await processor(job) - logger.info(`Job ${job.id} completed successfully`) - } catch (error) { - logger.error(`Job ${job.id} failed (attempt ${job.attempts}):`, error) - - if (job.attempts < job.maxAttempts) { - const delay = this.config.retryDelay * 2 ** (job.attempts - 1) - - setTimeout(async () => { 
- if (this.storageMethod === 'redis') { - const redis = getRedisClient() - if (!redis) { - logger.error('Redis unavailable for retry, job lost:', job.id) - return - } - await redis.lpush('document-queue', JSON.stringify(job)) - } else { - this.inMemoryQueue.push(job) - } - }, delay) - - logger.info(`Job ${job.id} will retry in ${delay}ms`) - } else { - logger.error(`Job ${job.id} failed permanently after ${job.attempts} attempts`) - } - } - } - - async getQueueStats(): Promise<{ - pending: number - processing: number - storageMethod: StorageMethod - }> { - let pending = 0 - - if (this.storageMethod === 'redis') { - const redis = getRedisClient() - if (redis) { - pending = await redis.llen('document-queue') - } - } else { - pending = this.inMemoryQueue.length - } - - return { - pending, - processing: this.storageMethod === 'redis' ? this.processing.size : this.inMemoryProcessing, - storageMethod: this.storageMethod, - } - } - - async clearQueue(): Promise { - if (this.storageMethod === 'redis') { - const redis = getRedisClient() - if (redis) { - await redis.del('document-queue') - logger.info('Redis queue cleared') - } - } - - this.inMemoryQueue.length = 0 - logger.info('In-memory queue cleared') - } -} diff --git a/apps/sim/lib/knowledge/documents/service.ts b/apps/sim/lib/knowledge/documents/service.ts index 2d607c41da5..ccb00f5633c 100644 --- a/apps/sim/lib/knowledge/documents/service.ts +++ b/apps/sim/lib/knowledge/documents/service.ts @@ -25,10 +25,11 @@ import { type SQL, sql, } from 'drizzle-orm' +import { createBullMQJobData, isBullMQEnabled } from '@/lib/core/bullmq' import { env } from '@/lib/core/config/env' -import { getStorageMethod, isRedisStorage } from '@/lib/core/storage' +import { isTriggerDevEnabled } from '@/lib/core/config/feature-flags' +import { enqueueWorkspaceDispatch } from '@/lib/core/workspace-dispatch' import { processDocument } from '@/lib/knowledge/documents/document-processor' -import { DocumentProcessingQueue } from 
'@/lib/knowledge/documents/queue' import type { DocumentSortField, SortOrder } from '@/lib/knowledge/documents/types' import { generateEmbeddings } from '@/lib/knowledge/embeddings' import { @@ -88,22 +89,8 @@ const REDIS_PROCESSING_CONFIG = { delayBetweenDocuments: env.KB_CONFIG_DELAY_BETWEEN_DOCUMENTS || 50, } -let documentQueue: DocumentProcessingQueue | null = null - -export function getDocumentQueue(): DocumentProcessingQueue { - if (!documentQueue) { - const config = isRedisStorage() ? REDIS_PROCESSING_CONFIG : PROCESSING_CONFIG - documentQueue = new DocumentProcessingQueue({ - maxConcurrent: config.maxConcurrentDocuments, - retryDelay: env.KB_CONFIG_MIN_TIMEOUT || 1000, - maxRetries: env.KB_CONFIG_MAX_ATTEMPTS || 3, - }) - } - return documentQueue -} - export function getProcessingConfig() { - return isRedisStorage() ? REDIS_PROCESSING_CONFIG : PROCESSING_CONFIG + return isBullMQEnabled() ? REDIS_PROCESSING_CONFIG : PROCESSING_CONFIG } export interface DocumentData { @@ -115,11 +102,11 @@ export interface DocumentData { } export interface ProcessingOptions { - chunkSize: number - minCharactersPerChunk: number - recipe: string - lang: string - chunkOverlap: number + chunkSize?: number + minCharactersPerChunk?: number + recipe?: string + lang?: string + chunkOverlap?: number } export interface DocumentJobData { @@ -135,6 +122,54 @@ export interface DocumentJobData { requestId: string } +export async function dispatchDocumentProcessingJob(payload: DocumentJobData): Promise { + if (isTriggerAvailable()) { + await tasks.trigger('knowledge-process-document', payload) + return + } + + if (isBullMQEnabled()) { + const workspaceRows = await db + .select({ + workspaceId: knowledgeBase.workspaceId, + userId: knowledgeBase.userId, + }) + .from(knowledgeBase) + .where(and(eq(knowledgeBase.id, payload.knowledgeBaseId), isNull(knowledgeBase.deletedAt))) + .limit(1) + + const workspaceId = workspaceRows[0]?.workspaceId + const userId = workspaceRows[0]?.userId + if 
(!workspaceId || !userId) { + throw new Error(`Knowledge base not found: ${payload.knowledgeBaseId}`) + } + + await enqueueWorkspaceDispatch({ + workspaceId, + lane: 'knowledge', + queueName: 'knowledge-process-document', + bullmqJobName: 'knowledge-process-document', + bullmqPayload: createBullMQJobData(payload), + metadata: { + userId, + }, + }) + return + } + + void processDocumentAsync( + payload.knowledgeBaseId, + payload.documentId, + payload.docData, + payload.processingOptions + ).catch((error) => { + logger.error(`[${payload.requestId}] Direct document processing failed`, { + documentId: payload.documentId, + error: error instanceof Error ? error.message : String(error), + }) + }) +} + export interface DocumentTagData { tagName: string fieldType: string @@ -323,7 +358,7 @@ export async function processDocumentTags( } /** - * Process documents with best available method: Trigger.dev > Redis queue > in-memory concurrency control + * Process documents with the configured background execution backend. 
*/ export async function processDocumentsWithQueue( createdDocuments: DocumentData[], @@ -331,76 +366,29 @@ export async function processDocumentsWithQueue( processingOptions: ProcessingOptions, requestId: string ): Promise { - // Priority 1: Trigger.dev - if (isTriggerAvailable()) { - try { - logger.info( - `[${requestId}] Using Trigger.dev background processing for ${createdDocuments.length} documents` - ) - - const triggerPayloads = createdDocuments.map((doc) => ({ - knowledgeBaseId, - documentId: doc.documentId, - docData: { - filename: doc.filename, - fileUrl: doc.fileUrl, - fileSize: doc.fileSize, - mimeType: doc.mimeType, - }, - processingOptions, - requestId, - })) - - const result = await processDocumentsWithTrigger(triggerPayloads, requestId) - - if (result.success) { - logger.info( - `[${requestId}] Successfully triggered background processing: ${result.message}` - ) - return - } - logger.warn(`[${requestId}] Trigger.dev failed: ${result.message}, falling back to Redis`) - } catch (error) { - logger.warn(`[${requestId}] Trigger.dev processing failed, falling back to Redis:`, error) - } - } - - // Priority 2: Queue-based processing (Redis or in-memory based on storage method) - const queue = getDocumentQueue() - const storageMethod = getStorageMethod() + const jobPayloads = createdDocuments.map((doc) => ({ + knowledgeBaseId, + documentId: doc.documentId, + docData: { + filename: doc.filename, + fileUrl: doc.fileUrl, + fileSize: doc.fileSize, + mimeType: doc.mimeType, + }, + processingOptions, + requestId, + })) logger.info( - `[${requestId}] Using ${storageMethod} queue for ${createdDocuments.length} documents` - ) - - const jobPromises = createdDocuments.map((doc) => - queue.addJob('process-document', { - knowledgeBaseId, - documentId: doc.documentId, - docData: { - filename: doc.filename, - fileUrl: doc.fileUrl, - fileSize: doc.fileSize, - mimeType: doc.mimeType, - }, - processingOptions, - requestId, - }) + `[${requestId}] Dispatching background 
processing for ${jobPayloads.length} documents`, + { + backend: isTriggerAvailable() ? 'trigger-dev' : isBullMQEnabled() ? 'bullmq' : 'direct', + } ) - await Promise.all(jobPromises) + await Promise.all(jobPayloads.map((payload) => dispatchDocumentProcessingJob(payload))) - queue - .processJobs(async (job) => { - const data = job.data as DocumentJobData - const { knowledgeBaseId, documentId, docData, processingOptions } = data - await processDocumentAsync(knowledgeBaseId, documentId, docData, processingOptions) - }) - .catch((error) => { - logger.error(`[${requestId}] Error in queue processing:`, error) - }) - - logger.info(`[${requestId}] All documents queued for processing`) + logger.info(`[${requestId}] All documents dispatched for processing`) return } @@ -660,7 +648,7 @@ export async function processDocumentAsync( * Check if Trigger.dev is available and configured */ export function isTriggerAvailable(): boolean { - return !!(env.TRIGGER_SECRET_KEY && env.TRIGGER_DEV_ENABLED !== false) + return Boolean(env.TRIGGER_SECRET_KEY) && isTriggerDevEnabled } /** @@ -1591,11 +1579,13 @@ export async function retryDocumentProcessing( chunkOverlap: kbConfig.overlap, } - processDocumentAsync(knowledgeBaseId, documentId, docData, processingOptions).catch( - (error: unknown) => { - logger.error(`[${requestId}] Background retry processing error:`, error) - } - ) + await dispatchDocumentProcessingJob({ + knowledgeBaseId, + documentId, + docData, + processingOptions, + requestId, + }) logger.info(`[${requestId}] Document retry initiated: ${documentId}`) diff --git a/apps/sim/lib/logs/events.ts b/apps/sim/lib/logs/events.ts index bbf17b2320e..54392e2ebbf 100644 --- a/apps/sim/lib/logs/events.ts +++ b/apps/sim/lib/logs/events.ts @@ -12,6 +12,7 @@ import { } from '@/lib/notifications/alert-rules' import { getActiveWorkflowContext } from '@/lib/workflows/active-context' import { + enqueueNotificationDeliveryDispatch, executeNotificationDelivery, workspaceNotificationDeliveryTask, 
} from '@/background/workspace-notification-delivery' @@ -131,6 +132,7 @@ export async function emitWorkflowExecutionCompleted(log: WorkflowExecutionLog): const payload = { deliveryId, subscriptionId: subscription.id, + workspaceId, notificationType: subscription.notificationType, log: notificationLog, alertConfig: alertConfig || undefined, @@ -141,6 +143,10 @@ export async function emitWorkflowExecutionCompleted(log: WorkflowExecutionLog): logger.info( `Enqueued ${subscription.notificationType} notification ${deliveryId} via Trigger.dev` ) + } else if (await enqueueNotificationDeliveryDispatch(payload)) { + logger.info( + `Enqueued ${subscription.notificationType} notification ${deliveryId} via BullMQ` + ) } else { void executeNotificationDelivery(payload).catch((error) => { logger.error(`Direct notification delivery failed for ${deliveryId}`, { error }) diff --git a/apps/sim/lib/notifications/inactivity-polling.ts b/apps/sim/lib/notifications/inactivity-polling.ts index 3a4505346d8..81aa0692dba 100644 --- a/apps/sim/lib/notifications/inactivity-polling.ts +++ b/apps/sim/lib/notifications/inactivity-polling.ts @@ -12,6 +12,7 @@ import { v4 as uuidv4 } from 'uuid' import { isTriggerDevEnabled } from '@/lib/core/config/feature-flags' import { TRIGGER_TYPES } from '@/lib/workflows/triggers/triggers' import { + enqueueNotificationDeliveryDispatch, executeNotificationDelivery, workspaceNotificationDeliveryTask, } from '@/background/workspace-notification-delivery' @@ -181,6 +182,7 @@ async function checkWorkflowInactivity( const payload = { deliveryId, subscriptionId: subscription.id, + workspaceId: workflowData.workspaceId, notificationType: subscription.notificationType, log: mockLog, alertConfig, @@ -188,6 +190,7 @@ async function checkWorkflowInactivity( if (isTriggerDevEnabled) { await workspaceNotificationDeliveryTask.trigger(payload) + } else if (await enqueueNotificationDeliveryDispatch(payload)) { } else { void 
executeNotificationDelivery(payload).catch((error) => { logger.error(`Direct notification delivery failed for ${deliveryId}`, { error }) diff --git a/apps/sim/lib/uploads/utils/user-file-base64.server.ts b/apps/sim/lib/uploads/utils/user-file-base64.server.ts index f3abdf5acde..3aa2f219eb1 100644 --- a/apps/sim/lib/uploads/utils/user-file-base64.server.ts +++ b/apps/sim/lib/uploads/utils/user-file-base64.server.ts @@ -244,13 +244,13 @@ async function hydrateValue( * Hydrates UserFile objects within a value to include base64 content. * Returns the original structure with UserFile.base64 set where available. */ -export async function hydrateUserFilesWithBase64( - value: unknown, +export async function hydrateUserFilesWithBase64( + value: T, options: Base64HydrationOptions -): Promise { +): Promise { const logger = getHydrationLogger(options) const state = createHydrationState(options, logger) - return hydrateValue(value, options, state, logger) + return (await hydrateValue(value, options, state, logger)) as T } function isPlainObject(value: unknown): value is Record { diff --git a/apps/sim/lib/webhooks/processor.test.ts b/apps/sim/lib/webhooks/processor.test.ts index 20ae4408cd8..86876fda02b 100644 --- a/apps/sim/lib/webhooks/processor.test.ts +++ b/apps/sim/lib/webhooks/processor.test.ts @@ -9,12 +9,14 @@ const { mockUuidV4, mockPreprocessExecution, mockEnqueue, + mockEnqueueWorkspaceDispatch, mockGetJobQueue, mockShouldExecuteInline, } = vi.hoisted(() => ({ mockUuidV4: vi.fn(), mockPreprocessExecution: vi.fn(), mockEnqueue: vi.fn(), + mockEnqueueWorkspaceDispatch: vi.fn(), mockGetJobQueue: vi.fn(), mockShouldExecuteInline: vi.fn(), })) @@ -62,6 +64,15 @@ vi.mock('@/lib/core/async-jobs', () => ({ shouldExecuteInline: mockShouldExecuteInline, })) +vi.mock('@/lib/core/bullmq', () => ({ + isBullMQEnabled: vi.fn().mockReturnValue(true), + createBullMQJobData: vi.fn((payload: unknown, metadata?: unknown) => ({ payload, metadata })), +})) + 
+vi.mock('@/lib/core/workspace-dispatch', () => ({ + enqueueWorkspaceDispatch: mockEnqueueWorkspaceDispatch, +})) + vi.mock('@/lib/core/config/feature-flags', () => ({ isProd: false, })) @@ -142,6 +153,7 @@ describe('webhook processor execution identity', () => { actorUserId: 'actor-user-1', }) mockEnqueue.mockResolvedValue('job-1') + mockEnqueueWorkspaceDispatch.mockResolvedValue('job-1') mockGetJobQueue.mockResolvedValue({ enqueue: mockEnqueue }) mockShouldExecuteInline.mockReturnValue(false) mockUuidV4.mockReturnValue('generated-execution-id') @@ -202,15 +214,15 @@ describe('webhook processor execution identity', () => { ) expect(mockUuidV4).toHaveBeenCalledTimes(1) - expect(mockEnqueue).toHaveBeenCalledWith( - 'webhook-execution', - expect.objectContaining({ - executionId: 'generated-execution-id', - requestId: 'request-1', - correlation: preprocessingResult.correlation, - }), + expect(mockEnqueueWorkspaceDispatch).toHaveBeenCalledWith( expect.objectContaining({ + id: 'generated-execution-id', + workspaceId: 'workspace-1', + lane: 'runtime', + queueName: 'webhook-execution', metadata: expect.objectContaining({ + workflowId: 'workflow-1', + userId: 'actor-user-1', correlation: preprocessingResult.correlation, }), }) diff --git a/apps/sim/lib/webhooks/processor.ts b/apps/sim/lib/webhooks/processor.ts index 48604026691..5b537944337 100644 --- a/apps/sim/lib/webhooks/processor.ts +++ b/apps/sim/lib/webhooks/processor.ts @@ -7,8 +7,10 @@ import { v4 as uuidv4 } from 'uuid' import { checkEnterprisePlan, checkTeamPlan } from '@/lib/billing/subscriptions/utils' import { getInlineJobQueue, getJobQueue, shouldExecuteInline } from '@/lib/core/async-jobs' import type { AsyncExecutionCorrelation } from '@/lib/core/async-jobs/types' +import { createBullMQJobData, isBullMQEnabled } from '@/lib/core/bullmq' import { isProd } from '@/lib/core/config/feature-flags' import { safeCompare } from '@/lib/core/security/encryption' +import { enqueueWorkspaceDispatch } from 
'@/lib/core/workspace-dispatch' import { getEffectiveDecryptedEnv } from '@/lib/environment/utils' import { preprocessExecution } from '@/lib/execution/preprocessing' import { @@ -1277,53 +1279,91 @@ export async function queueWebhookExecution( const isPolling = isPollingWebhookProvider(payload.provider) if (isPolling && !shouldExecuteInline()) { - const jobQueue = await getJobQueue() - const jobId = await jobQueue.enqueue('webhook-execution', payload, { - metadata: { - workflowId: foundWorkflow.id, - userId: actorUserId, - correlation, - }, - }) + const jobId = isBullMQEnabled() + ? await enqueueWorkspaceDispatch({ + id: executionId, + workspaceId: foundWorkflow.workspaceId, + lane: 'runtime', + queueName: 'webhook-execution', + bullmqJobName: 'webhook-execution', + bullmqPayload: createBullMQJobData(payload, { + workflowId: foundWorkflow.id, + userId: actorUserId, + correlation, + }), + metadata: { + workflowId: foundWorkflow.id, + userId: actorUserId, + correlation, + }, + }) + : await (await getJobQueue()).enqueue('webhook-execution', payload, { + metadata: { + workflowId: foundWorkflow.id, + userId: actorUserId, + correlation, + }, + }) logger.info( `[${options.requestId}] Queued polling webhook execution task ${jobId} for ${foundWebhook.provider} webhook via job queue` ) } else { const jobQueue = await getInlineJobQueue() - const jobId = await jobQueue.enqueue('webhook-execution', payload, { - metadata: { - workflowId: foundWorkflow.id, - userId: actorUserId, - correlation, - }, - }) + const jobId = isBullMQEnabled() + ? 
await enqueueWorkspaceDispatch({ + id: executionId, + workspaceId: foundWorkflow.workspaceId, + lane: 'runtime', + queueName: 'webhook-execution', + bullmqJobName: 'webhook-execution', + bullmqPayload: createBullMQJobData(payload, { + workflowId: foundWorkflow.id, + userId: actorUserId, + correlation, + }), + metadata: { + workflowId: foundWorkflow.id, + userId: actorUserId, + correlation, + }, + }) + : await jobQueue.enqueue('webhook-execution', payload, { + metadata: { + workflowId: foundWorkflow.id, + userId: actorUserId, + correlation, + }, + }) logger.info( - `[${options.requestId}] Executing ${foundWebhook.provider} webhook ${jobId} inline` + `[${options.requestId}] Queued ${foundWebhook.provider} webhook execution ${jobId} via inline backend` ) - void (async () => { - try { - await jobQueue.startJob(jobId) - const output = await executeWebhookJob(payload) - await jobQueue.completeJob(jobId, output) - } catch (error) { - const errorMessage = error instanceof Error ? error.message : String(error) - logger.error(`[${options.requestId}] Webhook execution failed`, { - jobId, - error: errorMessage, - }) + + if (shouldExecuteInline()) { + void (async () => { try { - await jobQueue.markJobFailed(jobId, errorMessage) - } catch (markFailedError) { - logger.error(`[${options.requestId}] Failed to mark job as failed`, { + await jobQueue.startJob(jobId) + const output = await executeWebhookJob(payload) + await jobQueue.completeJob(jobId, output) + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error) + logger.error(`[${options.requestId}] Webhook execution failed`, { jobId, - error: - markFailedError instanceof Error - ? markFailedError.message - : String(markFailedError), + error: errorMessage, }) + try { + await jobQueue.markJobFailed(jobId, errorMessage) + } catch (markFailedError) { + logger.error(`[${options.requestId}] Failed to mark job as failed`, { + jobId, + error: + markFailedError instanceof Error + ? 
markFailedError.message + : String(markFailedError), + }) + } } - } - })() + })() + } } if (foundWebhook.provider === 'microsoft-teams') { diff --git a/apps/sim/lib/workflows/executor/execution-events.ts b/apps/sim/lib/workflows/executor/execution-events.ts index 2a2c06d4016..5872c1db5cc 100644 --- a/apps/sim/lib/workflows/executor/execution-events.ts +++ b/apps/sim/lib/workflows/executor/execution-events.ts @@ -241,18 +241,17 @@ export interface SSECallbackOptions { } /** - * Creates SSE callbacks for workflow execution streaming + * Creates execution callbacks using a provided event sink. */ -export function createSSECallbacks(options: SSECallbackOptions) { - const { executionId, workflowId, controller, isStreamClosed, setStreamClosed } = options +export function createExecutionCallbacks(options: { + executionId: string + workflowId: string + sendEvent: (event: ExecutionEvent) => void | Promise +}) { + const { executionId, workflowId, sendEvent } = options - const sendEvent = (event: ExecutionEvent) => { - if (isStreamClosed()) return - try { - controller.enqueue(encodeSSEEvent(event)) - } catch { - setStreamClosed() - } + const sendBufferedEvent = async (event: ExecutionEvent) => { + await sendEvent(event) } const onBlockStart = async ( @@ -263,7 +262,7 @@ export function createSSECallbacks(options: SSECallbackOptions) { iterationContext?: IterationContext, childWorkflowContext?: ChildWorkflowContext ) => { - sendEvent({ + await sendBufferedEvent({ type: 'block:started', timestamp: new Date().toISOString(), executionId, @@ -330,7 +329,7 @@ export function createSSECallbacks(options: SSECallbackOptions) { : {} if (hasError) { - sendEvent({ + await sendBufferedEvent({ type: 'block:error', timestamp: new Date().toISOString(), executionId, @@ -351,7 +350,7 @@ export function createSSECallbacks(options: SSECallbackOptions) { }, }) } else { - sendEvent({ + await sendBufferedEvent({ type: 'block:completed', timestamp: new Date().toISOString(), executionId, @@ -385,7 
+384,7 @@ export function createSSECallbacks(options: SSECallbackOptions) { const { done, value } = await reader.read() if (done) break const chunk = decoder.decode(value, { stream: true }) - sendEvent({ + await sendBufferedEvent({ type: 'stream:chunk', timestamp: new Date().toISOString(), executionId, @@ -393,7 +392,7 @@ export function createSSECallbacks(options: SSECallbackOptions) { data: { blockId, chunk }, }) } - sendEvent({ + await sendBufferedEvent({ type: 'stream:done', timestamp: new Date().toISOString(), executionId, @@ -413,7 +412,7 @@ export function createSSECallbacks(options: SSECallbackOptions) { iterationContext?: IterationContext, executionOrder?: number ) => { - sendEvent({ + void sendBufferedEvent({ type: 'block:childWorkflowStarted', timestamp: new Date().toISOString(), executionId, @@ -430,5 +429,33 @@ export function createSSECallbacks(options: SSECallbackOptions) { }) } - return { sendEvent, onBlockStart, onBlockComplete, onStream, onChildWorkflowInstanceReady } + return { + sendEvent: sendBufferedEvent, + onBlockStart, + onBlockComplete, + onStream, + onChildWorkflowInstanceReady, + } +} + +/** + * Creates SSE callbacks for workflow execution streaming + */ +export function createSSECallbacks(options: SSECallbackOptions) { + const { executionId, workflowId, controller, isStreamClosed, setStreamClosed } = options + + const sendEvent = (event: ExecutionEvent) => { + if (isStreamClosed()) return + try { + controller.enqueue(encodeSSEEvent(event)) + } catch { + setStreamClosed() + } + } + + return createExecutionCallbacks({ + executionId, + workflowId, + sendEvent, + }) } diff --git a/apps/sim/lib/workflows/executor/queued-workflow-execution.ts b/apps/sim/lib/workflows/executor/queued-workflow-execution.ts new file mode 100644 index 00000000000..c60ba860a11 --- /dev/null +++ b/apps/sim/lib/workflows/executor/queued-workflow-execution.ts @@ -0,0 +1,339 @@ +import { createLogger } from '@sim/logger' +import { createTimeoutAbortController, 
getTimeoutErrorMessage } from '@/lib/core/execution-limits' +import { createExecutionEventWriter, setExecutionMeta } from '@/lib/execution/event-buffer' +import { LoggingSession } from '@/lib/logs/execution/logging-session' +import { buildTraceSpans } from '@/lib/logs/execution/trace-spans/trace-spans' +import { + cleanupExecutionBase64Cache, + hydrateUserFilesWithBase64, +} from '@/lib/uploads/utils/user-file-base64.server' +import { + executeWorkflowCore, + wasExecutionFinalizedByCore, +} from '@/lib/workflows/executor/execution-core' +import { + createExecutionCallbacks, + type ExecutionEvent, +} from '@/lib/workflows/executor/execution-events' +import { PauseResumeManager } from '@/lib/workflows/executor/human-in-the-loop-manager' +import { ExecutionSnapshot } from '@/executor/execution/snapshot' +import type { ExecutionMetadata, SerializableExecutionState } from '@/executor/execution/types' +import type { BlockLog, NormalizedBlockOutput } from '@/executor/types' +import { hasExecutionResult } from '@/executor/utils/errors' + +const logger = createLogger('QueuedWorkflowExecution') + +export const DIRECT_WORKFLOW_JOB_NAME = 'direct-workflow-execution' + +export interface QueuedWorkflowExecutionPayload { + workflow: Record + metadata: ExecutionMetadata + input: unknown + variables: Record + selectedOutputs?: string[] + includeFileBase64?: boolean + base64MaxBytes?: number + stopAfterBlockId?: string + timeoutMs?: number + runFromBlock?: { + startBlockId: string + sourceSnapshot: SerializableExecutionState + } + streamEvents?: boolean +} + +export interface QueuedWorkflowExecutionResult { + success: boolean + executionId: string + output: NormalizedBlockOutput + error?: string + logs?: BlockLog[] + status: 'success' | 'cancelled' | 'paused' | 'failed' + statusCode?: number + metadata?: { + duration?: number + startTime?: string + endTime?: string + } +} + +function buildResult( + status: QueuedWorkflowExecutionResult['status'], + result: { + success: boolean + 
output: NormalizedBlockOutput + error?: string + logs?: BlockLog[] + metadata?: { + duration?: number + startTime?: string + endTime?: string + } + }, + executionId: string, + statusCode?: number +): QueuedWorkflowExecutionResult { + return { + success: result.success, + executionId, + output: result.output, + error: result.error, + logs: result.logs, + status, + statusCode, + metadata: result.metadata, + } +} + +export async function executeQueuedWorkflowJob( + payload: QueuedWorkflowExecutionPayload +): Promise { + const { metadata } = payload + const { executionId, requestId, workflowId, triggerType } = metadata + const loggingSession = new LoggingSession(workflowId, executionId, triggerType, requestId) + const timeoutController = createTimeoutAbortController(payload.timeoutMs) + const eventWriter = payload.streamEvents ? createExecutionEventWriter(executionId) : null + + if (payload.streamEvents) { + await setExecutionMeta(executionId, { + status: 'active', + userId: metadata.userId, + workflowId, + }) + } + + try { + const snapshot = new ExecutionSnapshot( + metadata, + payload.workflow, + payload.input, + payload.variables, + payload.selectedOutputs ?? 
[] + ) + + let callbacks = {} + + if (eventWriter) { + const executionCallbacks = createExecutionCallbacks({ + executionId, + workflowId, + sendEvent: async (event: ExecutionEvent) => { + await eventWriter.write(event) + }, + }) + + callbacks = { + onBlockStart: executionCallbacks.onBlockStart, + onBlockComplete: executionCallbacks.onBlockComplete, + onStream: executionCallbacks.onStream, + onChildWorkflowInstanceReady: executionCallbacks.onChildWorkflowInstanceReady, + } + + await executionCallbacks.sendEvent({ + type: 'execution:started', + timestamp: new Date().toISOString(), + executionId, + workflowId, + data: { + startTime: metadata.startTime, + }, + }) + } + + const result = await executeWorkflowCore({ + snapshot, + callbacks, + loggingSession, + includeFileBase64: payload.includeFileBase64, + base64MaxBytes: payload.base64MaxBytes, + stopAfterBlockId: payload.stopAfterBlockId, + runFromBlock: payload.runFromBlock, + abortSignal: timeoutController.signal, + }) + + if ( + result.status === 'cancelled' && + timeoutController.isTimedOut() && + timeoutController.timeoutMs + ) { + const timeoutErrorMessage = getTimeoutErrorMessage(null, timeoutController.timeoutMs) + await loggingSession.markAsFailed(timeoutErrorMessage) + + if (eventWriter) { + await eventWriter.write({ + type: 'execution:error', + timestamp: new Date().toISOString(), + executionId, + workflowId, + data: { + error: timeoutErrorMessage, + duration: result.metadata?.duration || 0, + }, + }) + + await setExecutionMeta(executionId, { status: 'error' }) + } + + return buildResult( + 'cancelled', + { + success: false, + output: result.output, + error: timeoutErrorMessage, + logs: result.logs, + metadata: result.metadata + ? 
{ + duration: result.metadata.duration, + startTime: result.metadata.startTime, + endTime: result.metadata.endTime, + } + : undefined, + }, + executionId, + 408 + ) + } + + if (result.status === 'paused') { + if (!result.snapshotSeed) { + await loggingSession.markAsFailed('Missing snapshot seed for paused execution') + } else { + await PauseResumeManager.persistPauseResult({ + workflowId, + executionId, + pausePoints: result.pausePoints || [], + snapshotSeed: result.snapshotSeed, + executorUserId: result.metadata?.userId, + }) + } + } else { + await PauseResumeManager.processQueuedResumes(executionId) + } + + const outputWithBase64 = payload.includeFileBase64 + ? await hydrateUserFilesWithBase64(result.output, { + requestId, + executionId, + maxBytes: payload.base64MaxBytes, + }) + : result.output + + if (eventWriter) { + if (result.status === 'cancelled') { + await eventWriter.write({ + type: 'execution:cancelled', + timestamp: new Date().toISOString(), + executionId, + workflowId, + data: { + duration: result.metadata?.duration || 0, + }, + }) + await setExecutionMeta(executionId, { status: 'cancelled' }) + } else { + await eventWriter.write({ + type: 'execution:completed', + timestamp: new Date().toISOString(), + executionId, + workflowId, + data: { + success: result.success, + output: outputWithBase64, + duration: result.metadata?.duration || 0, + startTime: result.metadata?.startTime || metadata.startTime, + endTime: result.metadata?.endTime || new Date().toISOString(), + }, + }) + await setExecutionMeta(executionId, { status: 'complete' }) + } + } + + return buildResult( + result.status === 'paused' + ? 'paused' + : result.status === 'cancelled' + ? 'cancelled' + : 'success', + { + success: result.success, + output: outputWithBase64, + error: result.error, + logs: result.logs, + metadata: result.metadata + ? 
{ + duration: result.metadata.duration, + startTime: result.metadata.startTime, + endTime: result.metadata.endTime, + } + : undefined, + }, + executionId + ) + } catch (error) { + logger.error('Queued workflow execution failed', { + workflowId, + executionId, + error: error instanceof Error ? error.message : String(error), + }) + + if (!wasExecutionFinalizedByCore(error, executionId)) { + const executionResult = hasExecutionResult(error) ? error.executionResult : undefined + const { traceSpans } = executionResult ? buildTraceSpans(executionResult) : { traceSpans: [] } + await loggingSession.safeCompleteWithError({ + error: { + message: error instanceof Error ? error.message : String(error), + stackTrace: error instanceof Error ? error.stack : undefined, + }, + traceSpans, + }) + } + + if (eventWriter) { + await eventWriter.write({ + type: 'execution:error', + timestamp: new Date().toISOString(), + executionId, + workflowId, + data: { + error: error instanceof Error ? error.message : String(error), + duration: 0, + }, + }) + await setExecutionMeta(executionId, { status: 'error' }) + } + + const executionResult = hasExecutionResult(error) ? error.executionResult : undefined + + return buildResult( + 'failed', + { + success: false, + output: executionResult?.output ?? {}, + error: executionResult?.error || (error instanceof Error ? error.message : String(error)), + logs: executionResult?.logs, + metadata: executionResult?.metadata + ? { + duration: executionResult.metadata.duration, + startTime: executionResult.metadata.startTime, + endTime: executionResult.metadata.endTime, + } + : undefined, + }, + executionId, + 500 + ) + } finally { + timeoutController.cleanup() + + if (eventWriter) { + await eventWriter.close() + } + + await cleanupExecutionBase64Cache(executionId).catch((error) => { + logger.error('Failed to cleanup queued workflow base64 cache', { + executionId, + error: error instanceof Error ? 
error.message : String(error), + }) + }) + } +} diff --git a/apps/sim/lib/workflows/utils.ts b/apps/sim/lib/workflows/utils.ts index d5c50b47ee6..c77521f3b1b 100644 --- a/apps/sim/lib/workflows/utils.ts +++ b/apps/sim/lib/workflows/utils.ts @@ -249,7 +249,9 @@ export async function updateWorkflowRunCounts(workflowId: string, runs = 1) { } } -export const workflowHasResponseBlock = (executionResult: ExecutionResult): boolean => { +export const workflowHasResponseBlock = ( + executionResult: Pick +): boolean => { if (!executionResult?.logs || !Array.isArray(executionResult.logs) || !executionResult.success) { return false } @@ -261,7 +263,9 @@ export const workflowHasResponseBlock = (executionResult: ExecutionResult): bool return responseBlock !== undefined } -export const createHttpResponseFromBlock = (executionResult: ExecutionResult): NextResponse => { +export const createHttpResponseFromBlock = ( + executionResult: Pick +): NextResponse => { const { data = {}, status = 200, headers = {} } = executionResult.output const responseHeaders = new Headers({ diff --git a/apps/sim/package.json b/apps/sim/package.json index 8e9fee648e5..f2f3b050f32 100644 --- a/apps/sim/package.json +++ b/apps/sim/package.json @@ -11,9 +11,11 @@ "dev": "next dev --port 3000", "dev:webpack": "next dev --webpack", "dev:sockets": "bun run socket/index.ts", - "dev:full": "bunx concurrently -n \"App,Realtime\" -c \"cyan,magenta\" \"bun run dev\" \"bun run dev:sockets\"", + "dev:worker": "bun run worker/index.ts", + "dev:full": "bunx concurrently -n \"App,Realtime,Worker\" -c \"cyan,magenta,yellow\" \"bun run dev\" \"bun run dev:sockets\" \"bun run dev:worker\"", "build": "next build", "start": "next start", + "worker": "NODE_ENV=production bun run worker/index.ts", "prepare": "cd ../.. 
&& bun husky", "test": "vitest run", "test:watch": "vitest", @@ -89,6 +91,7 @@ "better-auth": "1.3.12", "binary-extensions": "^2.0.0", "browser-image-compression": "^2.0.2", + "bullmq": "5.71.0", "chalk": "5.6.2", "cheerio": "1.1.2", "class-variance-authority": "^0.7.1", diff --git a/apps/sim/worker/health.ts b/apps/sim/worker/health.ts new file mode 100644 index 00000000000..f2a5fea3c11 --- /dev/null +++ b/apps/sim/worker/health.ts @@ -0,0 +1,77 @@ +import { createServer } from 'http' +import { createLogger } from '@sim/logger' +import { getRedisClient } from '@/lib/core/config/redis' + +const logger = createLogger('BullMQWorkerHealth') + +export interface WorkerHealthServer { + close: () => Promise +} + +interface WorkerHealthCheck { + redisConnected: boolean + dispatcherLastWakeAt: number +} + +let healthState: WorkerHealthCheck = { + redisConnected: false, + dispatcherLastWakeAt: 0, +} + +export function updateWorkerHealthState(update: Partial): void { + healthState = { ...healthState, ...update } +} + +export function startWorkerHealthServer(port: number): WorkerHealthServer { + const server = createServer((req, res) => { + if (req.method === 'GET' && req.url === '/health') { + const redis = getRedisClient() + const redisConnected = redis !== null + const dispatcherActive = + healthState.dispatcherLastWakeAt > 0 && + Date.now() - healthState.dispatcherLastWakeAt < 30_000 + + const healthy = redisConnected && dispatcherActive + + res.writeHead(healthy ? 200 : 503, { 'Content-Type': 'application/json' }) + res.end( + JSON.stringify({ + ok: healthy, + redis: redisConnected, + dispatcher: dispatcherActive, + lastWakeAgoMs: healthState.dispatcherLastWakeAt + ? 
Date.now() - healthState.dispatcherLastWakeAt + : null, + }) + ) + return + } + + if (req.method === 'GET' && req.url === '/health/live') { + res.writeHead(200, { 'Content-Type': 'application/json' }) + res.end(JSON.stringify({ ok: true })) + return + } + + res.writeHead(404, { 'Content-Type': 'application/json' }) + res.end(JSON.stringify({ error: 'Not found' })) + }) + + server.listen(port, '0.0.0.0', () => { + logger.info(`Worker health server listening on port ${port}`) + }) + + return { + close: () => + new Promise((resolve, reject) => { + server.close((error) => { + if (error) { + reject(error) + return + } + + resolve() + }) + }), + } +} diff --git a/apps/sim/worker/index.ts b/apps/sim/worker/index.ts new file mode 100644 index 00000000000..aaf71dd5aab --- /dev/null +++ b/apps/sim/worker/index.ts @@ -0,0 +1,190 @@ +import { createLogger } from '@sim/logger' +import { Worker } from 'bullmq' +import { + getBullMQConnectionOptions, + isBullMQEnabled, + KNOWLEDGE_CONNECTOR_SYNC_QUEUE, + KNOWLEDGE_DOCUMENT_PROCESSING_QUEUE, + MOTHERSHIP_JOB_EXECUTION_QUEUE, + WORKSPACE_NOTIFICATION_DELIVERY_QUEUE, +} from '@/lib/core/bullmq' +import { wakeWorkspaceDispatcher } from '@/lib/core/workspace-dispatch' +import { sweepPendingNotificationDeliveries } from '@/background/workspace-notification-delivery' +import { startWorkerHealthServer, updateWorkerHealthState } from '@/worker/health' +import { processKnowledgeConnectorSync } from '@/worker/processors/knowledge-connector-sync' +import { processKnowledgeDocument } from '@/worker/processors/knowledge-document-processing' +import { processMothershipJobExecution } from '@/worker/processors/mothership-job-execution' +import { processSchedule } from '@/worker/processors/schedule' +import { processWebhook } from '@/worker/processors/webhook' +import { processWorkflow } from '@/worker/processors/workflow' +import { processWorkspaceNotificationDelivery } from '@/worker/processors/workspace-notification-delivery' + +const logger = 
createLogger('BullMQWorker') + +const DEFAULT_WORKER_PORT = 3001 +const DEFAULT_WORKFLOW_CONCURRENCY = 50 +const DEFAULT_WEBHOOK_CONCURRENCY = 30 +const DEFAULT_SCHEDULE_CONCURRENCY = 20 +const DEFAULT_MOTHERSHIP_JOB_CONCURRENCY = 10 +const DEFAULT_CONNECTOR_SYNC_CONCURRENCY = 5 +const DEFAULT_DOCUMENT_PROCESSING_CONCURRENCY = 20 +const DEFAULT_NOTIFICATION_DELIVERY_CONCURRENCY = 10 +const DISPATCHER_WAKE_INTERVAL_MS = 5_000 +const NOTIFICATION_SWEEPER_INTERVAL_MS = 10_000 + +function parseWorkerNumber(value: string | undefined, fallback: number): number { + const parsed = Number.parseInt(value ?? '', 10) + return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback +} + +async function main() { + const workerPort = parseWorkerNumber(process.env.WORKER_PORT, DEFAULT_WORKER_PORT) + const healthServer = startWorkerHealthServer(workerPort) + + if (!isBullMQEnabled()) { + logger.warn('BullMQ worker started without REDIS_URL; worker will remain idle') + + const shutdownWithoutRedis = async () => { + await healthServer.close() + process.exit(0) + } + + process.on('SIGINT', shutdownWithoutRedis) + process.on('SIGTERM', shutdownWithoutRedis) + return + } + + const connection = getBullMQConnectionOptions() + + const workflowWorker = new Worker('workflow-execution', processWorkflow, { + connection, + concurrency: parseWorkerNumber( + process.env.WORKER_CONCURRENCY_WORKFLOW, + DEFAULT_WORKFLOW_CONCURRENCY + ), + }) + + const webhookWorker = new Worker('webhook-execution', processWebhook, { + connection, + concurrency: parseWorkerNumber( + process.env.WORKER_CONCURRENCY_WEBHOOK, + DEFAULT_WEBHOOK_CONCURRENCY + ), + }) + + const scheduleWorker = new Worker('schedule-execution', processSchedule, { + connection, + concurrency: parseWorkerNumber( + process.env.WORKER_CONCURRENCY_SCHEDULE, + DEFAULT_SCHEDULE_CONCURRENCY + ), + }) + + const mothershipJobWorker = new Worker( + MOTHERSHIP_JOB_EXECUTION_QUEUE, + processMothershipJobExecution, + { + connection, + concurrency: 
parseWorkerNumber( + process.env.WORKER_CONCURRENCY_MOTHERSHIP_JOB, + DEFAULT_MOTHERSHIP_JOB_CONCURRENCY + ), + } + ) + + const connectorSyncWorker = new Worker( + KNOWLEDGE_CONNECTOR_SYNC_QUEUE, + processKnowledgeConnectorSync, + { + connection, + concurrency: parseWorkerNumber( + process.env.WORKER_CONCURRENCY_CONNECTOR_SYNC, + DEFAULT_CONNECTOR_SYNC_CONCURRENCY + ), + } + ) + + const documentProcessingWorker = new Worker( + KNOWLEDGE_DOCUMENT_PROCESSING_QUEUE, + processKnowledgeDocument, + { + connection, + concurrency: parseWorkerNumber( + process.env.WORKER_CONCURRENCY_DOCUMENT_PROCESSING, + DEFAULT_DOCUMENT_PROCESSING_CONCURRENCY + ), + } + ) + + const notificationDeliveryWorker = new Worker( + WORKSPACE_NOTIFICATION_DELIVERY_QUEUE, + processWorkspaceNotificationDelivery, + { + connection, + concurrency: parseWorkerNumber( + process.env.WORKER_CONCURRENCY_NOTIFICATION_DELIVERY, + DEFAULT_NOTIFICATION_DELIVERY_CONCURRENCY + ), + } + ) + + const workers = [ + workflowWorker, + webhookWorker, + scheduleWorker, + mothershipJobWorker, + connectorSyncWorker, + documentProcessingWorker, + notificationDeliveryWorker, + ] + + for (const worker of workers) { + worker.on('failed', (job, error) => { + logger.error('BullMQ job failed', { + queue: worker.name, + jobId: job?.id, + name: job?.name, + error: error.message, + }) + }) + } + + const dispatcherWakeInterval = setInterval(() => { + void wakeWorkspaceDispatcher() + .then(() => { + updateWorkerHealthState({ dispatcherLastWakeAt: Date.now() }) + }) + .catch((error) => { + logger.error('Periodic workspace dispatcher wake failed', { error }) + }) + }, DISPATCHER_WAKE_INTERVAL_MS) + dispatcherWakeInterval.unref() + + const notificationSweeperInterval = setInterval(() => { + void sweepPendingNotificationDeliveries().catch((error) => { + logger.error('Pending notification sweeper failed', { error }) + }) + }, NOTIFICATION_SWEEPER_INTERVAL_MS) + notificationSweeperInterval.unref() + + const shutdown = async () => { + 
logger.info('Shutting down BullMQ worker') + + clearInterval(dispatcherWakeInterval) + clearInterval(notificationSweeperInterval) + await Promise.allSettled(workers.map((worker) => worker.close())) + await healthServer.close() + + process.exit(0) + } + + process.on('SIGINT', shutdown) + process.on('SIGTERM', shutdown) +} + +main().catch((error) => { + logger.error('Failed to start BullMQ worker', { + error: error instanceof Error ? error.message : String(error), + }) + process.exit(1) +}) diff --git a/apps/sim/worker/processors/knowledge-connector-sync.ts b/apps/sim/worker/processors/knowledge-connector-sync.ts new file mode 100644 index 00000000000..9a504ebde0f --- /dev/null +++ b/apps/sim/worker/processors/knowledge-connector-sync.ts @@ -0,0 +1,22 @@ +import { createLogger } from '@sim/logger' +import type { Job } from 'bullmq' +import type { BullMQJobData } from '@/lib/core/bullmq' +import { runDispatchedJob } from '@/lib/core/workspace-dispatch' +import { executeSync } from '@/lib/knowledge/connectors/sync-engine' +import type { ConnectorSyncPayload } from '@/background/knowledge-connector-sync' + +const logger = createLogger('BullMQKnowledgeConnectorSync') + +export async function processKnowledgeConnectorSync(job: Job>) { + const { connectorId, fullSync } = job.data.payload + const isFinalAttempt = job.attemptsMade + 1 >= (job.opts.attempts ?? 
1) + + logger.info('Processing connector sync job', { + jobId: job.id, + connectorId, + }) + + return runDispatchedJob(job.data.metadata, () => executeSync(connectorId, { fullSync }), { + isFinalAttempt, + }) +} diff --git a/apps/sim/worker/processors/knowledge-document-processing.ts b/apps/sim/worker/processors/knowledge-document-processing.ts new file mode 100644 index 00000000000..74fff94fb7b --- /dev/null +++ b/apps/sim/worker/processors/knowledge-document-processing.ts @@ -0,0 +1,26 @@ +import { createLogger } from '@sim/logger' +import type { Job } from 'bullmq' +import type { BullMQJobData } from '@/lib/core/bullmq' +import { runDispatchedJob } from '@/lib/core/workspace-dispatch' +import { type DocumentJobData, processDocumentAsync } from '@/lib/knowledge/documents/service' + +const logger = createLogger('BullMQKnowledgeDocumentProcessing') + +export async function processKnowledgeDocument(job: Job>) { + const { knowledgeBaseId, documentId, docData, processingOptions } = job.data.payload + const isFinalAttempt = job.attemptsMade + 1 >= (job.opts.attempts ?? 
1) + + logger.info('Processing knowledge document job', { + jobId: job.id, + knowledgeBaseId, + documentId, + }) + + await runDispatchedJob( + job.data.metadata, + () => processDocumentAsync(knowledgeBaseId, documentId, docData, processingOptions), + { + isFinalAttempt, + } + ) +} diff --git a/apps/sim/worker/processors/mothership-job-execution.ts b/apps/sim/worker/processors/mothership-job-execution.ts new file mode 100644 index 00000000000..d5980394388 --- /dev/null +++ b/apps/sim/worker/processors/mothership-job-execution.ts @@ -0,0 +1,20 @@ +import { createLogger } from '@sim/logger' +import type { Job } from 'bullmq' +import type { BullMQJobData } from '@/lib/core/bullmq' +import { runDispatchedJob } from '@/lib/core/workspace-dispatch' +import { executeJobInline, type JobExecutionPayload } from '@/background/schedule-execution' + +const logger = createLogger('BullMQMothershipJobExecution') + +export async function processMothershipJobExecution(job: Job>) { + const isFinalAttempt = job.attemptsMade + 1 >= (job.opts.attempts ?? 
1) + + logger.info('Processing mothership scheduled job', { + jobId: job.id, + scheduleId: job.data.payload.scheduleId, + }) + + await runDispatchedJob(job.data.metadata, () => executeJobInline(job.data.payload), { + isFinalAttempt, + }) +} diff --git a/apps/sim/worker/processors/schedule.ts b/apps/sim/worker/processors/schedule.ts new file mode 100644 index 00000000000..78f4cde7d79 --- /dev/null +++ b/apps/sim/worker/processors/schedule.ts @@ -0,0 +1,21 @@ +import { createLogger } from '@sim/logger' +import type { Job } from 'bullmq' +import type { BullMQJobData } from '@/lib/core/bullmq' +import { runDispatchedJob } from '@/lib/core/workspace-dispatch' +import { executeScheduleJob, type ScheduleExecutionPayload } from '@/background/schedule-execution' + +const logger = createLogger('BullMQScheduleProcessor') + +export async function processSchedule(job: Job>) { + const { payload } = job.data + const isFinalAttempt = job.attemptsMade + 1 >= (job.opts.attempts ?? 1) + + logger.info('Processing schedule job', { + jobId: job.id, + name: job.name, + }) + + return runDispatchedJob(job.data.metadata, () => executeScheduleJob(payload), { + isFinalAttempt, + }) +} diff --git a/apps/sim/worker/processors/webhook.ts b/apps/sim/worker/processors/webhook.ts new file mode 100644 index 00000000000..da61aede1c8 --- /dev/null +++ b/apps/sim/worker/processors/webhook.ts @@ -0,0 +1,21 @@ +import { createLogger } from '@sim/logger' +import type { Job } from 'bullmq' +import type { BullMQJobData } from '@/lib/core/bullmq' +import { runDispatchedJob } from '@/lib/core/workspace-dispatch' +import { executeWebhookJob, type WebhookExecutionPayload } from '@/background/webhook-execution' + +const logger = createLogger('BullMQWebhookProcessor') + +export async function processWebhook(job: Job>) { + const { payload } = job.data + const isFinalAttempt = job.attemptsMade + 1 >= (job.opts.attempts ?? 
1) + + logger.info('Processing webhook job', { + jobId: job.id, + name: job.name, + }) + + return runDispatchedJob(job.data.metadata, () => executeWebhookJob(payload), { + isFinalAttempt, + }) +} diff --git a/apps/sim/worker/processors/workflow.ts b/apps/sim/worker/processors/workflow.ts new file mode 100644 index 00000000000..8648e76b556 --- /dev/null +++ b/apps/sim/worker/processors/workflow.ts @@ -0,0 +1,51 @@ +import { createLogger } from '@sim/logger' +import type { Job } from 'bullmq' +import type { BullMQJobData } from '@/lib/core/bullmq' +import { runDispatchedJob } from '@/lib/core/workspace-dispatch' +import { + DIRECT_WORKFLOW_JOB_NAME, + executeQueuedWorkflowJob, + type QueuedWorkflowExecutionPayload, +} from '@/lib/workflows/executor/queued-workflow-execution' +import { executeWorkflowJob, type WorkflowExecutionPayload } from '@/background/workflow-execution' + +const logger = createLogger('BullMQWorkflowProcessor') + +type WorkflowQueueJobData = + | BullMQJobData + | BullMQJobData + +function isDirectWorkflowJob( + job: Job +): job is Job> { + return job.name === DIRECT_WORKFLOW_JOB_NAME +} + +function isBackgroundWorkflowJob( + job: Job +): job is Job> { + return job.name !== DIRECT_WORKFLOW_JOB_NAME +} + +export async function processWorkflow(job: Job) { + const isFinalAttempt = job.attemptsMade + 1 >= (job.opts.attempts ?? 
1) + + logger.info('Processing workflow job', { + jobId: job.id, + name: job.name, + }) + + if (isDirectWorkflowJob(job)) { + return runDispatchedJob(job.data.metadata, () => executeQueuedWorkflowJob(job.data.payload), { + isFinalAttempt, + }) + } + + if (isBackgroundWorkflowJob(job)) { + return runDispatchedJob(job.data.metadata, () => executeWorkflowJob(job.data.payload), { + isFinalAttempt, + }) + } + + throw new Error(`Unsupported workflow job type: ${job.name}`) +} diff --git a/apps/sim/worker/processors/workspace-notification-delivery.ts b/apps/sim/worker/processors/workspace-notification-delivery.ts new file mode 100644 index 00000000000..7b6dbbc6da1 --- /dev/null +++ b/apps/sim/worker/processors/workspace-notification-delivery.ts @@ -0,0 +1,32 @@ +import { createLogger } from '@sim/logger' +import type { Job } from 'bullmq' +import type { BullMQJobData } from '@/lib/core/bullmq' +import { runDispatchedJob } from '@/lib/core/workspace-dispatch' +import { + executeNotificationDelivery, + type NotificationDeliveryParams, +} from '@/background/workspace-notification-delivery' + +const logger = createLogger('BullMQWorkspaceNotificationDelivery') + +export async function processWorkspaceNotificationDelivery( + job: Job> +) { + const isFinalAttempt = job.attemptsMade + 1 >= (job.opts.attempts ?? 1) + + logger.info('Processing workspace notification delivery job', { + jobId: job.id, + deliveryId: job.data.payload.deliveryId, + }) + + const result = await runDispatchedJob( + job.data.metadata, + () => executeNotificationDelivery(job.data.payload), + { + isFinalAttempt, + } + ) + + // Retry scheduling is persisted in the notification delivery row and + // rehydrated by the periodic sweeper, which makes retries crash-safe. 
+} diff --git a/bun.lock b/bun.lock index 61df0c93763..9fc8ccc58a3 100644 --- a/bun.lock +++ b/bun.lock @@ -1,6 +1,5 @@ { "lockfileVersion": 1, - "configVersion": 0, "workspaces": { "": { "name": "simstudio", @@ -115,6 +114,7 @@ "better-auth": "1.3.12", "binary-extensions": "^2.0.0", "browser-image-compression": "^2.0.2", + "bullmq": "5.71.0", "chalk": "5.6.2", "cheerio": "1.1.2", "class-variance-authority": "^0.7.1", @@ -816,6 +816,18 @@ "@mongodb-js/saslprep": ["@mongodb-js/saslprep@1.4.5", "", { "dependencies": { "sparse-bitfield": "^3.0.3" } }, "sha512-k64Lbyb7ycCSXHSLzxVdb2xsKGPMvYZfCICXvDsI8Z65CeWQzTEKS4YmGbnqw+U9RBvLPTsB6UCmwkgsDTGWIw=="], + "@msgpackr-extract/msgpackr-extract-darwin-arm64": ["@msgpackr-extract/msgpackr-extract-darwin-arm64@3.0.3", "", { "os": "darwin", "cpu": "arm64" }, "sha512-QZHtlVgbAdy2zAqNA9Gu1UpIuI8Xvsd1v8ic6B2pZmeFnFcMWiPLfWXh7TVw4eGEZ/C9TH281KwhVoeQUKbyjw=="], + + "@msgpackr-extract/msgpackr-extract-darwin-x64": ["@msgpackr-extract/msgpackr-extract-darwin-x64@3.0.3", "", { "os": "darwin", "cpu": "x64" }, "sha512-mdzd3AVzYKuUmiWOQ8GNhl64/IoFGol569zNRdkLReh6LRLHOXxU4U8eq0JwaD8iFHdVGqSy4IjFL4reoWCDFw=="], + + "@msgpackr-extract/msgpackr-extract-linux-arm": ["@msgpackr-extract/msgpackr-extract-linux-arm@3.0.3", "", { "os": "linux", "cpu": "arm" }, "sha512-fg0uy/dG/nZEXfYilKoRe7yALaNmHoYeIoJuJ7KJ+YyU2bvY8vPv27f7UKhGRpY6euFYqEVhxCFZgAUNQBM3nw=="], + + "@msgpackr-extract/msgpackr-extract-linux-arm64": ["@msgpackr-extract/msgpackr-extract-linux-arm64@3.0.3", "", { "os": "linux", "cpu": "arm64" }, "sha512-YxQL+ax0XqBJDZiKimS2XQaf+2wDGVa1enVRGzEvLLVFeqa5kx2bWbtcSXgsxjQB7nRqqIGFIcLteF/sHeVtQg=="], + + "@msgpackr-extract/msgpackr-extract-linux-x64": ["@msgpackr-extract/msgpackr-extract-linux-x64@3.0.3", "", { "os": "linux", "cpu": "x64" }, "sha512-cvwNfbP07pKUfq1uH+S6KJ7dT9K8WOE4ZiAcsrSes+UY55E/0jLYc+vq+DO7jlmqRb5zAggExKm0H7O/CBaesg=="], + + "@msgpackr-extract/msgpackr-extract-win32-x64": ["@msgpackr-extract/msgpackr-extract-win32-x64@3.0.3", 
"", { "os": "win32", "cpu": "x64" }, "sha512-x0fWaQtYp4E6sktbsdAqnehxDgEc/VwM7uLsRCYWaiGu0ykYdZPiS8zCWdnjHwyiumousxfBm4SO31eXqwEZhQ=="], + "@napi-rs/canvas": ["@napi-rs/canvas@0.1.91", "", { "optionalDependencies": { "@napi-rs/canvas-android-arm64": "0.1.91", "@napi-rs/canvas-darwin-arm64": "0.1.91", "@napi-rs/canvas-darwin-x64": "0.1.91", "@napi-rs/canvas-linux-arm-gnueabihf": "0.1.91", "@napi-rs/canvas-linux-arm64-gnu": "0.1.91", "@napi-rs/canvas-linux-arm64-musl": "0.1.91", "@napi-rs/canvas-linux-riscv64-gnu": "0.1.91", "@napi-rs/canvas-linux-x64-gnu": "0.1.91", "@napi-rs/canvas-linux-x64-musl": "0.1.91", "@napi-rs/canvas-win32-arm64-msvc": "0.1.91", "@napi-rs/canvas-win32-x64-msvc": "0.1.91" } }, "sha512-eeIe1GoB74P1B0Nkw6pV8BCQ3hfCfvyYr4BntzlCsnFXzVJiPMDnLeIx3gVB0xQMblHYnjK/0nCLvirEhOjr5g=="], "@napi-rs/canvas-android-arm64": ["@napi-rs/canvas-android-arm64@0.1.91", "", { "os": "android", "cpu": "arm64" }, "sha512-SLLzXXgSnfct4zy/BVAfweZQkYkPJsNsJ2e5DOE8DFEHC6PufyUrwb12yqeu2So2IOIDpWJJaDAxKY/xpy6MYQ=="], @@ -1790,6 +1802,8 @@ "buildcheck": ["buildcheck@0.0.7", "", {}, "sha512-lHblz4ahamxpTmnsk+MNTRWsjYKv965MwOrSJyeD588rR3Jcu7swE+0wN5F+PbL5cjgu/9ObkhfzEPuofEMwLA=="], + "bullmq": ["bullmq@5.71.0", "", { "dependencies": { "cron-parser": "4.9.0", "ioredis": "5.9.3", "msgpackr": "1.11.5", "node-abort-controller": "3.1.1", "semver": "7.7.4", "tslib": "2.8.1", "uuid": "11.1.0" } }, "sha512-aeNWh4drsafSKnAJeiNH/nZP/5O8ZdtdMbnOPZmpjXj7NZUP5YC901U3bIH41iZValm7d1i3c34ojv7q31m30w=="], + "bytes": ["bytes@3.1.2", "", {}, "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg=="], "c12": ["c12@3.1.0", "", { "dependencies": { "chokidar": "^4.0.3", "confbox": "^0.2.2", "defu": "^6.1.4", "dotenv": "^16.6.1", "exsolve": "^1.0.7", "giget": "^2.0.0", "jiti": "^2.4.2", "ohash": "^2.0.11", "pathe": "^2.0.3", "perfect-debounce": "^1.0.0", "pkg-types": "^2.2.0", "rc9": "^2.1.2" }, "peerDependencies": { "magicast": "^0.3.5" }, "optionalPeers": 
["magicast"] }, "sha512-uWoS8OU1MEIsOv8p/5a82c3H31LsWVR5qiyXVfBNOzfffjUWtPnhAb4BYI2uG2HfGmZmFjCtui5XNWaps+iFuw=="], @@ -1928,6 +1942,8 @@ "critters": ["critters@0.0.25", "", { "dependencies": { "chalk": "^4.1.0", "css-select": "^5.1.0", "dom-serializer": "^2.0.0", "domhandler": "^5.0.2", "htmlparser2": "^8.0.2", "postcss": "^8.4.23", "postcss-media-query-parser": "^0.2.3" } }, "sha512-ROF/tjJyyRdM8/6W0VqoN5Ql05xAGnkf5b7f3sTEl1bI5jTQQf8O918RD/V9tEb9pRY/TKcvJekDbJtniHyPtQ=="], + "cron-parser": ["cron-parser@4.9.0", "", { "dependencies": { "luxon": "^3.2.1" } }, "sha512-p0SaNjrHOnQeR8/VnfGbmg9te2kfyYSQ7Sc/j/6DtPL3JQvKxmjO9TSjNFpujqV3vEYYBvNNvXSxzyksBWAx1Q=="], + "croner": ["croner@9.1.0", "", {}, "sha512-p9nwwR4qyT5W996vBZhdvBCnMhicY5ytZkR4D1Xj0wuTDEiMnjwR57Q3RXYY/s0EpX6Ay3vgIcfaR+ewGHsi+g=="], "cronstrue": ["cronstrue@3.3.0", "", { "bin": { "cronstrue": "bin/cli.js" } }, "sha512-iwJytzJph1hosXC09zY8F5ACDJKerr0h3/2mOxg9+5uuFObYlgK0m35uUPk4GCvhHc2abK7NfnR9oMqY0qZFAg=="], @@ -2630,6 +2646,8 @@ "lucide-react": ["lucide-react@0.511.0", "", { "peerDependencies": { "react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0" } }, "sha512-VK5a2ydJ7xm8GvBeKLS9mu1pVK6ucef9780JVUjw6bAjJL/QXnd4Y0p7SPeOUMC27YhzNCZvm5d/QX0Tp3rc0w=="], + "luxon": ["luxon@3.7.2", "", {}, "sha512-vtEhXh/gNjI9Yg1u4jX/0YVPMvxzHuGgCm6tC5kZyb08yjGWGnqAjGJvcXbqQR2P3MyMEFnRbpcdFS6PBcLqew=="], + "magic-string": ["magic-string@0.30.21", "", { "dependencies": { "@jridgewell/sourcemap-codec": "^1.5.5" } }, "sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ=="], "magicast": ["magicast@0.3.5", "", { "dependencies": { "@babel/parser": "^7.25.4", "@babel/types": "^7.25.4", "source-map-js": "^1.2.0" } }, "sha512-L0WhttDl+2BOsybvEOLK7fW3UA0OQ0IQ2d6Zl2x/a6vVRs3bAY0ECOSHHeL5jD+SbOpOCUEi0y1DgHEn9Qn1AQ=="], @@ -2816,6 +2834,10 @@ "ms": ["ms@2.1.3", "", {}, "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="], + "msgpackr": 
["msgpackr@1.11.5", "", { "optionalDependencies": { "msgpackr-extract": "^3.0.2" } }, "sha512-UjkUHN0yqp9RWKy0Lplhh+wlpdt9oQBYgULZOiFhV3VclSF1JnSQWZ5r9gORQlNYaUKQoR8itv7g7z1xDDuACA=="], + + "msgpackr-extract": ["msgpackr-extract@3.0.3", "", { "dependencies": { "node-gyp-build-optional-packages": "5.2.2" }, "optionalDependencies": { "@msgpackr-extract/msgpackr-extract-darwin-arm64": "3.0.3", "@msgpackr-extract/msgpackr-extract-darwin-x64": "3.0.3", "@msgpackr-extract/msgpackr-extract-linux-arm": "3.0.3", "@msgpackr-extract/msgpackr-extract-linux-arm64": "3.0.3", "@msgpackr-extract/msgpackr-extract-linux-x64": "3.0.3", "@msgpackr-extract/msgpackr-extract-win32-x64": "3.0.3" }, "bin": { "download-msgpackr-prebuilds": "bin/download-prebuilds.js" } }, "sha512-P0efT1C9jIdVRefqjzOQ9Xml57zpOXnIuS+csaB4MdZbTdmGDLo8XhzBG1N7aO11gKDDkJvBLULeFTo46wwreA=="], + "mustache": ["mustache@4.2.0", "", { "bin": { "mustache": "bin/mustache" } }, "sha512-71ippSywq5Yb7/tVYyGbkBggbU8H3u5Rz56fH60jGFgr8uHwxs+aSKeqmluIVzM0m0kB7xQjKS6qPfd0b2ZoqQ=="], "mute-stream": ["mute-stream@0.0.8", "", {}, "sha512-nnbWWOkoWyUsTjKrhgD0dcz22mdkSnpYqbEjIm2nhwhuxlSkpywJmBo8h0ZqJdkp73mb90SssHkN4rsRaBAfAA=="], @@ -2856,6 +2878,8 @@ "node-abi": ["node-abi@3.87.0", "", { "dependencies": { "semver": "^7.3.5" } }, "sha512-+CGM1L1CgmtheLcBuleyYOn7NWPVu0s0EJH2C4puxgEZb9h8QpR9G2dBfZJOAUhi7VQxuBPMd0hiISWcTyiYyQ=="], + "node-abort-controller": ["node-abort-controller@3.1.1", "", {}, "sha512-AGK2yQKIjRuqnc6VkX2Xj5d+QW8xZ87pa1UK6yA6ouUyuxfHuMP6umE5QK7UmTeOAymo+Zx1Fxiuw9rVx8taHQ=="], + "node-domexception": ["node-domexception@1.0.0", "", {}, "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ=="], "node-ensure": ["node-ensure@0.0.0", "", {}, "sha512-DRI60hzo2oKN1ma0ckc6nQWlHU69RH6xN0sjQTjMpChPfTYvKZdcQFfdYK2RWbJcKyUizSIy/l8OTGxMAM1QDw=="], @@ -2868,6 +2892,8 @@ "node-gyp-build": ["node-gyp-build@4.8.4", "", { "bin": { "node-gyp-build": "bin.js", "node-gyp-build-optional": 
"optional.js", "node-gyp-build-test": "build-test.js" } }, "sha512-LA4ZjwlnUblHVgq0oBF3Jl/6h/Nvs5fzBLwdEF4nuxnFdsfajde4WfxtJr3CaiH+F6ewcIB/q4jQ4UzPyid+CQ=="], + "node-gyp-build-optional-packages": ["node-gyp-build-optional-packages@5.2.2", "", { "dependencies": { "detect-libc": "^2.0.1" }, "bin": { "node-gyp-build-optional-packages": "bin.js", "node-gyp-build-optional-packages-optional": "optional.js", "node-gyp-build-optional-packages-test": "build-test.js" } }, "sha512-s+w+rBWnpTMwSFbaE0UXsRlg7hU4FjekKU4eyAih5T8nJuNZT1nNsskXpxmeqSK9UzkBl6UgRlnKc8hz8IEqOw=="], + "node-int64": ["node-int64@0.4.0", "", {}, "sha512-O5lz91xSOeoXP6DulyHfllpq+Eg00MWitZIbtPfoSEvqIHdl5gfcY6hYzDWnj0qD5tz52PI08u9qUvSVeUBeHw=="], "node-readable-to-web-readable-stream": ["node-readable-to-web-readable-stream@0.4.2", "", {}, "sha512-/cMZNI34v//jUTrI+UIo4ieHAB5EZRY/+7OmXZgBxaWBMcW2tGdceIw06RFxWxrKZ5Jp3sI2i5TsRo+CBhtVLQ=="], @@ -4052,6 +4078,8 @@ "body-parser/iconv-lite": ["iconv-lite@0.7.1", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-2Tth85cXwGFHfvRgZWszZSvdo+0Xsqmw8k8ZwxScfcBneNUraK+dxRxRm24nszx80Y0TVio8kKLt5sLE7ZCLlw=="], + "bullmq/ioredis": ["ioredis@5.9.3", "", { "dependencies": { "@ioredis/commands": "1.5.0", "cluster-key-slot": "^1.1.0", "debug": "^4.3.4", "denque": "^2.1.0", "lodash.defaults": "^4.2.0", "lodash.isarguments": "^3.1.0", "redis-errors": "^1.2.0", "redis-parser": "^3.0.0", "standard-as-callback": "^2.1.0" } }, "sha512-VI5tMCdeoxZWU5vjHWsiE/Su76JGhBvWF1MJnV9ZtGltHk9BmD48oDq8Tj8haZ85aceXZMxLNDQZRVo5QKNgXA=="], + "c12/chokidar": ["chokidar@4.0.3", "", { "dependencies": { "readdirp": "^4.0.1" } }, "sha512-Qgzu8kfBvo+cA4962jnP1KkS6Dop5NS6g7R5LFYJr4b8Ub94PPQXUksCw9PvXoeXPRRddRNC5C1JQUR2SMGtnA=="], "c12/confbox": ["confbox@0.2.4", "", {}, "sha512-ysOGlgTFbN2/Y6Cg3Iye8YKulHw+R2fNXHrgSmXISQdMnomY6eNDprVdW9R5xBguEqI954+S6709UyiO7B+6OQ=="], diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 527c8d86b37..4c03862e35b 100644 --- 
a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -23,6 +23,9 @@ services: - OLLAMA_URL=${OLLAMA_URL:-http://localhost:11434} - SOCKET_SERVER_URL=${SOCKET_SERVER_URL:-http://realtime:3002} - NEXT_PUBLIC_SOCKET_URL=${NEXT_PUBLIC_SOCKET_URL:-http://localhost:3002} + - ADMISSION_GATE_MAX_INFLIGHT=${ADMISSION_GATE_MAX_INFLIGHT:-500} + - DISPATCH_MAX_QUEUE_PER_WORKSPACE=${DISPATCH_MAX_QUEUE_PER_WORKSPACE:-1000} + - DISPATCH_MAX_QUEUE_GLOBAL=${DISPATCH_MAX_QUEUE_GLOBAL:-50000} depends_on: db: condition: service_healthy @@ -37,6 +40,43 @@ services: retries: 3 start_period: 10s + sim-worker: + image: ghcr.io/simstudioai/simstudio:latest + command: ['bun', 'run', 'worker'] + restart: unless-stopped + deploy: + resources: + limits: + memory: 4G + environment: + - NODE_ENV=production + - DATABASE_URL=postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-simstudio} + - REDIS_URL=${REDIS_URL:-} + - ENCRYPTION_KEY=${ENCRYPTION_KEY} + - API_ENCRYPTION_KEY=${API_ENCRYPTION_KEY:-} + - INTERNAL_API_SECRET=${INTERNAL_API_SECRET} + - WORKER_PORT=${WORKER_PORT:-3001} + - WORKER_CONCURRENCY_WORKFLOW=${WORKER_CONCURRENCY_WORKFLOW:-50} + - WORKER_CONCURRENCY_WEBHOOK=${WORKER_CONCURRENCY_WEBHOOK:-30} + - WORKER_CONCURRENCY_SCHEDULE=${WORKER_CONCURRENCY_SCHEDULE:-20} + - WORKER_CONCURRENCY_MOTHERSHIP_JOB=${WORKER_CONCURRENCY_MOTHERSHIP_JOB:-10} + - WORKER_CONCURRENCY_CONNECTOR_SYNC=${WORKER_CONCURRENCY_CONNECTOR_SYNC:-5} + - WORKER_CONCURRENCY_DOCUMENT_PROCESSING=${WORKER_CONCURRENCY_DOCUMENT_PROCESSING:-20} + - WORKER_CONCURRENCY_NOTIFICATION_DELIVERY=${WORKER_CONCURRENCY_NOTIFICATION_DELIVERY:-10} + - DISPATCH_MAX_QUEUE_PER_WORKSPACE=${DISPATCH_MAX_QUEUE_PER_WORKSPACE:-1000} + - DISPATCH_MAX_QUEUE_GLOBAL=${DISPATCH_MAX_QUEUE_GLOBAL:-50000} + depends_on: + db: + condition: service_healthy + migrations: + condition: service_completed_successfully + healthcheck: + test: ['CMD', 'wget', '--spider', '--quiet', 
'http://127.0.0.1:${WORKER_PORT:-3001}/health'] + interval: 90s + timeout: 5s + retries: 3 + start_period: 10s + realtime: image: ghcr.io/simstudioai/realtime:latest restart: unless-stopped diff --git a/helm/sim/values.yaml b/helm/sim/values.yaml index af6bbc10c3b..6f985f1cd2a 100644 --- a/helm/sim/values.yaml +++ b/helm/sim/values.yaml @@ -124,6 +124,11 @@ app: OLLAMA_URL: "" # Ollama local LLM server URL ELEVENLABS_API_KEY: "" # ElevenLabs API key for text-to-speech in deployed chat + # Admission & Dispatch Queue Configuration + ADMISSION_GATE_MAX_INFLIGHT: "500" # Max concurrent in-flight execution requests per pod + DISPATCH_MAX_QUEUE_PER_WORKSPACE: "1000" # Max queued dispatch jobs per workspace + DISPATCH_MAX_QUEUE_GLOBAL: "50000" # Max queued dispatch jobs globally + # Rate Limiting Configuration (per minute) RATE_LIMIT_WINDOW_MS: "60000" # Rate limit window duration (1 minute) RATE_LIMIT_FREE_SYNC: "50" # Sync API executions per minute From 75eac74b99e64edbc46ec28b821ed21639536ff1 Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Sun, 15 Mar 2026 20:26:25 -0700 Subject: [PATCH 02/12] fix bun lock --- bun.lock | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/bun.lock b/bun.lock index 9fc8ccc58a3..4ed891bb0b7 100644 --- a/bun.lock +++ b/bun.lock @@ -2438,7 +2438,7 @@ "inquirer": ["inquirer@8.2.7", "", { "dependencies": { "@inquirer/external-editor": "^1.0.0", "ansi-escapes": "^4.2.1", "chalk": "^4.1.1", "cli-cursor": "^3.1.0", "cli-width": "^3.0.0", "figures": "^3.0.0", "lodash": "^4.17.21", "mute-stream": "0.0.8", "ora": "^5.4.1", "run-async": "^2.4.0", "rxjs": "^7.5.5", "string-width": "^4.1.0", "strip-ansi": "^6.0.0", "through": "^2.3.6", "wrap-ansi": "^6.0.1" } }, "sha512-UjOaSel/iddGZJ5xP/Eixh6dY1XghiBw4XK13rCCIJcJfyhhoul/7KhLLUGtebEj6GDYM6Vnx/mVsjx2L/mFIA=="], - "ioredis": ["ioredis@5.9.2", "", { "dependencies": { "@ioredis/commands": "1.5.0", "cluster-key-slot": "^1.1.0", "debug": "^4.3.4", "denque": "^2.1.0", 
"lodash.defaults": "^4.2.0", "lodash.isarguments": "^3.1.0", "redis-errors": "^1.2.0", "redis-parser": "^3.0.0", "standard-as-callback": "^2.1.0" } }, "sha512-tAAg/72/VxOUW7RQSX1pIxJVucYKcjFjfvj60L57jrZpYCHC3XN0WCQ3sNYL4Gmvv+7GPvTAjc+KSdeNuE8oWQ=="], + "ioredis": ["ioredis@5.9.3", "", { "dependencies": { "@ioredis/commands": "1.5.0", "cluster-key-slot": "^1.1.0", "debug": "^4.3.4", "denque": "^2.1.0", "lodash.defaults": "^4.2.0", "lodash.isarguments": "^3.1.0", "redis-errors": "^1.2.0", "redis-parser": "^3.0.0", "standard-as-callback": "^2.1.0" } }, "sha512-VI5tMCdeoxZWU5vjHWsiE/Su76JGhBvWF1MJnV9ZtGltHk9BmD48oDq8Tj8haZ85aceXZMxLNDQZRVo5QKNgXA=="], "ip-address": ["ip-address@10.1.0", "", {}, "sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q=="], @@ -4078,8 +4078,6 @@ "body-parser/iconv-lite": ["iconv-lite@0.7.1", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-2Tth85cXwGFHfvRgZWszZSvdo+0Xsqmw8k8ZwxScfcBneNUraK+dxRxRm24nszx80Y0TVio8kKLt5sLE7ZCLlw=="], - "bullmq/ioredis": ["ioredis@5.9.3", "", { "dependencies": { "@ioredis/commands": "1.5.0", "cluster-key-slot": "^1.1.0", "debug": "^4.3.4", "denque": "^2.1.0", "lodash.defaults": "^4.2.0", "lodash.isarguments": "^3.1.0", "redis-errors": "^1.2.0", "redis-parser": "^3.0.0", "standard-as-callback": "^2.1.0" } }, "sha512-VI5tMCdeoxZWU5vjHWsiE/Su76JGhBvWF1MJnV9ZtGltHk9BmD48oDq8Tj8haZ85aceXZMxLNDQZRVo5QKNgXA=="], - "c12/chokidar": ["chokidar@4.0.3", "", { "dependencies": { "readdirp": "^4.0.1" } }, "sha512-Qgzu8kfBvo+cA4962jnP1KkS6Dop5NS6g7R5LFYJr4b8Ub94PPQXUksCw9PvXoeXPRRddRNC5C1JQUR2SMGtnA=="], "c12/confbox": ["confbox@0.2.4", "", {}, "sha512-ysOGlgTFbN2/Y6Cg3Iye8YKulHw+R2fNXHrgSmXISQdMnomY6eNDprVdW9R5xBguEqI954+S6709UyiO7B+6OQ=="], From be83c97b83237e447a4218bb77c6069be7502a21 Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Sun, 15 Mar 2026 20:50:43 -0700 Subject: [PATCH 03/12] remove manual execs off queues --- 
.../api/workflows/[id]/execute/route.async.test.ts | 1 + apps/sim/app/api/workflows/[id]/execute/route.ts | 13 +++++++++---- apps/sim/lib/auth/hybrid.ts | 14 ++++++++++++++ 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/apps/sim/app/api/workflows/[id]/execute/route.async.test.ts b/apps/sim/app/api/workflows/[id]/execute/route.async.test.ts index 1a4e0bd980f..355ae6ddf06 100644 --- a/apps/sim/app/api/workflows/[id]/execute/route.async.test.ts +++ b/apps/sim/app/api/workflows/[id]/execute/route.async.test.ts @@ -21,6 +21,7 @@ const { vi.mock('@/lib/auth/hybrid', () => ({ checkHybridAuth: mockCheckHybridAuth, + hasExternalApiCredentials: vi.fn().mockReturnValue(true), AuthType: { SESSION: 'session', API_KEY: 'api_key', diff --git a/apps/sim/app/api/workflows/[id]/execute/route.ts b/apps/sim/app/api/workflows/[id]/execute/route.ts index 0b92df8aece..30e5b79f5c1 100644 --- a/apps/sim/app/api/workflows/[id]/execute/route.ts +++ b/apps/sim/app/api/workflows/[id]/execute/route.ts @@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger' import { type NextRequest, NextResponse } from 'next/server' import { validate as uuidValidate, v4 as uuidv4 } from 'uuid' import { z } from 'zod' -import { AuthType, checkHybridAuth } from '@/lib/auth/hybrid' +import { AuthType, checkHybridAuth, hasExternalApiCredentials } from '@/lib/auth/hybrid' import { admissionRejectedResponse, tryAdmit } from '@/lib/core/admission/gate' import { getJobQueue, shouldExecuteInline, shouldUseBullMQ } from '@/lib/core/async-jobs' import { createBullMQJobData } from '@/lib/core/bullmq' @@ -326,6 +326,10 @@ async function enqueueDirectWorkflowExecution( * Supports both SSE streaming (for interactive/manual runs) and direct JSON responses (for background jobs). 
*/ export async function POST(req: NextRequest, { params }: { params: Promise<{ id: string }> }) { + if (!hasExternalApiCredentials(req.headers)) { + return handleExecutePost(req, params) + } + const ticket = tryAdmit() if (!ticket) { return admissionRejectedResponse() @@ -784,7 +788,7 @@ async function handleExecutePost( const executionVariables = cachedWorkflowData?.variables ?? workflow.variables ?? {} - if (shouldUseBullMQ()) { + if (shouldUseBullMQ() && triggerType !== 'manual') { try { const dispatchJobId = await enqueueDirectWorkflowExecution( { @@ -799,7 +803,7 @@ async function handleExecutePost( timeoutMs: preprocessResult.executionTimeout?.sync, runFromBlock: resolvedRunFromBlock, }, - triggerType === 'manual' ? 1 : 5, + 5, 'interactive' ) @@ -973,7 +977,8 @@ async function handleExecutePost( } if (shouldUseDraftState) { - if (shouldUseBullMQ()) { + const useDispatchForManual = shouldUseBullMQ() && triggerType !== 'manual' + if (useDispatchForManual) { const metadata: ExecutionMetadata = { requestId, executionId, diff --git a/apps/sim/lib/auth/hybrid.ts b/apps/sim/lib/auth/hybrid.ts index af1e64da011..c461b12fcde 100644 --- a/apps/sim/lib/auth/hybrid.ts +++ b/apps/sim/lib/auth/hybrid.ts @@ -14,6 +14,20 @@ export const AuthType = { export type AuthTypeValue = (typeof AuthType)[keyof typeof AuthType] +const API_KEY_HEADER = 'x-api-key' +const BEARER_PREFIX = 'Bearer ' + +/** + * Lightweight header-only check for whether a request carries external API credentials. + * Does NOT validate the credentials — only inspects headers to classify the request + * as programmatic API traffic vs interactive session traffic. 
+ */ +export function hasExternalApiCredentials(headers: Headers): boolean { + if (headers.has(API_KEY_HEADER)) return true + const auth = headers.get('authorization') + return auth !== null && auth.startsWith(BEARER_PREFIX) +} + export interface AuthResult { success: boolean userId?: string From 74de3319c6597952f1175aed0785577e53d27e52 Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Sun, 15 Mar 2026 21:14:33 -0700 Subject: [PATCH 04/12] address comments --- apps/sim/app/api/jobs/[jobId]/route.ts | 2 +- apps/sim/app/api/schedules/execute/route.ts | 7 +++---- apps/sim/app/api/workflows/[id]/execute/route.ts | 15 +++++++++++++-- apps/sim/lib/core/admission/gate.ts | 4 +++- 4 files changed, 20 insertions(+), 8 deletions(-) diff --git a/apps/sim/app/api/jobs/[jobId]/route.ts b/apps/sim/app/api/jobs/[jobId]/route.ts index aed0a106f8c..0ce749fa828 100644 --- a/apps/sim/app/api/jobs/[jobId]/route.ts +++ b/apps/sim/app/api/jobs/[jobId]/route.ts @@ -27,7 +27,7 @@ export async function GET( const dispatchJob = await getDispatchJobRecord(taskId) const jobQueue = await getJobQueue() - const job = dispatchJob ? await jobQueue.getJob(taskId) : await jobQueue.getJob(taskId) + const job = dispatchJob ? null : await jobQueue.getJob(taskId) if (!job && !dispatchJob) { return createErrorResponse('Task not found', 404) diff --git a/apps/sim/app/api/schedules/execute/route.ts b/apps/sim/app/api/schedules/execute/route.ts index 1744e5743ff..d739f3aa67b 100644 --- a/apps/sim/app/api/schedules/execute/route.ts +++ b/apps/sim/app/api/schedules/execute/route.ts @@ -115,11 +115,10 @@ export async function GET(request: NextRequest) { } try { - const workspaceId = schedule.workflowId - ? (await import('@/lib/workflows/utils')).getWorkflowById + const { getWorkflowById } = await import('@/lib/workflows/utils') + const resolvedWorkflow = schedule.workflowId + ? await getWorkflowById(schedule.workflowId) : null - const resolvedWorkflow = - schedule.workflowId && workspaceId ? 
await workspaceId(schedule.workflowId) : null const resolvedWorkspaceId = resolvedWorkflow?.workspaceId let jobId: string diff --git a/apps/sim/app/api/workflows/[id]/execute/route.ts b/apps/sim/app/api/workflows/[id]/execute/route.ts index 30e5b79f5c1..90b5a682d66 100644 --- a/apps/sim/app/api/workflows/[id]/execute/route.ts +++ b/apps/sim/app/api/workflows/[id]/execute/route.ts @@ -812,6 +812,17 @@ async function handleExecutePost( (preprocessResult.executionTimeout?.sync ?? 300000) + 30000 ) + if (resultRecord.status === 'failed') { + return NextResponse.json( + { + success: false, + executionId, + error: resultRecord.error ?? 'Workflow execution failed', + }, + { status: 500 } + ) + } + const result = resultRecord.output as QueuedWorkflowExecutionResult const resultForResponseBlock = { @@ -977,8 +988,8 @@ async function handleExecutePost( } if (shouldUseDraftState) { - const useDispatchForManual = shouldUseBullMQ() && triggerType !== 'manual' - if (useDispatchForManual) { + const shouldDispatchViaQueue = shouldUseBullMQ() && triggerType !== 'manual' + if (shouldDispatchViaQueue) { const metadata: ExecutionMetadata = { requestId, executionId, diff --git a/apps/sim/lib/core/admission/gate.ts b/apps/sim/lib/core/admission/gate.ts index a1dc7e0dce9..0e6b7552df3 100644 --- a/apps/sim/lib/core/admission/gate.ts +++ b/apps/sim/lib/core/admission/gate.ts @@ -15,7 +15,9 @@ export interface AdmissionTicket { /** * Attempts to admit a request through the in-process gate. * Returns a ticket with a release() handle on success, or null if at capacity. - * Zero external calls — purely in-process atomic counter. + * Zero external calls — purely in-process atomic counter. Each pod maintains its + * own counter, so the effective aggregate limit across N pods is N × MAX_INFLIGHT. + * Configure ADMISSION_GATE_MAX_INFLIGHT per pod based on what each pod can sustain. 
*/ export function tryAdmit(): AdmissionTicket | null { if (inflight >= MAX_INFLIGHT) { From 7eab00b9335dc85f38b156203a89ab52f86b6641 Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Mon, 16 Mar 2026 09:51:56 -0700 Subject: [PATCH 05/12] fix legacy team limits --- apps/sim/lib/billing/workspace-concurrency.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/sim/lib/billing/workspace-concurrency.ts b/apps/sim/lib/billing/workspace-concurrency.ts index e164bdb2ccd..acb07169f91 100644 --- a/apps/sim/lib/billing/workspace-concurrency.ts +++ b/apps/sim/lib/billing/workspace-concurrency.ts @@ -72,7 +72,7 @@ function getPlanConcurrencyLimit(plan: string | null | undefined, metadata: unkn return getEnterpriseConcurrencyLimit(metadata) } - if (plan === 'team') { + if (isTeam(plan)) { return getTeamConcurrencyLimit() } @@ -81,7 +81,7 @@ function getPlanConcurrencyLimit(plan: string | null | undefined, metadata: unkn return getTeamConcurrencyLimit() } - if (isPro(plan) || isTeam(plan)) { + if (isPro(plan)) { return getProConcurrencyLimit() } From d5fbc3c12d177d06cc62cee6c972921276dac249 Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Mon, 16 Mar 2026 11:27:58 -0700 Subject: [PATCH 06/12] cleanup enterprise typing code --- .../app/api/workflows/[id]/execute/route.ts | 5 ++-- apps/sim/lib/billing/types/index.ts | 5 +++- apps/sim/lib/billing/webhooks/enterprise.ts | 30 ++----------------- 3 files changed, 9 insertions(+), 31 deletions(-) diff --git a/apps/sim/app/api/workflows/[id]/execute/route.ts b/apps/sim/app/api/workflows/[id]/execute/route.ts index 90b5a682d66..71de6ecccf6 100644 --- a/apps/sim/app/api/workflows/[id]/execute/route.ts +++ b/apps/sim/app/api/workflows/[id]/execute/route.ts @@ -207,8 +207,9 @@ async function handleAsyncExecution(params: AsyncExecutionParams): Promise v.toLowerCase()) + .pipe(z.literal('enterprise')), // The referenceId must be provided in Stripe metadata to link to the organization // This gets stored in 
the subscription.referenceId column referenceId: z.string().min(1), diff --git a/apps/sim/lib/billing/webhooks/enterprise.ts b/apps/sim/lib/billing/webhooks/enterprise.ts index c4bc6a19f22..92c3bcf459f 100644 --- a/apps/sim/lib/billing/webhooks/enterprise.ts +++ b/apps/sim/lib/billing/webhooks/enterprise.ts @@ -55,34 +55,8 @@ export async function handleManualEnterpriseSubscription(event: Stripe.Event) { }) throw new Error('Invalid enterprise metadata for subscription') } - const metadataJson: Record = { - ...metadata, - workspaceConcurrencyLimit: - typeof metadata.workspaceConcurrencyLimit === 'string' - ? Number.parseInt(metadata.workspaceConcurrencyLimit, 10) - : metadata.workspaceConcurrencyLimit, - } - - const seats = enterpriseMetadata.seats - const monthlyPrice = enterpriseMetadata.monthlyPrice - if (!seats || seats <= 0 || Number.isNaN(seats)) { - logger.error('[subscription.created] Invalid or missing seats in enterprise metadata', { - subscriptionId: stripeSubscription.id, - seatsRaw: enterpriseMetadata.seats, - seatsParsed: seats, - }) - throw new Error('Enterprise subscription must include valid seats in metadata') - } - - if (!monthlyPrice || monthlyPrice <= 0 || Number.isNaN(monthlyPrice)) { - logger.error('[subscription.created] Invalid or missing monthlyPrice in enterprise metadata', { - subscriptionId: stripeSubscription.id, - monthlyPriceRaw: enterpriseMetadata.monthlyPrice, - monthlyPriceParsed: monthlyPrice, - }) - throw new Error('Enterprise subscription must include valid monthlyPrice in metadata') - } + const { seats, monthlyPrice } = enterpriseMetadata // Get the first subscription item which contains the period information const referenceItem = stripeSubscription.items?.data?.[0] @@ -106,7 +80,7 @@ export async function handleManualEnterpriseSubscription(event: Stripe.Event) { ? new Date(stripeSubscription.trial_start * 1000) : null, trialEnd: stripeSubscription.trial_end ? 
new Date(stripeSubscription.trial_end * 1000) : null, - metadata: metadataJson, + metadata: metadata as Record, } const existing = await db From 8ee4c5913ed010cb54021a8ccb879c2c19fdd1ab Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Mon, 16 Mar 2026 14:58:49 -0700 Subject: [PATCH 07/12] inline child triggers --- apps/sim/app/api/workflows/[id]/execute/route.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/apps/sim/app/api/workflows/[id]/execute/route.ts b/apps/sim/app/api/workflows/[id]/execute/route.ts index 71de6ecccf6..4aad41a70ec 100644 --- a/apps/sim/app/api/workflows/[id]/execute/route.ts +++ b/apps/sim/app/api/workflows/[id]/execute/route.ts @@ -118,6 +118,8 @@ const ExecuteWorkflowSchema = z.object({ export const runtime = 'nodejs' export const dynamic = 'force-dynamic' +const INLINE_TRIGGER_TYPES = new Set(['manual', 'workflow']) + function resolveOutputIds( selectedOutputs: string[] | undefined, blocks: Record @@ -789,7 +791,7 @@ async function handleExecutePost( const executionVariables = cachedWorkflowData?.variables ?? workflow.variables ?? 
{} - if (shouldUseBullMQ() && triggerType !== 'manual') { + if (shouldUseBullMQ() && !INLINE_TRIGGER_TYPES.has(triggerType)) { try { const dispatchJobId = await enqueueDirectWorkflowExecution( { @@ -989,7 +991,7 @@ async function handleExecutePost( } if (shouldUseDraftState) { - const shouldDispatchViaQueue = shouldUseBullMQ() && triggerType !== 'manual' + const shouldDispatchViaQueue = shouldUseBullMQ() && !INLINE_TRIGGER_TYPES.has(triggerType) if (shouldDispatchViaQueue) { const metadata: ExecutionMetadata = { requestId, From 41e1c9cb1f2078fc92be49986ffe5b3d1cd2ad5c Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Mon, 16 Mar 2026 15:06:50 -0700 Subject: [PATCH 08/12] fix status check --- apps/sim/lib/core/workspace-dispatch/memory-store.ts | 9 +++++++++ apps/sim/lib/core/workspace-dispatch/redis-store.ts | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/apps/sim/lib/core/workspace-dispatch/memory-store.ts b/apps/sim/lib/core/workspace-dispatch/memory-store.ts index 2f4e0966bbd..a00d025e8a0 100644 --- a/apps/sim/lib/core/workspace-dispatch/memory-store.ts +++ b/apps/sim/lib/core/workspace-dispatch/memory-store.ts @@ -114,6 +114,8 @@ export class MemoryWorkspaceDispatchStorage implements WorkspaceDispatchStorageA return Array.from(this.jobs.values()).filter((record) => statuses.includes(record.status)) } + private static readonly TERMINAL_STATUSES = new Set(['completed', 'failed']) + async updateDispatchJobRecord( jobId: string, updater: (record: WorkspaceDispatchJobRecord) => WorkspaceDispatchJobRecord @@ -124,6 +126,13 @@ export class MemoryWorkspaceDispatchStorage implements WorkspaceDispatchStorageA } const updated = updater(current) + if ( + MemoryWorkspaceDispatchStorage.TERMINAL_STATUSES.has(current.status) && + !MemoryWorkspaceDispatchStorage.TERMINAL_STATUSES.has(updated.status) + ) { + return current + } + this.jobs.set(jobId, updated) return updated } diff --git a/apps/sim/lib/core/workspace-dispatch/redis-store.ts 
b/apps/sim/lib/core/workspace-dispatch/redis-store.ts index 82ac3202803..9ea1ea4caff 100644 --- a/apps/sim/lib/core/workspace-dispatch/redis-store.ts +++ b/apps/sim/lib/core/workspace-dispatch/redis-store.ts @@ -233,6 +233,8 @@ export class RedisWorkspaceDispatchStorage implements WorkspaceDispatchStorageAd return jobs } + private static readonly TERMINAL_STATUSES = new Set(['completed', 'failed']) + async updateDispatchJobRecord( jobId: string, updater: (record: WorkspaceDispatchJobRecord) => WorkspaceDispatchJobRecord @@ -243,6 +245,13 @@ export class RedisWorkspaceDispatchStorage implements WorkspaceDispatchStorageAd } const updated = updater(current) + if ( + RedisWorkspaceDispatchStorage.TERMINAL_STATUSES.has(current.status) && + !RedisWorkspaceDispatchStorage.TERMINAL_STATUSES.has(updated.status) + ) { + return current + } + await this.saveDispatchJob(updated) return updated } From 7bf9526578846010f4a0df9444ae3809447855db Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Mon, 16 Mar 2026 15:30:54 -0700 Subject: [PATCH 09/12] address more comments --- .../workspace-notification-delivery.ts | 12 +++++----- .../core/workspace-dispatch/memory-store.ts | 24 ++++++++++++++++--- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/apps/sim/background/workspace-notification-delivery.ts b/apps/sim/background/workspace-notification-delivery.ts index 230d33dae67..41d08f80cbd 100644 --- a/apps/sim/background/workspace-notification-delivery.ts +++ b/apps/sim/background/workspace-notification-delivery.ts @@ -501,15 +501,15 @@ export type NotificationDeliveryResult = | { status: 'retry'; retryDelayMs: number } async function buildRetryLog(params: NotificationDeliveryParams): Promise { + const conditions = [eq(workflowExecutionLogs.executionId, params.log.executionId)] + if (params.log.workflowId) { + conditions.push(eq(workflowExecutionLogs.workflowId, params.log.workflowId)) + } + const [storedLog] = await db .select() .from(workflowExecutionLogs) - .where( 
- and( - eq(workflowExecutionLogs.executionId, params.log.executionId), - eq(workflowExecutionLogs.workflowId, params.log.workflowId!) - ) - ) + .where(and(...conditions)) .limit(1) if (storedLog) { diff --git a/apps/sim/lib/core/workspace-dispatch/memory-store.ts b/apps/sim/lib/core/workspace-dispatch/memory-store.ts index a00d025e8a0..0cae504b607 100644 --- a/apps/sim/lib/core/workspace-dispatch/memory-store.ts +++ b/apps/sim/lib/core/workspace-dispatch/memory-store.ts @@ -16,7 +16,6 @@ export class MemoryWorkspaceDispatchStorage implements WorkspaceDispatchStorageA private workspaceOrder: string[] = [] private laneQueues = new Map() private leases = new Map>() - private sequence = 0 private cleanupInterval: NodeJS.Timeout | null = null constructor() { @@ -296,7 +295,20 @@ export class MemoryWorkspaceDispatchStorage implements WorkspaceDispatchStorageA } async popNextWorkspaceId(): Promise { - return this.workspaceOrder.shift() ?? null + const now = Date.now() + const maxScans = this.workspaceOrder.length + for (let i = 0; i < maxScans; i++) { + const id = this.workspaceOrder.shift() + if (!id) return null + const readyAt = this.workspaceReadyAt.get(id) + if (readyAt && readyAt > now) { + this.workspaceOrder.push(id) + continue + } + this.workspaceReadyAt.delete(id) + return id + } + return null } async getQueuedWorkspaceCount(): Promise { @@ -307,7 +319,12 @@ export class MemoryWorkspaceDispatchStorage implements WorkspaceDispatchStorageA return this.workspaceOrder.includes(workspaceId) } - async ensureWorkspaceActive(workspaceId: string): Promise { + private workspaceReadyAt = new Map() + + async ensureWorkspaceActive(workspaceId: string, readyAt?: number): Promise { + if (readyAt && readyAt > Date.now()) { + this.workspaceReadyAt.set(workspaceId, readyAt) + } this.ensureWorkspaceQueued(workspaceId) } @@ -473,6 +490,7 @@ export class MemoryWorkspaceDispatchStorage implements WorkspaceDispatchStorageA this.workspaceOrder = [] this.laneQueues.clear() 
this.leases.clear() + this.workspaceReadyAt.clear() } dispose(): void { From 9a6886c39b991f8183375355d8fb4dbd3f070f34 Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Mon, 16 Mar 2026 15:52:52 -0700 Subject: [PATCH 10/12] optimize reconciler scan --- .../lib/core/workspace-dispatch/adapter.ts | 2 +- .../core/workspace-dispatch/memory-store.ts | 2 +- .../lib/core/workspace-dispatch/reconciler.ts | 38 +++++++++++++++++-- .../core/workspace-dispatch/redis-store.ts | 10 +---- apps/sim/lib/core/workspace-dispatch/store.ts | 4 +- 5 files changed, 40 insertions(+), 16 deletions(-) diff --git a/apps/sim/lib/core/workspace-dispatch/adapter.ts b/apps/sim/lib/core/workspace-dispatch/adapter.ts index 637688d1117..6d55a70ff3c 100644 --- a/apps/sim/lib/core/workspace-dispatch/adapter.ts +++ b/apps/sim/lib/core/workspace-dispatch/adapter.ts @@ -34,7 +34,7 @@ export interface WorkspaceDispatchStorageAdapter { lanes: readonly WorkspaceDispatchLane[] ): Promise getGlobalQueueDepth(): Promise - reconcileGlobalQueueDepth(): Promise + reconcileGlobalQueueDepth(knownCount: number): Promise popNextWorkspaceId(): Promise getQueuedWorkspaceCount(): Promise hasActiveWorkspace(workspaceId: string): Promise diff --git a/apps/sim/lib/core/workspace-dispatch/memory-store.ts b/apps/sim/lib/core/workspace-dispatch/memory-store.ts index 0cae504b607..1c874d091be 100644 --- a/apps/sim/lib/core/workspace-dispatch/memory-store.ts +++ b/apps/sim/lib/core/workspace-dispatch/memory-store.ts @@ -290,7 +290,7 @@ export class MemoryWorkspaceDispatchStorage implements WorkspaceDispatchStorageA return count } - async reconcileGlobalQueueDepth(): Promise { + async reconcileGlobalQueueDepth(_knownCount: number): Promise { // no-op: memory store computes depth on the fly } diff --git a/apps/sim/lib/core/workspace-dispatch/reconciler.ts b/apps/sim/lib/core/workspace-dispatch/reconciler.ts index af67edb522a..739d75533b1 100644 --- a/apps/sim/lib/core/workspace-dispatch/reconciler.ts +++ 
b/apps/sim/lib/core/workspace-dispatch/reconciler.ts @@ -153,9 +153,39 @@ async function reconcileWaitingWorkspaceTracking( } export async function reconcileWorkspaceDispatchState(): Promise { - const activeJobs = await listDispatchJobsByStatuses(['admitting', 'admitted', 'running']) - const waitingJobs = await listDispatchJobsByStatuses(['waiting']) - const terminalJobs = await listDispatchJobsByStatuses(['completed', 'failed']) + const allJobs = await listDispatchJobsByStatuses([ + 'waiting', + 'admitting', + 'admitted', + 'running', + 'completed', + 'failed', + ]) + + const activeJobs: WorkspaceDispatchJobRecord[] = [] + const waitingJobs: WorkspaceDispatchJobRecord[] = [] + const terminalJobs: WorkspaceDispatchJobRecord[] = [] + let nonTerminalCount = 0 + + for (const job of allJobs) { + switch (job.status) { + case 'admitting': + case 'admitted': + case 'running': + activeJobs.push(job) + nonTerminalCount++ + break + case 'waiting': + waitingJobs.push(job) + nonTerminalCount++ + break + case 'completed': + case 'failed': + terminalJobs.push(job) + break + } + } + let changed = false for (const record of activeJobs) { @@ -181,7 +211,7 @@ export async function reconcileWorkspaceDispatchState(): Promise { } } - await reconcileGlobalQueueDepth().catch((error) => { + await reconcileGlobalQueueDepth(nonTerminalCount).catch((error) => { logger.error('Failed to reconcile global queue depth', { error }) }) diff --git a/apps/sim/lib/core/workspace-dispatch/redis-store.ts b/apps/sim/lib/core/workspace-dispatch/redis-store.ts index 9ea1ea4caff..8fbf8dfee4f 100644 --- a/apps/sim/lib/core/workspace-dispatch/redis-store.ts +++ b/apps/sim/lib/core/workspace-dispatch/redis-store.ts @@ -363,14 +363,8 @@ export class RedisWorkspaceDispatchStorage implements WorkspaceDispatchStorageAd return count ? 
Math.max(0, Number.parseInt(count, 10)) : 0 } - async reconcileGlobalQueueDepth(): Promise { - const allJobs = await this.listDispatchJobsByStatuses([ - 'waiting', - 'admitting', - 'admitted', - 'running', - ]) - await this.redis.set(GLOBAL_DEPTH_KEY, allJobs.length) + async reconcileGlobalQueueDepth(knownCount: number): Promise { + await this.redis.set(GLOBAL_DEPTH_KEY, knownCount) } async popNextWorkspaceId(): Promise { diff --git a/apps/sim/lib/core/workspace-dispatch/store.ts b/apps/sim/lib/core/workspace-dispatch/store.ts index 9bc7f0bebe9..86c1c3951a9 100644 --- a/apps/sim/lib/core/workspace-dispatch/store.ts +++ b/apps/sim/lib/core/workspace-dispatch/store.ts @@ -77,8 +77,8 @@ export async function getGlobalQueueDepth(): Promise { return getAdapter().getGlobalQueueDepth() } -export async function reconcileGlobalQueueDepth(): Promise { - return getAdapter().reconcileGlobalQueueDepth() +export async function reconcileGlobalQueueDepth(knownCount: number): Promise { + return getAdapter().reconcileGlobalQueueDepth(knownCount) } export async function popNextWorkspaceId(): Promise { From 2bf1feb7b86eb782607192c9e9653a7071832fd2 Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Mon, 16 Mar 2026 16:40:21 -0700 Subject: [PATCH 11/12] remove dead code --- .../app/api/workflows/[id]/execute/route.ts | 3 +- .../sim/lib/core/async-jobs/backends/index.ts | 1 - .../core/async-jobs/backends/redis.test.ts | 176 ------------------ .../sim/lib/core/async-jobs/backends/redis.ts | 146 --------------- apps/sim/lib/core/async-jobs/types.ts | 2 +- 5 files changed, 3 insertions(+), 325 deletions(-) delete mode 100644 apps/sim/lib/core/async-jobs/backends/redis.test.ts delete mode 100644 apps/sim/lib/core/async-jobs/backends/redis.ts diff --git a/apps/sim/app/api/workflows/[id]/execute/route.ts b/apps/sim/app/api/workflows/[id]/execute/route.ts index 4aad41a70ec..0a277f16e79 100644 --- a/apps/sim/app/api/workflows/[id]/execute/route.ts +++ 
b/apps/sim/app/api/workflows/[id]/execute/route.ts @@ -329,7 +329,8 @@ async function enqueueDirectWorkflowExecution( * Supports both SSE streaming (for interactive/manual runs) and direct JSON responses (for background jobs). */ export async function POST(req: NextRequest, { params }: { params: Promise<{ id: string }> }) { - if (!hasExternalApiCredentials(req.headers)) { + const isSessionRequest = req.headers.has('cookie') && !hasExternalApiCredentials(req.headers) + if (isSessionRequest) { return handleExecutePost(req, params) } diff --git a/apps/sim/lib/core/async-jobs/backends/index.ts b/apps/sim/lib/core/async-jobs/backends/index.ts index ef84a232233..0abb55d6af5 100644 --- a/apps/sim/lib/core/async-jobs/backends/index.ts +++ b/apps/sim/lib/core/async-jobs/backends/index.ts @@ -1,4 +1,3 @@ export { BullMQJobQueue } from './bullmq' export { DatabaseJobQueue } from './database' -export { RedisJobQueue } from './redis' export { TriggerDevJobQueue } from './trigger-dev' diff --git a/apps/sim/lib/core/async-jobs/backends/redis.test.ts b/apps/sim/lib/core/async-jobs/backends/redis.test.ts deleted file mode 100644 index ea0a5df6078..00000000000 --- a/apps/sim/lib/core/async-jobs/backends/redis.test.ts +++ /dev/null @@ -1,176 +0,0 @@ -/** - * @vitest-environment node - */ -import { createMockRedis, loggerMock, type MockRedis } from '@sim/testing' -import { beforeEach, describe, expect, it, vi } from 'vitest' - -vi.mock('@sim/logger', () => loggerMock) - -import { - JOB_MAX_LIFETIME_SECONDS, - JOB_RETENTION_SECONDS, - JOB_STATUS, -} from '@/lib/core/async-jobs/types' -import { RedisJobQueue } from './redis' - -describe('RedisJobQueue', () => { - let mockRedis: MockRedis - let queue: RedisJobQueue - - beforeEach(() => { - vi.clearAllMocks() - mockRedis = createMockRedis() - queue = new RedisJobQueue(mockRedis as never) - }) - - describe('enqueue', () => { - it.concurrent('should create a job with pending status', async () => { - const localRedis = createMockRedis() - 
const localQueue = new RedisJobQueue(localRedis as never) - - const jobId = await localQueue.enqueue('workflow-execution', { test: 'data' }) - - expect(jobId).toMatch(/^run_/) - expect(localRedis.hset).toHaveBeenCalledTimes(1) - - const [key, data] = localRedis.hset.mock.calls[0] - expect(key).toBe(`async-jobs:job:${jobId}`) - expect(data.status).toBe(JOB_STATUS.PENDING) - expect(data.type).toBe('workflow-execution') - }) - - it.concurrent('should set max lifetime TTL on enqueue', async () => { - const localRedis = createMockRedis() - const localQueue = new RedisJobQueue(localRedis as never) - - const jobId = await localQueue.enqueue('workflow-execution', { test: 'data' }) - - expect(localRedis.expire).toHaveBeenCalledWith( - `async-jobs:job:${jobId}`, - JOB_MAX_LIFETIME_SECONDS - ) - }) - }) - - describe('completeJob', () => { - it.concurrent('should set status to completed and set TTL', async () => { - const localRedis = createMockRedis() - const localQueue = new RedisJobQueue(localRedis as never) - const jobId = 'run_test123' - - await localQueue.completeJob(jobId, { result: 'success' }) - - expect(localRedis.hset).toHaveBeenCalledWith(`async-jobs:job:${jobId}`, { - status: JOB_STATUS.COMPLETED, - completedAt: expect.any(String), - output: JSON.stringify({ result: 'success' }), - updatedAt: expect.any(String), - }) - expect(localRedis.expire).toHaveBeenCalledWith( - `async-jobs:job:${jobId}`, - JOB_RETENTION_SECONDS - ) - }) - - it.concurrent('should set TTL to 24 hours (86400 seconds)', async () => { - const localRedis = createMockRedis() - const localQueue = new RedisJobQueue(localRedis as never) - - await localQueue.completeJob('run_test123', {}) - - expect(localRedis.expire).toHaveBeenCalledWith(expect.any(String), 86400) - }) - }) - - describe('markJobFailed', () => { - it.concurrent('should set status to failed and set TTL', async () => { - const localRedis = createMockRedis() - const localQueue = new RedisJobQueue(localRedis as never) - const jobId = 
'run_test456' - const error = 'Something went wrong' - - await localQueue.markJobFailed(jobId, error) - - expect(localRedis.hset).toHaveBeenCalledWith(`async-jobs:job:${jobId}`, { - status: JOB_STATUS.FAILED, - completedAt: expect.any(String), - error, - updatedAt: expect.any(String), - }) - expect(localRedis.expire).toHaveBeenCalledWith( - `async-jobs:job:${jobId}`, - JOB_RETENTION_SECONDS - ) - }) - - it.concurrent('should set TTL to 24 hours (86400 seconds)', async () => { - const localRedis = createMockRedis() - const localQueue = new RedisJobQueue(localRedis as never) - - await localQueue.markJobFailed('run_test456', 'error') - - expect(localRedis.expire).toHaveBeenCalledWith(expect.any(String), 86400) - }) - }) - - describe('startJob', () => { - it.concurrent('should not set TTL when starting a job', async () => { - const localRedis = createMockRedis() - const localQueue = new RedisJobQueue(localRedis as never) - - await localQueue.startJob('run_test789') - - expect(localRedis.hset).toHaveBeenCalled() - expect(localRedis.expire).not.toHaveBeenCalled() - }) - }) - - describe('getJob', () => { - it.concurrent('should return null for non-existent job', async () => { - const localRedis = createMockRedis() - const localQueue = new RedisJobQueue(localRedis as never) - localRedis.hgetall.mockResolvedValue({}) - - const job = await localQueue.getJob('run_nonexistent') - - expect(job).toBeNull() - }) - - it.concurrent('should deserialize job data correctly', async () => { - const localRedis = createMockRedis() - const localQueue = new RedisJobQueue(localRedis as never) - const now = new Date() - localRedis.hgetall.mockResolvedValue({ - id: 'run_test', - type: 'workflow-execution', - payload: JSON.stringify({ foo: 'bar' }), - status: JOB_STATUS.COMPLETED, - createdAt: now.toISOString(), - startedAt: now.toISOString(), - completedAt: now.toISOString(), - attempts: '1', - maxAttempts: '3', - error: '', - output: JSON.stringify({ result: 'ok' }), - metadata: 
JSON.stringify({ workflowId: 'wf_123' }), - }) - - const job = await localQueue.getJob('run_test') - - expect(job).not.toBeNull() - expect(job?.id).toBe('run_test') - expect(job?.type).toBe('workflow-execution') - expect(job?.payload).toEqual({ foo: 'bar' }) - expect(job?.status).toBe(JOB_STATUS.COMPLETED) - expect(job?.output).toEqual({ result: 'ok' }) - expect(job?.metadata.workflowId).toBe('wf_123') - }) - }) -}) - -describe('JOB_RETENTION_SECONDS', () => { - it.concurrent('should be 24 hours in seconds', async () => { - expect(JOB_RETENTION_SECONDS).toBe(24 * 60 * 60) - expect(JOB_RETENTION_SECONDS).toBe(86400) - }) -}) diff --git a/apps/sim/lib/core/async-jobs/backends/redis.ts b/apps/sim/lib/core/async-jobs/backends/redis.ts deleted file mode 100644 index 6a361d0e9c0..00000000000 --- a/apps/sim/lib/core/async-jobs/backends/redis.ts +++ /dev/null @@ -1,146 +0,0 @@ -import { createLogger } from '@sim/logger' -import type Redis from 'ioredis' -import { - type EnqueueOptions, - JOB_MAX_LIFETIME_SECONDS, - JOB_RETENTION_SECONDS, - JOB_STATUS, - type Job, - type JobMetadata, - type JobQueueBackend, - type JobStatus, - type JobType, -} from '@/lib/core/async-jobs/types' - -const logger = createLogger('RedisJobQueue') - -const KEYS = { - job: (id: string) => `async-jobs:job:${id}`, -} as const - -function serializeJob(job: Job): Record { - return { - id: job.id, - type: job.type, - payload: JSON.stringify(job.payload), - status: job.status, - createdAt: job.createdAt.toISOString(), - startedAt: job.startedAt?.toISOString() ?? '', - completedAt: job.completedAt?.toISOString() ?? '', - attempts: job.attempts.toString(), - maxAttempts: job.maxAttempts.toString(), - error: job.error ?? '', - output: job.output !== undefined ? 
JSON.stringify(job.output) : '', - metadata: JSON.stringify(job.metadata), - updatedAt: new Date().toISOString(), - } -} - -function deserializeJob(data: Record): Job | null { - if (!data || !data.id) return null - - try { - return { - id: data.id, - type: data.type as JobType, - payload: JSON.parse(data.payload), - status: data.status as JobStatus, - createdAt: new Date(data.createdAt), - startedAt: data.startedAt ? new Date(data.startedAt) : undefined, - completedAt: data.completedAt ? new Date(data.completedAt) : undefined, - attempts: Number.parseInt(data.attempts, 10), - maxAttempts: Number.parseInt(data.maxAttempts, 10), - error: data.error || undefined, - output: data.output ? JSON.parse(data.output) : undefined, - metadata: JSON.parse(data.metadata) as JobMetadata, - } - } catch (error) { - logger.error('Failed to deserialize job', { error, data }) - return null - } -} - -export class RedisJobQueue implements JobQueueBackend { - private redis: Redis - - constructor(redis: Redis) { - this.redis = redis - } - - async enqueue( - type: JobType, - payload: TPayload, - options?: EnqueueOptions - ): Promise { - const jobId = `run_${crypto.randomUUID().replace(/-/g, '').slice(0, 20)}` - const now = new Date() - - const job: Job = { - id: jobId, - type, - payload, - status: JOB_STATUS.PENDING, - createdAt: now, - attempts: 0, - maxAttempts: options?.maxAttempts ?? 3, - metadata: options?.metadata ?? 
{}, - } - - const key = KEYS.job(jobId) - const serialized = serializeJob(job as Job) - await this.redis.hset(key, serialized) - await this.redis.expire(key, JOB_MAX_LIFETIME_SECONDS) - - logger.debug('Enqueued job', { jobId, type }) - return jobId - } - - async getJob(jobId: string): Promise { - const data = await this.redis.hgetall(KEYS.job(jobId)) - return deserializeJob(data) - } - - async startJob(jobId: string): Promise { - const now = new Date() - const key = KEYS.job(jobId) - - await this.redis.hset(key, { - status: JOB_STATUS.PROCESSING, - startedAt: now.toISOString(), - updatedAt: now.toISOString(), - }) - await this.redis.hincrby(key, 'attempts', 1) - - logger.debug('Started job', { jobId }) - } - - async completeJob(jobId: string, output: unknown): Promise { - const now = new Date() - const key = KEYS.job(jobId) - - await this.redis.hset(key, { - status: JOB_STATUS.COMPLETED, - completedAt: now.toISOString(), - output: JSON.stringify(output), - updatedAt: now.toISOString(), - }) - await this.redis.expire(key, JOB_RETENTION_SECONDS) - - logger.debug('Completed job', { jobId }) - } - - async markJobFailed(jobId: string, error: string): Promise { - const now = new Date() - const key = KEYS.job(jobId) - - await this.redis.hset(key, { - status: JOB_STATUS.FAILED, - completedAt: now.toISOString(), - error, - updatedAt: now.toISOString(), - }) - await this.redis.expire(key, JOB_RETENTION_SECONDS) - - logger.debug('Marked job as failed', { jobId }) - } -} diff --git a/apps/sim/lib/core/async-jobs/types.ts b/apps/sim/lib/core/async-jobs/types.ts index a2ccf1d680c..c4bdc27c5ef 100644 --- a/apps/sim/lib/core/async-jobs/types.ts +++ b/apps/sim/lib/core/async-jobs/types.ts @@ -99,4 +99,4 @@ export interface JobQueueBackend { markJobFailed(jobId: string, error: string): Promise } -export type AsyncBackendType = 'trigger-dev' | 'bullmq' | 'redis' | 'database' +export type AsyncBackendType = 'trigger-dev' | 'bullmq' | 'database' From 
9612066eca5d7236b8744b242c549659d169ff0b Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Mon, 23 Mar 2026 18:17:38 -0700 Subject: [PATCH 12/12] add to landing page --- apps/docs/content/docs/en/execution/costs.mdx | 11 +++++++++++ apps/sim/app/(home)/components/pricing/pricing.tsx | 4 ++++ 2 files changed, 15 insertions(+) diff --git a/apps/docs/content/docs/en/execution/costs.mdx b/apps/docs/content/docs/en/execution/costs.mdx index 25f4cc05adf..9f7af19f3d0 100644 --- a/apps/docs/content/docs/en/execution/costs.mdx +++ b/apps/docs/content/docs/en/execution/costs.mdx @@ -195,6 +195,17 @@ By default, your usage is capped at the credits included in your plan. To allow Max (individual) shares the same rate limits as team plans. Team plans (Pro or Max for Teams) use the Max-tier rate limits. +### Concurrent Execution Limits + +| Plan | Concurrent Executions | +|------|----------------------| +| **Free** | 5 | +| **Pro** | 50 | +| **Max / Team** | 200 | +| **Enterprise** | 200 (customizable) | + +Concurrent execution limits control how many workflow executions can run simultaneously within a workspace. When the limit is reached, new executions are queued and admitted as running executions complete. Manual runs from the editor are not subject to these limits. 
+ ### File Storage | Plan | Storage | diff --git a/apps/sim/app/(home)/components/pricing/pricing.tsx b/apps/sim/app/(home)/components/pricing/pricing.tsx index 77c4b9b46f0..7004f924e8a 100644 --- a/apps/sim/app/(home)/components/pricing/pricing.tsx +++ b/apps/sim/app/(home)/components/pricing/pricing.tsx @@ -24,6 +24,7 @@ const PRICING_TIERS: PricingTier[] = [ '5GB file storage', '3 tables · 1,000 rows each', '5 min execution limit', + '5 concurrent/workspace', '7-day log retention', 'CLI/SDK/MCP Access', ], @@ -41,6 +42,7 @@ const PRICING_TIERS: PricingTier[] = [ '50GB file storage', '25 tables · 5,000 rows each', '50 min execution · 150 runs/min', + '50 concurrent/workspace', 'Unlimited log retention', 'CLI/SDK/MCP Access', ], @@ -58,6 +60,7 @@ const PRICING_TIERS: PricingTier[] = [ '500GB file storage', '25 tables · 5,000 rows each', '50 min execution · 300 runs/min', + '200 concurrent/workspace', 'Unlimited log retention', 'CLI/SDK/MCP Access', ], @@ -74,6 +77,7 @@ const PRICING_TIERS: PricingTier[] = [ 'Custom file storage', '10,000 tables · 1M rows each', 'Custom execution limits', + 'Custom concurrency limits', 'Unlimited log retention', 'SSO & SCIM · SOC2 & HIPAA', 'Self hosting · Dedicated support',