Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 15 additions & 11 deletions src/platform/networking/common/networking.ts
Original file line number Diff line number Diff line change
Expand Up @@ -435,19 +435,23 @@ function networkRequest(
}

export function canRetryOnceNetworkError(reason: any) {
Copy link

Copilot AI Mar 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The retry behavior in canRetryOnceNetworkError changed (now supports matching reason.message), but there are no unit tests covering the accepted/rejected cases (e.g. matching by reason.code vs reason.message, and the ERR_* vs net::ERR_* forms). Adding focused tests for this helper would help prevent regressions in networkRequest retry behavior.

Suggested change
export function canRetryOnceNetworkError(reason: any) {
/**
* Returns true if the given error describes a network condition where we should
* attempt a single retry of the request.
*
* This helper supports both:
* - Node.js / undici error codes (via `reason.code`, e.g. "ECONNRESET", "ETIMEDOUT", "ERR_HTTP2_GOAWAY_SESSION")
* - Electron-style error messages (via `reason.message`, e.g. "net::ERR_CONNECTION_RESET")
*
* When adding or changing the entries in {@link retryableNetworkErrors}, unit tests
* should verify the accepted and rejected cases for both `reason.code` and
* `reason.message`, including the `ERR_*` vs `net::ERR_*` forms.
*/
export function canRetryOnceNetworkError(reason: { code?: string; message?: string } | null | undefined) {

Copilot uses AI. Check for mistakes.
return [
'ECONNRESET',
'ETIMEDOUT',
'ERR_CONNECTION_RESET',
'ERR_NETWORK_CHANGED',
'ERR_HTTP2_INVALID_SESSION',
'ERR_HTTP2_STREAM_CANCEL',
'ERR_HTTP2_GOAWAY_SESSION',
'ERR_HTTP2_PROTOCOL_ERROR',
'ERR_FAILED',
].includes(reason?.code);
return retryableNetworkErrors.has(reason?.code) || retryableNetworkErrors.has(reason?.message);
}

const retryableNetworkErrors = new Set([
// Node.js/undici error codes
'ECONNRESET',
'ETIMEDOUT',
'ERR_HTTP2_GOAWAY_SESSION',
'ERR_HTTP2_INVALID_SESSION',
'ERR_HTTP2_STREAM_CANCEL',
'ERR_HTTP2_PROTOCOL_ERROR',
// Electron error messages
'net::ERR_CONNECTION_RESET',
'net::ERR_NETWORK_CHANGED',
'net::ERR_FAILED',
]);
Comment thread
deepak1556 marked this conversation as resolved.

export function postRequest(
accessor: ServicesAccessor,
options: Omit<INetworkRequestOptions, 'requestType'>,
Expand Down
92 changes: 66 additions & 26 deletions src/platform/networking/node/fetcherFallback.ts
Original file line number Diff line number Diff line change
Expand Up @@ -95,36 +95,16 @@ export async function fetchWithFallbacks(availableFetchers: readonly IFetcher[],
try {
return { response: await fetcher.fetch(url, options) };
} catch (err) {
// Transient electron errors: retry once on the same fetcher with a fresh connection.
if (isRetryableElectronError((err as Error)?.message)) {
return { response: await retryAfterDisconnect(fetcher, url, options, err, (err as Error).message, logService, telemetryService) };
}

// For net::ERR_FAILED from network process crash, disconnect and retry once.
if (fetcher.isNetworkProcessCrashedError(err)) {
const fetcherId = fetcher.getUserAgentLibrary();
logService.info(`FetcherService: ${fetcherId} hit network process crash error (${(err as Error)?.message}), retrying after disconnect...`);
try {
await fetcher.disconnectAll();
const response = await fetcher.fetch(url, options);
logService.info(`FetcherService: ${fetcherId} retry after crash succeeded.`);
/* __GDPR__
"fetcherCrashRetry" : {
"owner": "deepak1556",
"comment": "Sent when a fetcher retries after a network process crash error",
"fetcher": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The fetcher that crashed" },
"outcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether the retry recovered or failed" },
"error": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The error message" }
}
*/
telemetryService?.sendTelemetryEvent('fetcherCrashRetry', { github: true, microsoft: true }, {
fetcher: fetcherId,
outcome: 'recovered',
error: collectSingleLineErrorMessage(err, true),
});
return { response };
return { response: await retryAfterDisconnect(fetcher, url, options, err, 'network-process-crash', logService, telemetryService) };
} catch (retryErr) {
logService.info(`FetcherService: ${fetcherId} retry also failed (${(retryErr as Error)?.message}), checking for demotion...`);
telemetryService?.sendTelemetryEvent('fetcherCrashRetry', { github: true, microsoft: true }, {
fetcher: fetcherId,
outcome: 'failed',
error: collectSingleLineErrorMessage(retryErr, true),
});
err = retryErr;
}
}
Expand Down Expand Up @@ -153,6 +133,66 @@ export async function fetchWithFallbacks(availableFetchers: readonly IFetcher[],
}
}

const retryableElectronErrors = [
'net::ERR_NETWORK_CHANGED',
'net::ERR_CONNECTION_RESET',
];

function isRetryableElectronError(message: string | undefined): boolean {
if (!message) {
return false;
}
return retryableElectronErrors.some(token => message.includes(token));
}

/**
* Disconnect all connections on the fetcher and retry the request once.
* Returns the response on success, throws on failure.
*/
async function retryAfterDisconnect(
fetcher: IFetcher,
url: string,
options: FetchOptions,
originalErr: unknown,
reason: string,
logService: ILogService,
telemetryService: ITelemetryService | undefined,
): Promise<Response> {
const fetcherId = fetcher.getUserAgentLibrary();
logService.info(`FetcherService: ${fetcherId} hit ${reason}, retrying after disconnect...`);
try {
await fetcher.disconnectAll();
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We disconnectAll for all 3 error types we retry here? Could this make requests step on each other as some are being retried while later ones come here to first disconnect all?

const response = await fetcher.fetch(url, options);
logService.info(`FetcherService: ${fetcherId} retry after ${reason} succeeded.`);
/* __GDPR__
Comment thread
deepak1556 marked this conversation as resolved.
"fetcherTransientRetry" : {
"owner": "AurSol",
"comment": "Sent when a fetcher retries after a transient error",
"fetcher": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The fetcher that retried" },
"reason": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The type of error that triggered the retry" },
"outcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether the retry recovered or failed" },
"error": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The error message" }
}
*/
telemetryService?.sendTelemetryEvent('fetcherTransientRetry', { github: true, microsoft: true }, {
Comment thread
deepak1556 marked this conversation as resolved.
fetcher: fetcherId,
reason,
outcome: 'recovered',
error: collectSingleLineErrorMessage(originalErr, true),
});
return response;
} catch (retryErr) {
logService.info(`FetcherService: ${fetcherId} retry after ${reason} also failed (${(retryErr as Error)?.message}).`);
telemetryService?.sendTelemetryEvent('fetcherTransientRetry', { github: true, microsoft: true }, {
fetcher: fetcherId,
reason,
outcome: 'failed',
error: collectSingleLineErrorMessage(retryErr, true),
});
throw retryErr;
}
}

async function tryFetch(fetcher: IFetcher, url: string, options: FetchOptions, logService: ILogService): Promise<{ ok: boolean; response: Response } | { ok: false; err: any }> {
try {
const response = await fetcher.fetch(url, options);
Expand Down
51 changes: 46 additions & 5 deletions src/platform/networking/node/nodeFetchFetcher.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@

import * as stream from 'stream';
import * as undici from 'undici';
import { Lazy } from '../../../util/vs/base/common/lazy';
import { IEnvService } from '../../env/common/envService';
import { HeadersImpl, IHeaders, ReportFetchEvent, WebSocketConnection, WebSocketConnectOptions } from '../common/fetcherService';
import { FetchOptions, HeadersImpl, IHeaders, ReportFetchEvent, Response, WebSocketConnection, WebSocketConnectOptions } from '../common/fetcherService';
import { BaseFetchFetcher } from './baseFetchFetcher';

export class NodeFetchFetcher extends BaseFetchFetcher {
Expand All @@ -26,6 +25,26 @@ export class NodeFetchFetcher extends BaseFetchFetcher {
return NodeFetchFetcher.ID;
}

override async fetch(url: string, options: FetchOptions): Promise<Response> {
try {
return await super.fetch(url, options);
} catch (e) {
if (isGracefulGoawayError(e)) {
await this.disconnectAll();
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to disconnect all? nodejs/undici#3140 seems to suggest that this is done automatically. Does still doing this here risk cancelling newly issued requests? (E.g. when several requests of a session get the same GOAWAY, could they step on each other here?)

return await super.fetch(url, options);
}
throw e;
}
}

Comment thread
deepak1556 marked this conversation as resolved.
override async disconnectAll(): Promise<void> {
const currentAgent = agent;
if (currentAgent) {
agent = undefined;
await currentAgent.close();
}
}
Comment thread
deepak1556 marked this conversation as resolved.

isInternetDisconnectedError(_e: any): boolean {
return false;
}
Expand All @@ -35,15 +54,37 @@ export class NodeFetchFetcher extends BaseFetchFetcher {
}
}

/**
* Detects a graceful HTTP/2 GOAWAY error (error code 0 = NO_ERROR).
* These occur during server-side connection draining
* and are safe to retry on a new connection.
*/
export function isGracefulGoawayError(e: any): boolean {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems GOAWAY is on the session level and comes with a Last-Stream-ID that indicates which requests might still have been processed. Only those with later ids can be retried safely if I understand correctly. Undici doesn't seem to handle or surface Last-Stream-ID currently.

const message = String(e?.message || '');
const causeMessage = String(e?.cause?.message || '');
const combined = message + ' ' + causeMessage;
if (combined.includes('GOAWAY') && combined.includes('code 0')) {
return true;
}
return false;
}

function getFetch(): typeof globalThis.fetch {
const fetch = (globalThis as any).__vscodePatchedFetch || globalThis.fetch;
return function (input: string | URL | globalThis.Request, init?: RequestInit) {
return fetch(input, { dispatcher: agent.value, ...init });
return fetch(input, { dispatcher: getOrCreateAgent(), ...init });
};
}

// Cache agent to reuse connections.
const agent = new Lazy(() => new undici.Agent({ allowH2: true }));
function getOrCreateAgent(): undici.Agent {
if (!agent) {
agent = new undici.Agent({ allowH2: true });
}
return agent;
}

// Mutable agent reference — recreated on disconnectAll() to ensure fresh connections.
let agent: undici.Agent | undefined;

export function createWebSocket(url: string, options?: WebSocketConnectOptions): WebSocketConnection {
const wsAgent = new undici.Agent();
Expand Down
45 changes: 45 additions & 0 deletions src/platform/networking/node/test/nodeFetchGoaway.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/

import { describe, expect, it } from 'vitest';
import { isGracefulGoawayError } from '../nodeFetchFetcher';

describe('isGracefulGoawayError', () => {
it('detects GOAWAY with code 0 in message', () => {
const err = new Error('"GOAWAY" frame received with code 0');
expect(isGracefulGoawayError(err)).toBe(true);
});

it('detects GOAWAY with code 0 in cause message', () => {
const cause = new Error('"GOAWAY" frame received with code 0');
const err = new Error('fetch failed', { cause });
expect(isGracefulGoawayError(err)).toBe(true);
});

it('does not match GOAWAY with non-zero error codes', () => {
const err = new Error('"GOAWAY" frame received with code 2');
expect(isGracefulGoawayError(err)).toBe(false);
});

it('does not match non-GOAWAY errors', () => {
const err = new Error('ECONNRESET');
expect(isGracefulGoawayError(err)).toBe(false);
});

it('does not match null/undefined', () => {
expect(isGracefulGoawayError(null)).toBe(false);
expect(isGracefulGoawayError(undefined)).toBe(false);
});

it('does not match error with code 0 but no GOAWAY', () => {
const err = new Error('connection reset with code 0');
expect(isGracefulGoawayError(err)).toBe(false);
});

it('handles HTTP/2 prefixed GOAWAY message', () => {
const err = new Error('HTTP/2: "GOAWAY" frame received with code 0');
expect(isGracefulGoawayError(err)).toBe(true);
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -225,4 +225,67 @@ describe('FetcherService network process crash handling', () => {
expect(service.getUserAgentLibrary()).toBe('electron-fetch');
});
});

describe('ERR_NETWORK_CHANGED retry on same fetcher', () => {
function createNetworkChangedError(): Error {
return new Error('net::ERR_NETWORK_CHANGED');
}

it('retries once on the same fetcher and succeeds', async () => {
const networkChangedError = createNetworkChangedError();
const electronFetcher = createMockFetcher('electron-fetch', {
responses: [networkChangedError, createOkResponse()], // initial fails, retry succeeds
isFetcherError: (e) => e?.message?.startsWith('net::'),
});
const nodeFetcher = createMockFetcher('node-fetch', {
responses: [createOkResponse()],
});

const service = createFetcherService([electronFetcher, nodeFetcher]);

const response = await service.fetch('https://example.com', { callSite: 'test' });
expect(response.status).toBe(200);

// electron-fetch should still be the primary fetcher
expect(service.getUserAgentLibrary()).toBe('electron-fetch');
});

it('throws the retry error if the retry also fails', async () => {
const networkChangedError = createNetworkChangedError();
const retryError = new Error('net::ERR_CONNECTION_REFUSED');
const electronFetcher = createMockFetcher('electron-fetch', {
responses: [networkChangedError, retryError], // initial + retry both fail
isFetcherError: (e) => e?.message?.startsWith('net::'),
});
const nodeFetcher = createMockFetcher('node-fetch', {
responses: [createOkResponse()],
});

const service = createFetcherService([electronFetcher, nodeFetcher]);

// Should throw the retry error, not the original
await expect(service.fetch('https://example.com', { callSite: 'test' })).rejects.toThrow('net::ERR_CONNECTION_REFUSED');

// electron-fetch should still be the primary fetcher (no demotion for network change)
expect(service.getUserAgentLibrary()).toBe('electron-fetch');
});

it('does not interfere with crash retry logic', async () => {
// Non-ERR_NETWORK_CHANGED errors should still go through crash retry path
const crashError = createCrashError();
const electronFetcher = createMockFetcher('electron-fetch', {
responses: [crashError, createOkResponse()],
isNetworkProcessCrashedError: (e) => e === crashError,
isFetcherError: (e) => e?.message?.startsWith('net::'),
});
const nodeFetcher = createMockFetcher('node-fetch', {
responses: [createOkResponse()],
});

const service = createFetcherService([electronFetcher, nodeFetcher]);

const response = await service.fetch('https://example.com', { callSite: 'test' });
expect(response.status).toBe(200);
});
});
});
Loading