diff --git a/.changeset/bright-insects-peel.md b/.changeset/bright-insects-peel.md new file mode 100644 index 000000000000..69a1213cbfbc --- /dev/null +++ b/.changeset/bright-insects-peel.md @@ -0,0 +1,5 @@ +--- +'astro': patch +--- + +Harden the limits on the number of decoding on the URL. diff --git a/packages/astro/src/core/fetch/fetch-state.ts b/packages/astro/src/core/fetch/fetch-state.ts index 816dc94758f3..b0dc5b79163d 100644 --- a/packages/astro/src/core/fetch/fetch-state.ts +++ b/packages/astro/src/core/fetch/fetch-state.ts @@ -35,7 +35,7 @@ import { getParams, getProps } from '../render/index.js'; import { Rewrites } from '../rewrites/handler.js'; import { isRoute404or500, isRouteServerIsland } from '../routing/match.js'; import { normalizeUrl } from '../util/normalized-url.js'; -import { validateAndDecodePathname } from '../util/pathname.js'; +import { MultiLevelEncodingError, validateAndDecodePathname } from '../util/pathname.js'; import { getOriginPathname, setOriginPathname } from '../routing/rewrite.js'; import { computePathnameFromDomain } from '../i18n/domain.js'; import { getCustom404Route, routeHasHtmlExtension } from '../routing/helpers.js'; @@ -170,6 +170,12 @@ export class FetchState implements AstroFetchState { status = 200; /** Whether user middleware should be skipped for this request. */ skipMiddleware = false; + /** + * Set to `true` when the request path was encoded too many times to fully + * decode (see {@link validateAndDecodePathname}). These requests are + * rejected with a `400` before middleware or routing run. + */ + invalidEncoding = false; /** A flag that tells the render content if the rewriting was triggered. */ isRewriting = false; /** A safety net in case of loops (rewrite counter). */ @@ -915,6 +921,13 @@ export class FetchState implements AstroFetchState { try { return validateAndDecodePathname(pathname); } catch (e: any) { + // The path was encoded too many times to fully decode. Mark it so + // the handler can reject the request with a 400 before middleware + // or routing run, instead of working with a half-decoded path. + if (e instanceof MultiLevelEncodingError) { + this.invalidEncoding = true; + return pathname; + } this.pipeline.logger.error(null, e.toString()); return pathname; } diff --git a/packages/astro/src/core/i18n/handler.ts b/packages/astro/src/core/i18n/handler.ts index e798c7fde7e3..7574029b9b2e 100644 --- a/packages/astro/src/core/i18n/handler.ts +++ b/packages/astro/src/core/i18n/handler.ts @@ -70,7 +70,9 @@ export class I18n { return response; } - const url = new URL(state.request.url); + // Use Astro's already-decoded URL (`state.url`) instead of reading the + // raw request URL again, so locale checks use the same path as routing. + const url = state.url; const currentLocale = state.computeCurrentLocale(); const isPrerendered = state.routeData!.prerender; diff --git a/packages/astro/src/core/routing/handler.ts b/packages/astro/src/core/routing/handler.ts index c900921908a8..84f3d4aa3c5e 100644 --- a/packages/astro/src/core/routing/handler.ts +++ b/packages/astro/src/core/routing/handler.ts @@ -73,6 +73,14 @@ export class AstroHandler { // forget to include anything. state.pipeline.usedFeatures |= ALL_PIPELINE_FEATURES; + // Reject paths that were encoded too many times to fully decode, before + // any routing or middleware runs. If we let them through, middleware + // could check one path while a later decode turns it into a different + // route. + if (state.invalidEncoding) { + return new Response(null, { status: 400, statusText: 'Bad Request' }); + } + const trailingSlashRedirect = this.#trailingSlashHandler.handle(state); if (trailingSlashRedirect) { return trailingSlashRedirect; diff --git a/packages/astro/src/core/routing/rewrite.ts b/packages/astro/src/core/routing/rewrite.ts index 6f4f3e5516fb..07790e28cc5a 100644 --- a/packages/astro/src/core/routing/rewrite.ts +++ b/packages/astro/src/core/routing/rewrite.ts @@ -14,6 +14,7 @@ import { trimSlashes, } from '../path.js'; import { createRequest } from '../request.js'; +import { validateAndDecodePathname } from '../util/pathname.js'; import { DEFAULT_404_ROUTE } from './internal/astro-designed-error-pages.js'; import { isRoute404, isRoute500 } from './internal/route-errors.js'; @@ -64,7 +65,13 @@ export function findRouteToRewrite({ ); newUrl.pathname = resolvedUrlPathname; - const decodedPathname = decodeURI(pathname); + // Decode the path the same way the first route match did (see + // `validateAndDecodePathname`) instead of calling `decodeURI` once here. + // Routing has to use the exact same path that middleware checked; decoding + // one extra time here is what let an encoded path slip past middleware and + // still reach a protected route. For an already-decoded path this changes + // nothing. + const decodedPathname = validateAndDecodePathname(pathname); // Error pages (404/500) take precedence over dynamic routes that might // capture the same path (e.g. [locale] matching /404). See #15098. diff --git a/packages/astro/src/core/util/pathname.ts b/packages/astro/src/core/util/pathname.ts index cf78d09973a7..8885f3bd8a70 100644 --- a/packages/astro/src/core/util/pathname.ts +++ b/packages/astro/src/core/util/pathname.ts @@ -1,30 +1,40 @@ /** - * Error thrown when multi-level URL encoding is detected in a pathname. - * This is a distinct error type so callers can handle it specifically - * (e.g., returning a 400 response) rather than falling back to partial decoding. - * - * @deprecated No longer thrown internally — multi-level encoding is now - * decoded iteratively instead of rejected. Kept for backwards compatibility - * in case third-party code references the class. + * Thrown when a URL path is encoded so many times that we give up decoding it + * (see {@link validateAndDecodePathname}). When this happens we reject the + * request with a `400` instead of guessing the path. If we let a half-decoded + * path through, your middleware might check one path while Astro routes to a + * different one. */ export class MultiLevelEncodingError extends Error { constructor() { - super('Multi-level URL encoding is not allowed'); + super('URL encoding depth exceeded the maximum number of decode iterations'); this.name = 'MultiLevelEncodingError'; } } /** - * Decodes a pathname iteratively until stable, collapsing all levels of - * percent-encoding into a single canonical form. This prevents - * double/triple encoding from bypassing middleware authorization checks - * (CVE-2025-66202) — instead of rejecting multi-level encoding, we - * fully resolve it so middleware always sees the true decoded path. + * How many times {@link validateAndDecodePathname} will decode a path before + * giving up. A normal URL is encoded once, or at most twice — for example a + * `[` (`%5B`) can arrive as `%255B` when a link is built from a value that was + * already encoded. A path that is still encoded after this many tries is + * almost certainly an attack, so we reject it instead of decoding again. + */ +const MAX_DECODE_ITERATIONS = 10; + +/** + * Decodes a URL path over and over until it stops changing, so a path that was + * encoded several times ends up as a single, final path. This stops someone + * from sneaking a path like `/admin` past middleware by encoding it multiple + * times — middleware always sees the real, decoded path. * - * @param pathname - The pathname to decode - * @returns The fully decoded pathname - * @throws Error if the pathname contains invalid URL encoding that - * cannot be decoded at all (e.g., a bare `%` not followed by hex digits) + * @param pathname - The path to decode + * @returns The final, fully decoded path + * @throws Error if the path has broken encoding that can't be decoded at all + * (for example a lone `%` that isn't followed by two hex digits) + * @throws MultiLevelEncodingError if the path is still changing after + * {@link MAX_DECODE_ITERATIONS} tries (it was encoded too many times). + * Handing back a half-decoded path here would bring back the security hole + * this function exists to close. */ export function validateAndDecodePathname(pathname: string): string { let decoded: string; @@ -33,21 +43,28 @@ export function validateAndDecodePathname(pathname: string): string { } catch (_e) { throw new Error('Invalid URL encoding'); } - // Iteratively decode until stable. Multi-level encoding (e.g., - // %2561 → %61 → a) is resolved completely so that downstream code - // — especially middleware auth checks — always sees the canonical - // pathname regardless of how many encoding layers the client used. - // We cap iterations to prevent infinite loops on pathological input. + // Keep decoding until the path stops changing. A path can be encoded more + // than once (for example %2561 → %61 → a), and we want the final decoded + // path so the rest of Astro — especially middleware security checks — + // always sees the same real path, no matter how many times it was encoded. let iterations = 0; - while (decoded !== pathname && iterations < 10) { + while (decoded !== pathname) { + // The path is still changing after the maximum number of tries, so it + // was encoded too many times for us to fully decode. Stop and reject + // it: handing back a half-decoded path could let middleware check one + // path while a later decode (during rewrite routing) turns it into a + // different, possibly protected, path. + if (iterations >= MAX_DECODE_ITERATIONS) { + throw new MultiLevelEncodingError(); + } pathname = decoded; try { decoded = decodeURI(pathname); } catch { - // decodeURI can fail when a decoded literal '%' forms an - // invalid sequence with adjacent characters (e.g., '%?.pdf' - // after decoding %25%3F). This is fine — we've decoded as - // far as possible. + // decodeURI throws when decoding leaves a real '%' next to + // characters that look like broken encoding (for example '%?.pdf' + // after decoding %25%3F). That's fine — we've decoded as far as we + // can and the path won't change any further. break; } iterations++; diff --git a/packages/astro/test/units/app/double-encoding-bypass.test.ts b/packages/astro/test/units/app/double-encoding-bypass.test.ts index 30e3b333cad4..54fba02c604f 100644 --- a/packages/astro/test/units/app/double-encoding-bypass.test.ts +++ b/packages/astro/test/units/app/double-encoding-bypass.test.ts @@ -78,6 +78,26 @@ function createAuthMiddleware() { })) as () => Promise<{ onRequest: MiddlewareHandler }>; } +/** + * Like {@link createAuthMiddleware}, but every allowed request is sent back + * through routing again with `next(context.url)`. The route it lands on must + * match the same path the `/api/admin` check looked at; otherwise an encoded + * path can slip past the check and still be decoded into `/api/admin`. + */ +function createRewriteAuthMiddleware() { + return (async () => ({ + onRequest: (async (context, next) => { + if (context.url.pathname.startsWith('/api/admin')) { + return new Response(JSON.stringify({ error: 'Unauthorized' }), { + status: 401, + headers: { 'Content-Type': 'application/json' }, + }); + } + return next(context.url); + }) satisfies MiddlewareHandler, + })) as () => Promise<{ onRequest: MiddlewareHandler }>; +} + function createApp(middleware: ReturnType) { return new App( createManifest({ @@ -233,3 +253,48 @@ describe('URL normalization: double-encoding middleware bypass', () => { }); // #endregion }); + +describe('URL normalization: rewrite-based middleware bypass', () => { + it('blocks /api/admin even when middleware calls next(context.url)', async () => { + const app = createApp(createRewriteAuthMiddleware()); + const response = await app.render(new Request('http://example.com/api/admin')); + assert.equal(response.status, 401, '/api/admin must be blocked by middleware'); + }); + + it('blocks double-encoded /api/%2561dmin with a rewriting middleware', async () => { + // This decodes to /api/admin, so middleware must see /api/admin and + // block it, even though the request is then sent back through routing. + const app = createApp(createRewriteAuthMiddleware()); + const response = await app.render(new Request('http://example.com/api/%2561dmin')); + assert.equal( + response.status, + 401, + 'double-encoded /api/admin must be blocked even with a rewriting middleware', + ); + }); + + it('rejects an over-encoded path with 400 instead of bypassing to /api/admin', async () => { + // Encoded more times than we decode. + const app = createApp(createRewriteAuthMiddleware()); + const response = await app.render( + new Request('http://example.com/api/%2525252525252525252561dmin'), + ); + assert.equal(response.status, 400, 'over-encoded path must be rejected, not served'); + }); + + it('rejects a deeply over-encoded payload with 400', async () => { + const app = createApp(createRewriteAuthMiddleware()); + const response = await app.render( + new Request('http://example.com/api/%252525252525252525252561dmin'), + ); + assert.equal(response.status, 400); + }); + + it('still serves non-protected routes through the rewriting middleware', async () => { + const app = createApp(createRewriteAuthMiddleware()); + const response = await app.render(new Request('http://example.com/api/public/data')); + assert.equal(response.status, 200, '/api/public/data should be accessible'); + const body = await response.json(); + assert.equal(body.path, 'public/data'); + }); +}); diff --git a/packages/astro/test/units/util/validate-and-decode-pathname.test.ts b/packages/astro/test/units/util/validate-and-decode-pathname.test.ts index 713876a1a28d..1c6be6c62dda 100644 --- a/packages/astro/test/units/util/validate-and-decode-pathname.test.ts +++ b/packages/astro/test/units/util/validate-and-decode-pathname.test.ts @@ -1,6 +1,9 @@ import assert from 'node:assert/strict'; import { describe, it } from 'node:test'; -import { validateAndDecodePathname } from '../../../dist/core/util/pathname.js'; +import { + MultiLevelEncodingError, + validateAndDecodePathname, +} from '../../../dist/core/util/pathname.js'; describe('validateAndDecodePathname', () => { // #region Plain paths (no encoding) @@ -33,7 +36,7 @@ describe('validateAndDecodePathname', () => { // // Multi-level encoding is decoded iteratively until stable. This // ensures middleware always sees the canonical path and can make - // correct authorization decisions (CVE-2025-66202 mitigation). + // correct authorization decisions. it('fully decodes double-encoded unreserved chars: %2561 (a)', () => { // %2561 → decodeURI → %61 → decodeURI → a @@ -187,4 +190,39 @@ describe('validateAndDecodePathname', () => { ); }); // #endregion + // #region Encoded too many times (rejected, must throw) + // + // If a path is still encoded after the maximum number of decode tries, we + // can't fully decode it. Returning the half-decoded path would let + // middleware check one path while a later decode (during rewrite routing) + // turns it into a different, protected path. So we reject these instead. + + it('decodes a path encoded right up to the limit (10 times)', () => { + // Encoded 10 times — the most we allow — still decodes fully. + assert.equal(validateAndDecodePathname('/api/%25252525252525252561dmin'), '/api/admin'); + }); + + it('throws MultiLevelEncodingError once a path is encoded past the limit (11 times)', () => { + assert.throws( + () => validateAndDecodePathname('/api/%2525252525252525252561dmin'), + (err: any) => { + assert.equal(err instanceof MultiLevelEncodingError, true); + return true; + }, + 'a path encoded past the limit must be rejected', + ); + }); + + it('throws MultiLevelEncodingError for a path encoded many times', () => { + // Before the fix, this decoded only part way to `/%61dmin` and slipped + // past middleware. + assert.throws( + () => validateAndDecodePathname('/%252525252525252525252561dmin'), + (err: any) => { + assert.equal(err instanceof MultiLevelEncodingError, true); + return true; + }, + ); + }); + // #endregion });