Skip to content

Commit f682823

Browse files
committed
Proxy internal sitemaps for prettier sitemap URLs
1 parent 3d7f8f7 commit f682823

2 files changed

Lines changed: 99 additions & 2 deletions

File tree

core/app/sitemap.xml/route.ts

Lines changed: 64 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,72 @@ import { getChannelIdFromLocale } from '~/channels.config';
77
import { client } from '~/client';
88
import { defaultLocale } from '~/i18n/locales';
99

10-
export const GET = async () => {
10+
export const GET = async (request: Request) => {
11+
const url = new URL(request.url);
12+
const incomingHost = request.headers.get('host') ?? url.host;
13+
const incomingProto = request.headers.get('x-forwarded-proto') ?? url.protocol.replace(':', '');
14+
15+
const type = url.searchParams.get('type');
16+
const page = url.searchParams.get('page');
17+
18+
// If a specific sitemap within the index is requested, require both params
19+
if (type !== null || page !== null) {
20+
if (!type || !page) {
21+
return new Response('Both "type" and "page" query params are required', {
22+
status: 400,
23+
headers: { 'Content-Type': 'text/plain; charset=utf-8' },
24+
});
25+
}
26+
27+
const upstream = await client.fetchSitemapResponse(
28+
{ type, page },
29+
getChannelIdFromLocale(defaultLocale),
30+
);
31+
32+
// Pass-through upstream status/body but enforce XML content-type
33+
const body = await upstream.text();
34+
return new Response(body, {
35+
status: upstream.status,
36+
statusText: upstream.statusText,
37+
headers: { 'Content-Type': 'application/xml' },
38+
});
39+
}
40+
41+
// Otherwise, return the sitemap index with normalized internal links
1142
const sitemapIndex = await client.fetchSitemapIndex(getChannelIdFromLocale(defaultLocale));
1243

13-
return new Response(sitemapIndex, {
44+
const rewritten = sitemapIndex.replace(/<loc>([^<]+)<\/loc>/g, (match, locUrlStr) => {
45+
try {
46+
// Decode XML entities for '&' so URL parsing works
47+
const decoded = locUrlStr.replace(/&amp;/g, '&');
48+
const original = new URL(decoded);
49+
50+
if (!original.pathname.endsWith('/xmlsitemap.php')) {
51+
return match;
52+
}
53+
54+
const normalized = new URL(`${incomingProto}://${incomingHost}/sitemap.xml`);
55+
56+
const t = original.searchParams.get('type');
57+
const p = original.searchParams.get('page');
58+
59+
// Only rewrite entries that include both type and page; otherwise leave untouched
60+
if (!t || !p) {
61+
return match;
62+
}
63+
64+
normalized.searchParams.set('type', t);
65+
normalized.searchParams.set('page', p);
66+
67+
// Re-encode '&' for XML output
68+
const normalizedXml = normalized.toString().replace(/&/g, '&amp;');
69+
return `<loc>${normalizedXml}</loc>`;
70+
} catch {
71+
return match;
72+
}
73+
});
74+
75+
return new Response(rewritten, {
1476
headers: {
1577
'Content-Type': 'application/xml',
1678
},

packages/client/src/client.ts

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,41 @@ class Client<FetcherRequestInit extends RequestInit = RequestInit> {
210210
return response.text();
211211
}
212212

213+
/**
 * Fetches a single sitemap document and returns its body as text.
 *
 * Delegates the HTTP request to `fetchSitemapResponse` and converts a
 * non-OK response into an exception.
 *
 * @param params - Sitemap selector; `type` and `page` are passed through to `fetchSitemapResponse`.
 * @param channelId - Optional channel id, passed through to `fetchSitemapResponse`.
 * @returns The response body as text.
 * @throws Error when the response is not OK; the message carries the response's `statusText`.
 */
async fetchSitemap(
  params: { type?: string | null; page?: string | number | null },
  channelId?: string,
): Promise<string> {
  const sitemapResponse = await this.fetchSitemapResponse(params, channelId);

  if (sitemapResponse.ok) {
    return sitemapResponse.text();
  }

  throw new Error(`Unable to get Sitemap: ${sitemapResponse.statusText}`);
}
223+
224+
/**
 * Requests `/xmlsitemap.php` under the canonical URL resolved for the channel
 * and returns the raw `Response`.
 *
 * The response status is not inspected here, so callers decide how to handle
 * non-OK responses.
 *
 * @param params - `type` is added to the query string only when truthy;
 *   `page` is added whenever it is neither `null` nor `undefined`.
 * @param channelId - Optional channel id passed to `getCanonicalUrl`.
 * @returns The `Response` produced by `fetch`.
 */
async fetchSitemapResponse(
  params: { type?: string | null; page?: string | number | null },
  channelId?: string,
): Promise<Response> {
  const canonicalUrl = await this.getCanonicalUrl(channelId);
  const sitemapUrl = new URL(`${canonicalUrl}/xmlsitemap.php`);
  const { type, page } = params;

  // Only forward well-known params
  if (type) {
    sitemapUrl.searchParams.set('type', String(type));
  }

  if (page !== null && page !== undefined) {
    sitemapUrl.searchParams.set('page', String(page));
  }

  return fetch(sitemapUrl.toString(), {
    method: 'GET',
    headers: {
      Accept: 'application/xml',
      'Content-Type': 'application/xml',
      'User-Agent': this.backendUserAgent,
      // The trusted-proxy header is sent only when a secret is configured.
      ...(this.trustedProxySecret && { 'X-BC-Trusted-Proxy-Secret': this.trustedProxySecret }),
    },
  });
}
247+
213248
private async getCanonicalUrl(channelId?: string) {
214249
const resolvedChannelId = channelId ?? (await this.getChannelId(this.defaultChannelId));
215250

0 commit comments

Comments
 (0)