From 94a5214aa02f6ba684a3bc1ed39d1aff30adb4b3 Mon Sep 17 00:00:00 2001 From: Harlan Wilton Date: Mon, 13 Nov 2023 13:49:56 +1100 Subject: [PATCH] fix: filter i18n URLs based on non-prefixed path --- src/runtime/sitemap/builder/sitemap.ts | 4 +- src/runtime/sitemap/urlset/filter.ts | 28 ++++++++++---- test/integration/i18n/filtering.test.ts | 51 +++++++++++++++++++++++++ 3 files changed, 74 insertions(+), 9 deletions(-) create mode 100644 test/integration/i18n/filtering.test.ts diff --git a/src/runtime/sitemap/builder/sitemap.ts b/src/runtime/sitemap/builder/sitemap.ts index 116564fd..f97d4ca4 100644 --- a/src/runtime/sitemap/builder/sitemap.ts +++ b/src/runtime/sitemap/builder/sitemap.ts @@ -93,7 +93,7 @@ export async function buildSitemap(sitemap: SitemapDefinition, resolvers: NitroU ).reverse()) as NitroRouteRules // apply top-level path without prefix, users can still target the localed path - if (autoI18n?.locales && autoI18n?.strategy === 'no_prefix') { + if (autoI18n?.locales && autoI18n?.strategy !== 'no_prefix') { // remove the locale path from the prefix, if it exists, need to use regex const match = path.match(new RegExp(`^/(${autoI18n.locales.map(l => l.code).join('|')})(.*)`)) const pathWithoutPrefix = match?.[2] @@ -117,7 +117,7 @@ export async function buildSitemap(sitemap: SitemapDefinition, resolvers: NitroU enhancedUrls = applyI18nEnhancements(enhancedUrls, { isI18nMapped, autoI18n, sitemapName: sitemap.sitemapName }) // 3. filtered urls // TODO make sure include and exclude start with baseURL? - const filteredUrls = filterSitemapUrls(enhancedUrls, sitemap) + const filteredUrls = filterSitemapUrls(enhancedUrls, { autoI18n, ...sitemap }) // 4. sort const sortedUrls = maybeSort(filteredUrls) // 5. maybe slice for chunked diff --git a/src/runtime/sitemap/urlset/filter.ts b/src/runtime/sitemap/urlset/filter.ts index b94b112b..87493898 100644 --- a/src/runtime/sitemap/urlset/filter.ts +++ b/src/runtime/sitemap/urlset/filter.ts @@ -1,6 +1,6 @@ import { parseURL } from 'ufo' import { createRouter, toRouteMatcher } from 'radix3' -import type { ResolvedSitemapUrl, SitemapDefinition } from '../../types' +import type { ModuleRuntimeConfig, ResolvedSitemapUrl, SitemapDefinition } from '../../types' interface CreateFilterOptions { include?: (string | RegExp)[] @@ -40,15 +40,29 @@ function createFilter(options: CreateFilterOptions = {}): (path: string) => bool } } -export function filterSitemapUrls(_urls: ResolvedSitemapUrl[], filter: Pick) { +export function filterSitemapUrls(_urls: ResolvedSitemapUrl[], options: Pick & Pick) { // base may be wrong here - const urlFilter = createFilter(filter) + const urlFilter = createFilter(options) return _urls.filter((e) => { - if (e._sitemap && filter.sitemapName) - return e._sitemap === filter.sitemapName + if (e._sitemap && options.sitemapName) + return e._sitemap === options.sitemapName try { - const url = parseURL(e.loc) - return urlFilter(url.pathname) + const path = parseURL(e.loc).pathname + if (!urlFilter(path)) + return false + + const { autoI18n } = options + // if the non-prefixed locale is blocked then we block the prefixed versions + if (autoI18n?.locales && autoI18n?.strategy !== 'no_prefix') { + // remove the locale path from the prefix, if it exists, need to use regex + const match = path.match(new RegExp(`^/(${autoI18n.locales.map(l => l.code).join('|')})(.*)`)) + const pathWithoutPrefix = match?.[2] + if (pathWithoutPrefix && pathWithoutPrefix !== path) { + if (!urlFilter(pathWithoutPrefix)) + return false + } + } + return true } catch { // invalid URL diff --git a/test/integration/i18n/filtering.test.ts b/test/integration/i18n/filtering.test.ts new file mode 100644 index 00000000..797be2c0 --- /dev/null +++ b/test/integration/i18n/filtering.test.ts @@ -0,0 +1,51 @@ +import { describe, expect, it } from 'vitest' +import { createResolver } from '@nuxt/kit' +import { $fetch, setup } from '@nuxt/test-utils' + +const { resolve } = createResolver(import.meta.url) + +await setup({ + rootDir: resolve('../../fixtures/i18n'), + nuxtConfig: { + sitemap: { + sitemaps: { + foo: { + urls: [ + // custom blocked routes + '/admin', + '/es/admin', + '/fr/admin', + '/admin/foo', + '/es/admin/foo', + '/fr/admin/foo', + '/admin/foo/bar', + '/es/admin/foo/bar', + '/fr/admin/foo/bar', + // should be only route + '/valid', + ], + exclude: [ + '/admin/**', + ], + }, + }, + }, + }, +}) +describe('multi filtering', () => { + it('basic', async () => { + let sitemap = await $fetch('/foo-sitemap.xml') + + // strip lastmod + sitemap = sitemap.replace(/.*<\/lastmod>/g, '') + + expect(sitemap).toMatchInlineSnapshot(` + " + + + https://nuxtseo.com/valid + + " + `) + }, 60000) +})