Skip to content

Commit

Permalink
fix: broken regex for <NuxtImage> components
Browse files (browse the repository at this point in the history)
Fixes #298
  • Loading branch information
harlan-zw committed Aug 22, 2024
1 parent 691409a commit 469e7bd
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 2 deletions.
5 changes: 3 additions & 2 deletions src/util/extractSitemapMetaFromHtml.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { withSiteUrl } from 'nuxt-site-config-kit/urls'
import { parseURL } from 'ufo'
import { tryUseNuxt } from '@nuxt/kit'
import type { ResolvedSitemapUrl, SitemapUrl, VideoEntry } from '../runtime/types'

export function extractSitemapMetaFromHtml(html: string, options?: { images?: boolean, videos?: boolean, lastmod?: boolean, alternatives?: boolean }) {
Expand All @@ -12,7 +13,7 @@ export function extractSitemapMetaFromHtml(html: string, options?: { images?: bo
if (mainMatch?.[1] && mainMatch[1].includes('<img')) {
// Extract image src attributes using regex on the HTML, but ignore elements with invalid values such as data:, blob:, or file:
// eslint-disable-next-line regexp/no-useless-lazy
- const imgRegex = /<img\s+src=["']((?!data:|blob:|file:)[^"']+?)["'][^>]*>/gi
+ const imgRegex = /<img\s+(?:[^>]*?\s)?src=["']((?!data:|blob:|file:)[^"']+?)["'][^>]*>/gi

let match
while ((match = imgRegex.exec(mainMatch[1])) !== null) {
Expand All @@ -22,7 +23,7 @@ export function extractSitemapMetaFromHtml(html: string, options?: { images?: bo
let url = match[1]
// if the match is relative
if (url.startsWith('/'))
- url = withSiteUrl(url)
+ url = tryUseNuxt() ? withSiteUrl(url) : url
images.add(url)
}
}
Expand Down
12 changes: 12 additions & 0 deletions test/unit/extractSitemapMetaFromHtml.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,18 @@ describe('extractSitemapMetaFromHtml', () => {
],
}
`)

const html3 = `<div id="__nuxt"><div><main><div class="document-driven-page"><!--[--><div><h1 id="index"><!--[-->index<!--]--></h1><ul><!--[--><li><!--[--><a href="/bar" class=""><!--[-->/bar<!--]--></a><!--]--></li><li><!--[--><a href="/foo" class=""><!--[-->/foo<!--]--></a><!--]--></li><!--]--></ul><img onerror="this.setAttribute(&#39;data-error&#39;, 1)" alt="Test image" data-nuxt-img srcset="/_ipx/_/logo.svg 1x, /_ipx/_/logo.svg 2x" src="/_ipx/_/logo.svg" class="test"><p><!--[--><a href="/sitemap.xml" class=""><!--[-->/sitemap.xml<!--]--></a><!--]--></p></div><!--]--></div></main></div></div><div id="teleports"></div>`
const testcase3 = extractSitemapMetaFromHtml(html3)
expect(testcase3).toMatchInlineSnapshot(`
{
"images": [
{
"loc": "/_ipx/_/logo.svg",
},
],
}
`)
})

it('extracts videos from HTML', async () => {
Expand Down

0 comments on commit 469e7bd

Please sign in to comment.