From 6d25310fb88d9872e9b1be0a0bc749e1977c6953 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jind=C5=99ich=20B=C3=A4r?= Date: Tue, 23 Jul 2024 10:04:09 +0200 Subject: [PATCH 1/2] fix: append shadow root content in a separate sibling element --- packages/utils/src/internals/general.ts | 4 +++- test/core/playwright_utils.test.ts | 30 +++++++++++++++++++++++++ test/shared/_helper.ts | 29 ++++++++++++++++++++++++ 3 files changed, 62 insertions(+), 1 deletion(-) diff --git a/packages/utils/src/internals/general.ts b/packages/utils/src/internals/general.ts index 221c84d4b9a9..a381b91caf6f 100644 --- a/packages/utils/src/internals/general.ts +++ b/packages/utils/src/internals/general.ts @@ -115,7 +115,9 @@ export function expandShadowRoots(document: Document): string { for (const el of rootElement.querySelectorAll('*')) { if (el.shadowRoot) { replaceShadowDomsWithHtml(el.shadowRoot); - el.innerHTML += getShadowDomHtml(el.shadowRoot) ?? ''; + const content = document.createElement('div'); + content.innerHTML = getShadowDomHtml(el.shadowRoot) ?? ''; + el.appendChild(content); } } } diff --git a/test/core/playwright_utils.test.ts b/test/core/playwright_utils.test.ts index 67d6b09427e1..74d60b8e9f23 100644 --- a/test/core/playwright_utils.test.ts +++ b/test/core/playwright_utils.test.ts @@ -244,6 +244,36 @@ describe('playwrightUtils', () => { } }, 60_000); + describe.only('shadow root expansion', () => { + let browser: Browser; + beforeAll(async () => { + browser = await launchPlaywright(launchContext); + }); + afterAll(async () => { + await browser.close(); + }); + + test('no expansion with ignoreShadowRoots: true', async () => { + const page = await browser.newPage(); + await page.goto(`${serverAddress}/special/shadow-root`); + const result = await playwrightUtils.parseWithCheerio(page, true); + + const text = result('body').text().trim(); + expect([...text.matchAll(/\[GOOD\]/g)]).toHaveLength(0); + expect([...text.matchAll(/\[BAD\]/g)]).toHaveLength(0); + }); + + test('expansion works', async () => { + const page = await browser.newPage(); + await page.goto(`${serverAddress}/special/shadow-root`); + const result = await playwrightUtils.parseWithCheerio(page); + + const text = result('body').text().trim(); + expect([...text.matchAll(/\[GOOD\]/g)]).toHaveLength(2); + expect([...text.matchAll(/\[BAD\]/g)]).toHaveLength(0); + }); + }); + describe('infiniteScroll()', () => { function isAtBottom() { return window.innerHeight + window.pageYOffset >= document.body.offsetHeight; diff --git a/test/shared/_helper.ts b/test/shared/_helper.ts index a797cb256b5d..c309465b1fb7 100644 --- a/test/shared/_helper.ts +++ b/test/shared/_helper.ts @@ -194,6 +194,31 @@ console.log('Hello world!');

Some content from inside of an iframe.

`, + shadowRoots: ` + + +
+ +
+
+ +
+ + `, }; export async function runExampleComServer(): Promise<[Server, number]> { @@ -298,6 +323,10 @@ export async function runExampleComServer(): Promise<[Server, number]> { special.get('/inside-iframe', (_req, res) => { res.type('html').send(responseSamples.insideIframe); }); + + special.get('/shadow-root', (_req, res) => { + res.type('html').send(responseSamples.shadowRoots); + }); })(); // "cacheable" site with one page, scripts and stylesheets From 3a1d17b1882f15e01aa634366fd19ead773722b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jind=C5=99ich=20B=C3=A4r?= Date: Wed, 24 Jul 2024 09:48:05 +0300 Subject: [PATCH 2/2] feat: use `getHTML` method with compatibility fallbacks --- packages/utils/src/internals/general.ts | 9 +++++--- test/core/playwright_utils.test.ts | 2 +- test/core/puppeteer_utils.test.ts | 30 +++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/packages/utils/src/internals/general.ts b/packages/utils/src/internals/general.ts index a381b91caf6f..af748a333fc7 100644 --- a/packages/utils/src/internals/general.ts +++ b/packages/utils/src/internals/general.ts @@ -115,9 +115,12 @@ export function expandShadowRoots(document: Document): string { for (const el of rootElement.querySelectorAll('*')) { if (el.shadowRoot) { replaceShadowDomsWithHtml(el.shadowRoot); - const content = document.createElement('div'); - content.innerHTML = getShadowDomHtml(el.shadowRoot) ?? ''; - el.appendChild(content); + let content = el.getHTML?.({ serializableShadowRoots: true }).trim(); + + if (!(content?.length > 0)) { + content = getShadowDomHtml(el.shadowRoot) ?? ''; + } + el.innerHTML += content; } } } diff --git a/test/core/playwright_utils.test.ts b/test/core/playwright_utils.test.ts index 74d60b8e9f23..936718a68956 100644 --- a/test/core/playwright_utils.test.ts +++ b/test/core/playwright_utils.test.ts @@ -244,7 +244,7 @@ describe('playwrightUtils', () => { } }, 60_000); - describe.only('shadow root expansion', () => { + describe('shadow root expansion', () => { let browser: Browser; beforeAll(async () => { browser = await launchPlaywright(launchContext); diff --git a/test/core/puppeteer_utils.test.ts b/test/core/puppeteer_utils.test.ts index 13be599f1256..e46a7239ded2 100644 --- a/test/core/puppeteer_utils.test.ts +++ b/test/core/puppeteer_utils.test.ts @@ -178,6 +178,36 @@ describe('puppeteerUtils', () => { } }); + describe('parseWithCheerio() shadow root expansion works', () => { + let browser: Browser; + beforeAll(async () => { + browser = await launchPuppeteer(launchContext); + }); + afterAll(async () => { + await browser.close(); + }); + + test('no expansion with ignoreShadowRoots: true', async () => { + const page = await browser.newPage(); + await page.goto(`${serverAddress}/special/shadow-root`); + const result = await puppeteerUtils.parseWithCheerio(page, true); + + const text = result('body').text().trim(); + expect([...text.matchAll(/\[GOOD\]/g)]).toHaveLength(0); + expect([...text.matchAll(/\[BAD\]/g)]).toHaveLength(0); + }); + + test('expansion works', async () => { + const page = await browser.newPage(); + await page.goto(`${serverAddress}/special/shadow-root`); + const result = await puppeteerUtils.parseWithCheerio(page); + + const text = result('body').text().trim(); + expect([...text.matchAll(/\[GOOD\]/g)]).toHaveLength(2); + expect([...text.matchAll(/\[BAD\]/g)]).toHaveLength(0); + }); + }); + describe('blockRequests()', () => { let browser: Browser = null; beforeAll(async () => {