diff --git a/packages/gatsby-source-wordpress/__tests__/process-node.test.js b/packages/gatsby-source-wordpress/__tests__/process-node.test.js
new file mode 100644
index 0000000000000..2e74410265063
--- /dev/null
+++ b/packages/gatsby-source-wordpress/__tests__/process-node.test.js
@@ -0,0 +1,25 @@
+import {
+ getImgSrcRemoteFileMatchesFromNodeString,
+ getImgTagMatchesWithUrl,
+} from "../dist/steps/source-nodes/create-nodes/process-node"
+
+test(`HTML image transformation regex matches images`, async () => {
+ const wpUrl = `http://wp.fakesite.com`
+
+ const nodeString = `
+
+
+
+
`
+
+ const matches = getImgSrcRemoteFileMatchesFromNodeString(nodeString)
+
+ expect(matches.length).toBe(3)
+
+ const imgTagMatches = getImgTagMatchesWithUrl({
+ nodeString,
+ wpUrl,
+ })
+
+ expect(imgTagMatches.length).toBe(3)
+})
diff --git a/packages/gatsby-source-wordpress/src/steps/source-nodes/create-nodes/process-node.js b/packages/gatsby-source-wordpress/src/steps/source-nodes/create-nodes/process-node.js
index 4ac9c42581c9b..f2cb23da72615 100644
--- a/packages/gatsby-source-wordpress/src/steps/source-nodes/create-nodes/process-node.js
+++ b/packages/gatsby-source-wordpress/src/steps/source-nodes/create-nodes/process-node.js
@@ -1,3 +1,4 @@
+/* eslint-disable no-useless-escape */
import { isWebUri } from "valid-url"
import { fluid } from "gatsby-plugin-sharp"
import Img from "gatsby-image"
@@ -30,7 +31,7 @@ const getNodeEditLink = node => {
const findReferencedImageNodeIds = ({ nodeString, pluginOptions, node }) => {
// if the lazyNodes plugin option is set we don't need to find
- // image node id's because those nodes will be fetched lazily in resolvers
+ // image node ids because those nodes will be fetched lazily in resolvers.
if (pluginOptions.type.MediaItem.lazyNodes) {
return []
}
@@ -327,6 +328,17 @@ const getCheerioElementFromMatch = wpUrl => ({ match, tag = `img` }) => {
}
}
+/**
+ * Converts img tag matches into cheerio elements and keeps only the elements
+ * whose `src` attribute is set and passes `isWebUri` after being URI-encoded.
+ *
+ * @param {object} args
+ * @param {Array} args.imgTagMatches - matches handed one by one to the mapper
+ *   returned by getCheerioElementFromMatch(wpUrl)
+ * @param {*} args.wpUrl - forwarded untouched to getCheerioElementFromMatch
+ * @returns {Array} the mapped elements that passed the src check
+ */
+const getCheerioElementsFromMatches = ({ imgTagMatches, wpUrl }) => {
+  const toCheerioElement = getCheerioElementFromMatch(wpUrl)
+
+  const hasValidWebSrc = ({ cheerioImg }) => {
+    const { src } = cheerioImg.attribs
+
+    // an element without a src is dropped; otherwise the src is URI-encoded
+    // before it is validated
+    return src ? isWebUri(encodeURI(src)) : false
+  }
+
+  return imgTagMatches.map(toCheerioElement).filter(hasValidWebSrc)
+}
+
const getLargestSizeFromSizesAttribute = sizesString => {
const sizesStringsArray = sizesString.split(`,`)
@@ -444,6 +456,28 @@ const cacheCreatedFileNodeBySrc = ({ node, src }) => {
}
}
+// Finds image src attributes written with escaped quotes (src=\"...\").
+// Capture 1 is the file url: it starts with an http(s)/ftp/file scheme, `www.`,
+// `ftp.` or `/`, contains no quote characters and ends in one of the listed
+// file extensions. Capture 2 is the optional query string (possibly empty).
+// The match must be followed by an escaped quote, a space or a dot.
+const imgSrcRemoteFileRegex = /(?:src=\\")((?:(?:https?|ftp|file):\/\/|www\.|ftp\.|\/)(?:[^'"])*\.(?:jpeg|jpg|png|gif|ico|mpg|ogv|svg|bmp|tif|tiff))(\?[^\\" \.]*|)(?=\\"| |\.)/gim
+
+/**
+ * Collects every imgSrcRemoteFileRegex match in the given string, leaving out
+ * the matches that sit inside a JSON encoded string field.
+ *
+ * @param {string} nodeString - text scanned with imgSrcRemoteFileRegex
+ * @returns {Array} execall results whose first sub-match contains no `\/\/`
+ */
+export const getImgSrcRemoteFileMatchesFromNodeString = nodeString => {
+  // escaped slashes (`\/\/`) mean the match is json encoded, i.e. it lives
+  // inside a JSON encoded string field, and encoded JSON shouldn't be processed
+  const isNotJsonEncoded = ({ subMatches }) =>
+    !subMatches[0].includes(`\\/\\/`)
+
+  return execall(imgSrcRemoteFileRegex, nodeString).filter(isNotJsonEncoded)
+}
+
+export const getImgTagMatchesWithUrl = ({ nodeString, wpUrl }) =>
+ execall(
+ /
/gim,
+ nodeString
+ // we don't want to match images inside pre
+ .replace(/
.*(<\/pre>)/gim, ``)
+ // and code tags, so temporarily remove those tags and everything inside them
+ .replace(/.*(<\/code>)/gim, ``)
+ ).filter(filterMatches(wpUrl))
+
const replaceNodeHtmlImages = async ({
nodeString,
node,
@@ -456,38 +490,15 @@ const replaceNodeHtmlImages = async ({
return nodeString
}
- const imgSrcRemoteFileRegex = /(?:src=\\")((?:(?:https?|ftp|file):\/\/|www\.|ftp\.|\/)(?:\([-A-Z0-9+&@#/%=~_|$?!:,.]*\)|[-A-Z0-9+&@#/%=~_|$?!:,.])*(?:\([-A-Z0-9+&@#/%=~_|$?!:,.]*\)|[A-Z0-9+&@#/%=~_|$])\.(?:jpeg|jpg|png|gif|ico|mpg|ogv|svg|bmp|tif|tiff))(\?[^\\" .]*|)(?=\\"| |\.)/gim
+ const imageUrlMatches = getImgSrcRemoteFileMatchesFromNodeString(nodeString)
- const imageUrlMatches = execall(imgSrcRemoteFileRegex, nodeString).filter(
- ({ subMatches }) => {
- // if our match is json encoded, that means it's inside a JSON
- // encoded string field.
- const isInJSON = subMatches[0].includes(`\\/\\/`)
-
- // we shouldn't process encoded JSON, so skip this match if it's JSON
- return !isInJSON
- }
- )
-
- const imgTagMatches = execall(
- /
/gim,
- nodeString
- // we don't want to match images inside pre
- .replace(/.*(<\/pre>)/gim, ``)
- // and code tags, so temporarily remove those tags and everything inside them
- .replace(/.*(<\/code>)/gim, ``)
- ).filter(filterMatches(wpUrl))
+ const imgTagMatches = getImgTagMatchesWithUrl({ nodeString, wpUrl })
if (imageUrlMatches.length && imgTagMatches.length) {
- const cheerioImages = imgTagMatches
- .map(getCheerioElementFromMatch(wpUrl))
- .filter(({ cheerioImg: { attribs } }) => {
- if (!attribs.src) {
- return false
- }
-
- return isWebUri(attribs.src)
- })
+ const cheerioImages = getCheerioElementsFromMatches({
+ imgTagMatches,
+ wpUrl,
+ })
const htmlMatchesToMediaItemNodesMap = await fetchNodeHtmlImageMediaItemNodes(
{
diff --git a/packages/gatsby-source-wordpress/src/steps/source-nodes/fetch-nodes/fetch-referenced-media-items.js b/packages/gatsby-source-wordpress/src/steps/source-nodes/fetch-nodes/fetch-referenced-media-items.js
index ac3112a4f4976..64523645ca96b 100644
--- a/packages/gatsby-source-wordpress/src/steps/source-nodes/fetch-nodes/fetch-referenced-media-items.js
+++ b/packages/gatsby-source-wordpress/src/steps/source-nodes/fetch-nodes/fetch-referenced-media-items.js
@@ -211,7 +211,8 @@ export const stripImageSizesFromUrl = url => {
const fileExtension = urlToFileExtension(url)
const imageSizesPattern = new RegExp(
- `(?:[-_]([0-9]+)x([0-9]+))${fileExtension ? `.${fileExtension}` : ``}`
+ // `\\.` keeps the backslash in the string so the RegExp matches a literal dot
+ `(?:[-_]([0-9]+)x([0-9]+))${fileExtension ? `\\.${fileExtension}` : ``}`
)
let urlWithoutSizes = url.replace(imageSizesPattern, ``)