diff --git a/packages/gatsby-source-wordpress/__tests__/process-node.test.js b/packages/gatsby-source-wordpress/__tests__/process-node.test.js new file mode 100644 index 0000000000000..2e74410265063 --- /dev/null +++ b/packages/gatsby-source-wordpress/__tests__/process-node.test.js @@ -0,0 +1,25 @@ +import { + getImgSrcRemoteFileMatchesFromNodeString, + getImgTagMatchesWithUrl, +} from "../dist/steps/source-nodes/create-nodes/process-node" + +test(`HTML image transformation regex matches images`, async () => { + const wpUrl = `http://wp.fakesite.com` + + const nodeString = ` + + + + ` + + const matches = getImgSrcRemoteFileMatchesFromNodeString(nodeString) + + expect(matches.length).toBe(3) + + const imgTagMatches = getImgTagMatchesWithUrl({ + nodeString, + wpUrl, + }) + + expect(imgTagMatches.length).toBe(3) +}) diff --git a/packages/gatsby-source-wordpress/src/steps/source-nodes/create-nodes/process-node.js b/packages/gatsby-source-wordpress/src/steps/source-nodes/create-nodes/process-node.js index 4ac9c42581c9b..f2cb23da72615 100644 --- a/packages/gatsby-source-wordpress/src/steps/source-nodes/create-nodes/process-node.js +++ b/packages/gatsby-source-wordpress/src/steps/source-nodes/create-nodes/process-node.js @@ -1,3 +1,4 @@ +/* eslint-disable no-useless-escape */ import { isWebUri } from "valid-url" import { fluid } from "gatsby-plugin-sharp" import Img from "gatsby-image" @@ -30,7 +31,7 @@ const getNodeEditLink = node => { const findReferencedImageNodeIds = ({ nodeString, pluginOptions, node }) => { // if the lazyNodes plugin option is set we don't need to find - // image node id's because those nodes will be fetched lazily in resolvers + // image node id's because those nodes will be fetched lazily in resolvers. 
if (pluginOptions.type.MediaItem.lazyNodes) { return [] } @@ -327,6 +328,17 @@ const getCheerioElementFromMatch = wpUrl => ({ match, tag = `img` }) => { } } +const getCheerioElementsFromMatches = ({ imgTagMatches, wpUrl }) => + imgTagMatches + .map(getCheerioElementFromMatch(wpUrl)) + .filter(({ cheerioImg: { attribs } }) => { + if (!attribs.src) { + return false + } + + return isWebUri(encodeURI(attribs.src)) + }) + const getLargestSizeFromSizesAttribute = sizesString => { const sizesStringsArray = sizesString.split(`,`) @@ -444,6 +456,28 @@ const cacheCreatedFileNodeBySrc = ({ node, src }) => { } } +const imgSrcRemoteFileRegex = /(?:src=\\")((?:(?:https?|ftp|file):\/\/|www\.|ftp\.|\/)(?:[^'"])*\.(?:jpeg|jpg|png|gif|ico|mpg|ogv|svg|bmp|tif|tiff))(\?[^\\" \.]*|)(?=\\"| |\.)/gim + +export const getImgSrcRemoteFileMatchesFromNodeString = nodeString => + execall(imgSrcRemoteFileRegex, nodeString).filter(({ subMatches }) => { + // if our match is json encoded, that means it's inside a JSON + // encoded string field. + const isInJSON = subMatches[0].includes(`\\/\\/`) + + // we shouldn't process encoded JSON, so skip this match if it's JSON + return !isInJSON + }) + +export const getImgTagMatchesWithUrl = ({ nodeString, wpUrl }) => + execall( + //gim, + nodeString + // we don't want to match images inside pre + .replace(/
.*(<\/pre>)/gim, ``)
+ // and code tags, so temporarily remove those tags and everything inside them
+ .replace(/.*(<\/code>)/gim, ``)
+ ).filter(filterMatches(wpUrl))
+
const replaceNodeHtmlImages = async ({
nodeString,
node,
@@ -456,38 +490,15 @@ const replaceNodeHtmlImages = async ({
return nodeString
}
- const imgSrcRemoteFileRegex = /(?:src=\\")((?:(?:https?|ftp|file):\/\/|www\.|ftp\.|\/)(?:\([-A-Z0-9+&@#/%=~_|$?!:,.]*\)|[-A-Z0-9+&@#/%=~_|$?!:,.])*(?:\([-A-Z0-9+&@#/%=~_|$?!:,.]*\)|[A-Z0-9+&@#/%=~_|$])\.(?:jpeg|jpg|png|gif|ico|mpg|ogv|svg|bmp|tif|tiff))(\?[^\\" .]*|)(?=\\"| |\.)/gim
+ const imageUrlMatches = getImgSrcRemoteFileMatchesFromNodeString(nodeString)
- const imageUrlMatches = execall(imgSrcRemoteFileRegex, nodeString).filter(
- ({ subMatches }) => {
- // if our match is json encoded, that means it's inside a JSON
- // encoded string field.
- const isInJSON = subMatches[0].includes(`\\/\\/`)
-
- // we shouldn't process encoded JSON, so skip this match if it's JSON
- return !isInJSON
- }
- )
-
- const imgTagMatches = execall(
- //gim,
- nodeString
- // we don't want to match images inside pre
- .replace(/.*(<\/pre>)/gim, ``)
- // and code tags, so temporarily remove those tags and everything inside them
- .replace(/.*(<\/code>)/gim, ``)
- ).filter(filterMatches(wpUrl))
+ const imgTagMatches = getImgTagMatchesWithUrl({ nodeString, wpUrl })
if (imageUrlMatches.length && imgTagMatches.length) {
- const cheerioImages = imgTagMatches
- .map(getCheerioElementFromMatch(wpUrl))
- .filter(({ cheerioImg: { attribs } }) => {
- if (!attribs.src) {
- return false
- }
-
- return isWebUri(attribs.src)
- })
+ const cheerioImages = getCheerioElementsFromMatches({
+ imgTagMatches,
+ wpUrl,
+ })
const htmlMatchesToMediaItemNodesMap = await fetchNodeHtmlImageMediaItemNodes(
{
diff --git a/packages/gatsby-source-wordpress/src/steps/source-nodes/fetch-nodes/fetch-referenced-media-items.js b/packages/gatsby-source-wordpress/src/steps/source-nodes/fetch-nodes/fetch-referenced-media-items.js
index ac3112a4f4976..64523645ca96b 100644
--- a/packages/gatsby-source-wordpress/src/steps/source-nodes/fetch-nodes/fetch-referenced-media-items.js
+++ b/packages/gatsby-source-wordpress/src/steps/source-nodes/fetch-nodes/fetch-referenced-media-items.js
@@ -211,7 +211,8 @@ export const stripImageSizesFromUrl = url => {
const fileExtension = urlToFileExtension(url)
const imageSizesPattern = new RegExp(
- `(?:[-_]([0-9]+)x([0-9]+))${fileExtension ? `.${fileExtension}` : ``}`
+ // the backslash is doubled because a template literal turns "\." into a bare ".", which would match any character
+ `(?:[-_]([0-9]+)x([0-9]+))${fileExtension ? `\\.${fileExtension}` : ``}`
)
let urlWithoutSizes = url.replace(imageSizesPattern, ``)