diff --git a/package.json b/package.json index 542fee4..0b850b6 100644 --- a/package.json +++ b/package.json @@ -38,11 +38,11 @@ "@metascraper/helpers": "~5.2.0", "cheerio": "~1.0.0-rc.3", "debug": "~4.1.1", - "file-type": "~11.0.0", "got": "~9.6.0", "he": "~1.2.0", "html-encode": "~2.1.1", "mem": "~4.3.0", + "mime-types": "~2.1.24", "p-cancelable": "~2.0.0", "reachable-url": "~1.1.8", "require-one-of": "~1.0.3", diff --git a/src/auto-domains.json b/src/auto-domains.json index fd10417..2e3ece3 100644 --- a/src/auto-domains.json +++ b/src/auto-domains.json @@ -24,5 +24,6 @@ "medium", "techcrunch", "engadget", - "theverge" + "theverge", + "giphy" ] diff --git a/src/html.js b/src/html.js index 733ae4f..8c39f4d 100644 --- a/src/html.js +++ b/src/html.js @@ -1,8 +1,8 @@ 'use strict' const { isMime } = require('@metascraper/helpers') +const mimeTypes = require('mime-types') const { getDomain } = require('tldts') -const fileType = require('file-type') const cheerio = require('cheerio') const { URL } = require('url') const path = require('path') @@ -86,8 +86,9 @@ const htmlTemplate = () => ` module.exports = ({ html, url, headers }) => { const contentType = headers['content-type'] - const htmlTyle = fileType(Buffer.from(html, 0, fileType.minimumBytes)) - const content = htmlTyle === undefined ? html : htmlTemplate() + const isHTML = + mimeTypes.extension(contentType) === 'html' && typeof html === 'string' && html.length + const content = isHTML ? html : htmlTemplate() const $ = cheerio.load(content, { decodeEntities: false,