From 95026680817553a03eed7b41ee84992f045643d7 Mon Sep 17 00:00:00 2001 From: Patrick Weygand Date: Tue, 22 Dec 2020 20:16:42 -0800 Subject: [PATCH] add ability to silence logger or replace with own for parser, resolves #337 --- CHANGELOG.md | 7 ++ api.md | 6 +- examples/parse-existing-xml.js | 16 ++++- lib/sitemap-parser.ts | 51 ++++++++++--- package-lock.json | 2 +- package.json | 2 +- tests/mocks/bad-tag-sitemap.xml | 123 ++++++++++++++++++++++++++++++++ tests/sitemap-parser.test.ts | 38 ++++++++++ 8 files changed, 229 insertions(+), 16 deletions(-) create mode 100644 tests/mocks/bad-tag-sitemap.xml diff --git a/CHANGELOG.md b/CHANGELOG.md index 795de23d..0611c482 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## 6.3.5 + +- Add option to silence or redirect logs from parse #337 + - `new XMLToSitemapItemStream({ logger: false })` or + - `new XMLToSitemapItemStream({ level: ErrorLevel.SILENT })` or + - `new XMLToSitemapItemStream({ logger: (level, ...message) => your.custom.logger(...message) })` + ## 6.3.4 - bump dependencies diff --git a/api.md b/api.md index 016a399b..94a17f7c 100644 --- a/api.md +++ b/api.md @@ -49,7 +49,11 @@ const { XMLToSitemapItemStream, ObjectStreamToJSON } = require('sitemap'); createReadStream('./some/sitemap.xml') // turn the xml into sitemap option item options -.pipe(new XMLToSitemapItemStream()) +.pipe(new XMLToSitemapItemStream({ + // optional + level: ErrorLevel.WARN, // default is WARN; pass ErrorLevel.SILENT to silence + logger: false // default logs to the console; pass false as another way to silence, or pass your own custom logger +})) // convert the object stream to JSON .pipe(new ObjectStreamToJSON()) // write the library compatible options to disk diff --git a/examples/parse-existing-xml.js b/examples/parse-existing-xml.js index 047bdb7a..30086247 100644 --- a/examples/parse-existing-xml.js +++ b/examples/parse-existing-xml.js @@ -1,9 +1,21 @@ const { createReadStream, createWriteStream } = require('fs'); -const { 
XMLToSitemapItemStream, ObjectStreamToJSON } = require('sitemap'); +const { + XMLToSitemapItemStream, + ObjectStreamToJSON, + ErrorLevel, +} = require('sitemap'); createReadStream('./sitemap.xml') // turn the xml into sitemap option item options - .pipe(new XMLToSitemapItemStream()) + .pipe( + new XMLToSitemapItemStream({ + // Optional: pass a logger of your own. + // by default it uses built in console.log/warn + logger: (level, ...message) => console.log(...message), + // Optional, passing SILENT overrides logger + level: ErrorLevel.WARN, + }) + ) // convert the object stream to JSON .pipe(new ObjectStreamToJSON()) // write the library compatible options to disk diff --git a/lib/sitemap-parser.ts b/lib/sitemap-parser.ts index 31ecf535..31f5a03a 100644 --- a/lib/sitemap-parser.ts +++ b/lib/sitemap-parser.ts @@ -59,10 +59,19 @@ function newsTemplate(): NewsItem { title: '', }; } + +type Logger = ( + level: 'warn' | 'error' | 'info' | 'log', + ...message: Parameters[0] +) => void; export interface XMLToSitemapItemStreamOptions extends TransformOptions { level?: ErrorLevel; + logger?: Logger | false; } -const defaultStreamOpts: XMLToSitemapItemStreamOptions = {}; +const defaultLogger: Logger = (level, ...message) => console[level](...message); +const defaultStreamOpts: XMLToSitemapItemStreamOptions = { + logger: defaultLogger, +}; // TODO does this need to end with `options` /** @@ -71,6 +80,7 @@ const defaultStreamOpts: XMLToSitemapItemStreamOptions = {}; */ export class XMLToSitemapItemStream extends Transform { level: ErrorLevel; + logger: Logger; saxStream: SAXStream; constructor(opts = defaultStreamOpts) { opts.objectMode = true; @@ -83,6 +93,11 @@ export class XMLToSitemapItemStream extends Transform { trim: true, }); this.level = opts.level || ErrorLevel.WARN; + if (this.level !== ErrorLevel.SILENT && opts.logger !== false) { + this.logger = opts.logger ?? 
defaultLogger; + } else { + this.logger = () => undefined; + } let currentItem: SitemapItem = tagTemplate(); let currentTag: string; let currentVideo: VideoItem = videoTemplate(); @@ -119,11 +134,11 @@ export class XMLToSitemapItemStream extends Transform { dontpushCurrentLink = true; currentItem.ampLink = tag.attributes.href.value; } else { - console.log('unhandled attr for xhtml:link', tag.attributes); + this.logger('log', 'unhandled attr for xhtml:link', tag.attributes); } } } else { - console.warn('unhandled tag', tag.name); + this.logger('warn', 'unhandled tag', tag.name); } }); @@ -284,7 +299,12 @@ export class XMLToSitemapItemStream extends Transform { break; default: - console.log('unhandled text for tag:', currentTag, `'${text}'`); + this.logger( + 'log', + 'unhandled text for tag:', + currentTag, + `'${text}'` + ); break; } }); @@ -325,7 +345,7 @@ export class XMLToSitemapItemStream extends Transform { break; default: - console.log('unhandled cdata for tag:', currentTag); + this.logger('log', 'unhandled cdata for tag:', currentTag); break; } }); @@ -340,7 +360,7 @@ export class XMLToSitemapItemStream extends Transform { if (attr.name === 'relationship' && isAllowDeny(attr.value)) { currentVideo['restriction:relationship'] = attr.value; } else { - console.log('unhandled attr', currentTag, attr.name); + this.logger('log', 'unhandled attr', currentTag, attr.name); } break; case TagNames['video:price']: @@ -351,7 +371,7 @@ export class XMLToSitemapItemStream extends Transform { } else if (attr.name === 'resolution' && isResolution(attr.value)) { currentVideo['price:resolution'] = attr.value; } else { - console.log('unhandled attr for video:price', attr.name); + this.logger('log', 'unhandled attr for video:price', attr.name); } break; case TagNames['video:player_loc']: @@ -360,14 +380,19 @@ export class XMLToSitemapItemStream extends Transform { } else if (attr.name === 'allow_embed' && isValidYesNo(attr.value)) { currentVideo['player_loc:allow_embed'] = 
attr.value; } else { - console.log('unhandled attr for video:player_loc', attr.name); + this.logger( + 'log', + 'unhandled attr for video:player_loc', + attr.name + ); } break; case TagNames['video:platform']: if (attr.name === 'relationship' && isAllowDeny(attr.value)) { currentVideo['platform:relationship'] = attr.value; } else { - console.log( + this.logger( + 'log', 'unhandled attr for video:platform', attr.name, attr.value @@ -378,11 +403,15 @@ export class XMLToSitemapItemStream extends Transform { if (attr.name === 'title') { currentVideo['gallery_loc:title'] = attr.value; } else { - console.log('unhandled attr for video:galler_loc', attr.name); + this.logger( + 'log', + 'unhandled attr for video:galler_loc', + attr.name + ); } break; default: - console.log('unhandled attr', currentTag, attr.name); + this.logger('log', 'unhandled attr', currentTag, attr.name); } }); diff --git a/package-lock.json b/package-lock.json index 8d2415a9..83496bab 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,6 +1,6 @@ { "name": "sitemap", - "version": "6.3.4", + "version": "6.3.5", "lockfileVersion": 1, "requires": true, "dependencies": { diff --git a/package.json b/package.json index 6242880a..413b3056 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "sitemap", - "version": "6.3.4", + "version": "6.3.5", "description": "Sitemap-generating lib/cli", "keywords": [ "sitemap", diff --git a/tests/mocks/bad-tag-sitemap.xml b/tests/mocks/bad-tag-sitemap.xml new file mode 100644 index 00000000..d99ff878 --- /dev/null +++ b/tests/mocks/bad-tag-sitemap.xml @@ -0,0 +1,123 @@ + + + + This is not a good tag + + + https://roosterteeth.com/episode/rouletsplay-2018-goldeneye-source&%3E%3C'%22 + weekly + + https://rtv3-img-roosterteeth.akamaized.net/store/0e841100-289b-4184-ae30-b6a16736960a.jpg/sm/thumb3.jpg&><'" + 2018:E6 - GoldenEye: Source&><'" + We play gun game in GoldenEye: Source with a good friend of ours. His name is Gruchy. 
Dan Gruchy.&><'" + https://roosterteeth.com/embed/rouletsplay-2018-goldeneye-source&><'" + 1208 + 2018-04-27T17:00:00.000Z + fruit&><'" + flies&><'" + YES + http://example.com/url&><'" + + + + https://roosterteeth.com/episode/let-s-play-2018-minecraft-episode-310&%3E%3C'%22 + weekly + + https://rtv3-img-roosterteeth.akamaized.net/store/f255cd83-3d69-4ee8-959a-ac01817fa204.jpg/sm/thumblpchompinglistv2.jpg&><'" + 2018:E90 - Minecraft - Episode 310 - Chomping List&><'" + Now that the gang's a bit more settled into Achievement Cove, it's time for a competition. Whoever collects the most unique food items by the end of the episode wins. The winner may even receive a certain golden tower.&><'" + https://roosterteeth.com/embed/let-s-play-2018-minecraft-episode-310&><'" + 3070 + 2012-07-16T19:20:30+08:00 + 2.5 + 1000 + 2018-04-27T14:00:00.000Z + steak&><'" + Baking&><'" + no + IE GB US CA + https://roosterteeth.com/series/awhu&><'" + 1.99 + no + GrillyMcGrillerson&><'" + tv + no + + + + https://roosterteeth.com/episode/let-s-watch-2018-house-party-part-2 + 2016-09-12T00:00:00.000Z + daily + 0.6 + + https://rtv3-img-roosterteeth.akamaized.net/store/9dd9681a-0557-45fe-86b3-b662c91bbae7.jpg/sm/thumblwhouseparty2v4.jpg&><'" + 2018:E10 - House Party - Part 2 (Uncensored)&><'" + Achievement Hunter's House Party quest for some one-night intimacy continues. 
Can they use Ashley and Madison's sibling rivalry for their own dubious gains?&><'" + https://roosterteeth.com/embed/let-s-watch-2018-house-party-part-2&><'" + 2422 + 2018-04-26T17:00:00.000Z + no + + + + + + + + http://www.example.org/business/article55.html&%3E%3C'%22 + 2015-06-27T15:30:00.000Z + + + The Example Times&><'" + en + + Registration + PressRelease, Blog + 2008-12-23 + Companies A, B in Merger Talks&><'" + business, merger, acquisition, A, B&><'" + NASDAQ:A, NASDAQ:B + + + + http://example.com/2&%3E%3C'%22 + 2011-06-27T00:00:00.000Z + always + 0.9 + + http://test.com/img1.jpg&%3E%3C'%22 + An image&><'" + London, United Kingdom&><'" + The Title of Image One&><'" + https://creativecommons.org/licenses/by/4.0/&><'" + + + http://test.com/img2.jpg&%3E%3C'%22 + Another image&><'" + London, United Kingdom&><'" + The Title of Image Two&><'" + https://creativecommons.org/licenses/by/4.0/&><'" + + + + http://example.com/1&%3E%3C'%22 + 2011-06-27T00:00:00.000Z + always + 0.9 + + http://urltest.com&><'"/ + + + http://example.com/img.jpg&%3E%3C'%22 + + + + http://example.com&><'"/ + 2011-06-27T00:00:00.000Z + always + 0.9 + + http://urltest.com&><'"/ + + + diff --git a/tests/sitemap-parser.test.ts b/tests/sitemap-parser.test.ts index 1d7eea0b..917fa0ba 100644 --- a/tests/sitemap-parser.test.ts +++ b/tests/sitemap-parser.test.ts @@ -8,6 +8,7 @@ import { ObjectStreamToJSON, } from '../lib/sitemap-parser'; import { SitemapStreamOptions } from '../dist'; +import { ErrorLevel } from '../lib/types'; const pipeline = promisify(pipe); // eslint-disable-next-line @typescript-eslint/no-var-requires const normalizedSample = require('./mocks/sampleconfig.normalized.json'); @@ -41,6 +42,43 @@ describe('XMLToSitemapItemStream', () => { expect(sitemap).toEqual(normalizedSample.urls); }); + it('stream parses bad XML', async () => { + const sitemap: SitemapStreamOptions[] = []; + const logger = jest.fn(); + await pipeline( + createReadStream(resolve(__dirname, 
'./mocks/bad-tag-sitemap.xml'), { + encoding: 'utf8', + }), + new XMLToSitemapItemStream({ logger }), + new Writable({ + objectMode: true, + write(chunk, a, cb): void { + sitemap.push(chunk); + cb(); + }, + }) + ); + expect(sitemap).toEqual(normalizedSample.urls); + expect(logger.mock.calls.length).toBe(2); + expect(logger.mock.calls[0][1]).toBe('unhandled tag'); + expect(logger.mock.calls[0][2]).toBe('foo'); + + await pipeline( + createReadStream(resolve(__dirname, './mocks/bad-tag-sitemap.xml'), { + encoding: 'utf8', + }), + new XMLToSitemapItemStream({ logger, level: ErrorLevel.SILENT }), + new Writable({ + objectMode: true, + write(chunk, a, cb): void { + sitemap.push(chunk); + cb(); + }, + }) + ); + expect(logger.mock.calls.length).toBe(2); + }); + it('stream parses XML with cdata', async () => { const sitemap: SitemapStreamOptions[] = []; await pipeline(