Skip to content

Commit

Permalink
add ability to silence logger or replace with own for parser, resolves
Browse files Browse the repository at this point in the history
  • Loading branch information
derduher committed Dec 23, 2020
1 parent f6fc9fc commit 9502668
Show file tree
Hide file tree
Showing 8 changed files with 229 additions and 16 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Changelog

## 6.3.5

- Add option to silence or redirect logs from the parser (#337)
- `new XMLToSitemapItemStream({ logger: false })` or
- `new XMLToSitemapItemStream({ level: ErrorLevel.SILENT })` or
- `new XMLToSitemapItemStream({ logger: (level, ...message) => your.custom.logger(...message) })`

## 6.3.4

- bump dependencies
Expand Down
6 changes: 5 additions & 1 deletion api.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,11 @@ const { XMLToSitemapItemStream, ObjectStreamToJSON } = require('sitemap');

createReadStream('./some/sitemap.xml')
// turn the xml into sitemap option item options
.pipe(new XMLToSitemapItemStream())
.pipe(new XMLToSitemapItemStream({
// optional
level: ErrorLevel.WARN, // default is WARN; pass ErrorLevel.SILENT to silence
logger: false, // default logs to the console; pass false as another way to silence, or pass your own custom logger function
}))
// convert the object stream to JSON
.pipe(new ObjectStreamToJSON())
// write the library compatible options to disk
Expand Down
16 changes: 14 additions & 2 deletions examples/parse-existing-xml.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,21 @@
const { createReadStream, createWriteStream } = require('fs');
const { XMLToSitemapItemStream, ObjectStreamToJSON } = require('sitemap');
const {
XMLToSitemapItemStream,
ObjectStreamToJSON,
ErrorLevel,
} = require('sitemap');

createReadStream('./sitemap.xml')
// turn the xml into sitemap option item options
.pipe(new XMLToSitemapItemStream())
.pipe(
new XMLToSitemapItemStream({
// Optional: pass a logger of your own.
// by default it uses built in console.log/warn
logger: (level, ...message) => console.log(...message),
// Optional, passing SILENT overrides logger
level: ErrorLevel.WARN,
})
)
// convert the object stream to JSON
.pipe(new ObjectStreamToJSON())
// write the library compatible options to disk
Expand Down
51 changes: 40 additions & 11 deletions lib/sitemap-parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,19 @@ function newsTemplate(): NewsItem {
title: '',
};
}

/**
 * Signature of the function the parser calls to report input it does not handle.
 *
 * @param level - name of the `console` method the message corresponds to
 * @param message - values to log; accepts the same argument list as `console.log`
 */
type Logger = (
  level: 'warn' | 'error' | 'info' | 'log',
  // Use the whole parameter tuple of console.log; indexing it with `[0]`
  // would collapse the rest parameter to a bare `any`.
  ...message: Parameters<Console['log']>
) => void;
/**
 * Options accepted by the XMLToSitemapItemStream constructor, on top of the
 * inherited TransformOptions.
 */
export interface XMLToSitemapItemStreamOptions extends TransformOptions {
/**
 * Error level for the stream. The constructor falls back to ErrorLevel.WARN
 * when this is not set; ErrorLevel.SILENT replaces the logger with a no-op.
 */
level?: ErrorLevel;
/**
 * Function that receives the parser's log messages. Pass `false` to disable
 * logging; when omitted, the constructor uses the console-based default logger.
 */
logger?: Logger | false;
}
const defaultStreamOpts: XMLToSitemapItemStreamOptions = {};
/**
 * Built-in logger: forwards the message to the `console` method named by `level`.
 */
const defaultLogger: Logger = (level, ...message) => {
  console[level](...message);
};
/** Options used when the stream is constructed without an options argument. */
const defaultStreamOpts: XMLToSitemapItemStreamOptions = { logger: defaultLogger };

// TODO does this need to end with `options`
/**
Expand All @@ -71,6 +80,7 @@ const defaultStreamOpts: XMLToSitemapItemStreamOptions = {};
*/
export class XMLToSitemapItemStream extends Transform {
level: ErrorLevel;
logger: Logger;
saxStream: SAXStream;
constructor(opts = defaultStreamOpts) {
opts.objectMode = true;
Expand All @@ -83,6 +93,11 @@ export class XMLToSitemapItemStream extends Transform {
trim: true,
});
this.level = opts.level || ErrorLevel.WARN;
if (this.level !== ErrorLevel.SILENT && opts.logger !== false) {
this.logger = opts.logger ?? defaultLogger;
} else {
this.logger = () => undefined;
}
let currentItem: SitemapItem = tagTemplate();
let currentTag: string;
let currentVideo: VideoItem = videoTemplate();
Expand Down Expand Up @@ -119,11 +134,11 @@ export class XMLToSitemapItemStream extends Transform {
dontpushCurrentLink = true;
currentItem.ampLink = tag.attributes.href.value;
} else {
console.log('unhandled attr for xhtml:link', tag.attributes);
this.logger('log', 'unhandled attr for xhtml:link', tag.attributes);
}
}
} else {
console.warn('unhandled tag', tag.name);
this.logger('warn', 'unhandled tag', tag.name);
}
});

Expand Down Expand Up @@ -284,7 +299,12 @@ export class XMLToSitemapItemStream extends Transform {
break;

default:
console.log('unhandled text for tag:', currentTag, `'${text}'`);
this.logger(
'log',
'unhandled text for tag:',
currentTag,
`'${text}'`
);
break;
}
});
Expand Down Expand Up @@ -325,7 +345,7 @@ export class XMLToSitemapItemStream extends Transform {
break;

default:
console.log('unhandled cdata for tag:', currentTag);
this.logger('log', 'unhandled cdata for tag:', currentTag);
break;
}
});
Expand All @@ -340,7 +360,7 @@ export class XMLToSitemapItemStream extends Transform {
if (attr.name === 'relationship' && isAllowDeny(attr.value)) {
currentVideo['restriction:relationship'] = attr.value;
} else {
console.log('unhandled attr', currentTag, attr.name);
this.logger('log', 'unhandled attr', currentTag, attr.name);
}
break;
case TagNames['video:price']:
Expand All @@ -351,7 +371,7 @@ export class XMLToSitemapItemStream extends Transform {
} else if (attr.name === 'resolution' && isResolution(attr.value)) {
currentVideo['price:resolution'] = attr.value;
} else {
console.log('unhandled attr for video:price', attr.name);
this.logger('log', 'unhandled attr for video:price', attr.name);
}
break;
case TagNames['video:player_loc']:
Expand All @@ -360,14 +380,19 @@ export class XMLToSitemapItemStream extends Transform {
} else if (attr.name === 'allow_embed' && isValidYesNo(attr.value)) {
currentVideo['player_loc:allow_embed'] = attr.value;
} else {
console.log('unhandled attr for video:player_loc', attr.name);
this.logger(
'log',
'unhandled attr for video:player_loc',
attr.name
);
}
break;
case TagNames['video:platform']:
if (attr.name === 'relationship' && isAllowDeny(attr.value)) {
currentVideo['platform:relationship'] = attr.value;
} else {
console.log(
this.logger(
'log',
'unhandled attr for video:platform',
attr.name,
attr.value
Expand All @@ -378,11 +403,15 @@ export class XMLToSitemapItemStream extends Transform {
if (attr.name === 'title') {
currentVideo['gallery_loc:title'] = attr.value;
} else {
console.log('unhandled attr for video:galler_loc', attr.name);
this.logger(
'log',
'unhandled attr for video:galler_loc',
attr.name
);
}
break;
default:
console.log('unhandled attr', currentTag, attr.name);
this.logger('log', 'unhandled attr', currentTag, attr.name);
}
});

Expand Down
2 changes: 1 addition & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "sitemap",
"version": "6.3.4",
"version": "6.3.5",
"description": "Sitemap-generating lib/cli",
"keywords": [
"sitemap",
Expand Down
123 changes: 123 additions & 0 deletions tests/mocks/bad-tag-sitemap.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:news="http://www.google.com/schemas/sitemap-news/0.9" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:image="http://www.google.com/schemas/sitemap-image/1.1" xmlns:video="http://www.google.com/schemas/sitemap-video/1.1">
<foo>
This is not a good tag
</foo>
<url>
<loc>https://roosterteeth.com/episode/rouletsplay-2018-goldeneye-source&amp;%3E%3C'%22</loc>
<changefreq>weekly</changefreq>
<video:video>
<video:thumbnail_loc>https://rtv3-img-roosterteeth.akamaized.net/store/0e841100-289b-4184-ae30-b6a16736960a.jpg/sm/thumb3.jpg&amp;&gt;&lt;'"</video:thumbnail_loc>
<video:title>2018:E6 - GoldenEye: Source&amp;&gt;&lt;'"</video:title>
<video:description>We play gun game in GoldenEye: Source with a good friend of ours. His name is Gruchy. Dan Gruchy.&amp;&gt;&lt;'"</video:description>
<video:player_loc autoplay="ap=1&amp;>&lt;'&quot;" allow_embed="yes">https://roosterteeth.com/embed/rouletsplay-2018-goldeneye-source&amp;&gt;&lt;'"</video:player_loc>
<video:duration>1208</video:duration>
<video:publication_date>2018-04-27T17:00:00.000Z</video:publication_date>
<video:tag>fruit&amp;&gt;&lt;'"</video:tag>
<video:tag>flies&amp;&gt;&lt;'"</video:tag>
<video:requires_subscription>YES</video:requires_subscription>
<video:id type="url">http://example.com/url&amp;&gt;&lt;'"</video:id>
</video:video>
</url>
<url>
<loc>https://roosterteeth.com/episode/let-s-play-2018-minecraft-episode-310&amp;%3E%3C'%22</loc>
<changefreq>weekly</changefreq>
<video:video>
<video:thumbnail_loc>https://rtv3-img-roosterteeth.akamaized.net/store/f255cd83-3d69-4ee8-959a-ac01817fa204.jpg/sm/thumblpchompinglistv2.jpg&amp;&gt;&lt;'"</video:thumbnail_loc>
<video:title>2018:E90 - Minecraft - Episode 310 - Chomping List&amp;&gt;&lt;'"</video:title>
<video:description>Now that the gang's a bit more settled into Achievement Cove, it's time for a competition. Whoever collects the most unique food items by the end of the episode wins. The winner may even receive a certain golden tower.&amp;&gt;&lt;'"</video:description>
<video:player_loc>https://roosterteeth.com/embed/let-s-play-2018-minecraft-episode-310&amp;&gt;&lt;'"</video:player_loc>
<video:duration>3070</video:duration>
<video:expiration_date>2012-07-16T19:20:30+08:00</video:expiration_date>
<video:rating>2.5</video:rating>
<video:view_count>1000</video:view_count>
<video:publication_date>2018-04-27T14:00:00.000Z</video:publication_date>
<video:tag>steak&amp;&gt;&lt;'"</video:tag>
<video:category>Baking&amp;&gt;&lt;'"</video:category>
<video:family_friendly>no</video:family_friendly>
<video:restriction relationship="deny">IE GB US CA</video:restriction>
<video:gallery_loc title="awhu series page&amp;>&lt;'&quot;">https://roosterteeth.com/series/awhu&amp;&gt;&lt;'"</video:gallery_loc>
<video:price resolution="HD" currency="USD" type="rent">1.99</video:price>
<video:requires_subscription>no</video:requires_subscription>
<video:uploader>GrillyMcGrillerson&amp;&gt;&lt;'"</video:uploader>
<video:platform relationship="allow">tv</video:platform>
<video:live>no</video:live>
</video:video>
</url>
<url>
<loc>https://roosterteeth.com/episode/let-s-watch-2018-house-party-part-2</loc>
<lastmod>2016-09-12T00:00:00.000Z</lastmod>
<changefreq>daily</changefreq>
<priority>0.6</priority>
<video:video>
<video:thumbnail_loc>https://rtv3-img-roosterteeth.akamaized.net/store/9dd9681a-0557-45fe-86b3-b662c91bbae7.jpg/sm/thumblwhouseparty2v4.jpg&amp;&gt;&lt;'"</video:thumbnail_loc>
<video:title>2018:E10 - House Party - Part 2 (Uncensored)&amp;&gt;&lt;'"</video:title>
<video:description>Achievement Hunter's House Party quest for some one-night intimacy continues. Can they use Ashley and Madison's sibling rivalry for their own dubious gains?&amp;&gt;&lt;'"</video:description>
<video:player_loc>https://roosterteeth.com/embed/let-s-watch-2018-house-party-part-2&amp;&gt;&lt;'"</video:player_loc>
<video:duration>2422</video:duration>
<video:publication_date>2018-04-26T17:00:00.000Z</video:publication_date>
<video:requires_subscription>no</video:requires_subscription>
</video:video>
<xhtml:link rel="alternate" hreflang="en" href="http://test.com/page-1/&amp;%3E%3C'%22"/>
<xhtml:link rel="alternate" hreflang="ja" href="http://test.com/page-1/ja/&amp;%3E%3C'%22"/>
<xhtml:link rel="alternate" href="android-app://com.company.test/page-1/&amp;>&lt;'&quot;"/>
<xhtml:link rel="amphtml" href="http://ampproject.org/article.amp.html&amp;>&lt;'&quot;"/>
</url>
<url>
<loc>http://www.example.org/business/article55.html&amp;%3E%3C'%22</loc>
<lastmod>2015-06-27T15:30:00.000Z</lastmod>
<news:news>
<news:publication>
<news:name>The Example Times&amp;&gt;&lt;'"</news:name>
<news:language>en</news:language>
</news:publication>
<news:access>Registration</news:access>
<news:genres>PressRelease, Blog</news:genres>
<news:publication_date>2008-12-23</news:publication_date>
<news:title>Companies A, B in Merger Talks&amp;&gt;&lt;'"</news:title>
<news:keywords>business, merger, acquisition, A, B&amp;&gt;&lt;'"</news:keywords>
<news:stock_tickers>NASDAQ:A, NASDAQ:B</news:stock_tickers>
</news:news>
</url>
<url>
<loc>http://example.com/2&amp;%3E%3C'%22</loc>
<lastmod>2011-06-27T00:00:00.000Z</lastmod>
<changefreq>always</changefreq>
<priority>0.9</priority>
<image:image>
<image:loc>http://test.com/img1.jpg&amp;%3E%3C'%22</image:loc>
<image:caption>An image&amp;&gt;&lt;'"</image:caption>
<image:geo_location>London, United Kingdom&amp;&gt;&lt;'"</image:geo_location>
<image:title>The Title of Image One&amp;&gt;&lt;'"</image:title>
<image:license>https://creativecommons.org/licenses/by/4.0/&amp;&gt;&lt;'"</image:license>
</image:image>
<image:image>
<image:loc>http://test.com/img2.jpg&amp;%3E%3C'%22</image:loc>
<image:caption>Another image&amp;&gt;&lt;'"</image:caption>
<image:geo_location>London, United Kingdom&amp;&gt;&lt;'"</image:geo_location>
<image:title>The Title of Image Two&amp;&gt;&lt;'"</image:title>
<image:license>https://creativecommons.org/licenses/by/4.0/&amp;&gt;&lt;'"</image:license>
</image:image>
</url>
<url>
<loc>http://example.com/1&amp;%3E%3C'%22</loc>
<lastmod>2011-06-27T00:00:00.000Z</lastmod>
<changefreq>always</changefreq>
<priority>0.9</priority>
<image:image>
<image:loc>http://urltest.com&amp;&gt;&lt;'"/</image:loc>
</image:image>
<image:image>
<image:loc>http://example.com/img.jpg&amp;%3E%3C'%22</image:loc>
</image:image>
</url>
<url>
<loc>http://example.com&amp;&gt;&lt;'"/</loc>
<lastmod>2011-06-27T00:00:00.000Z</lastmod>
<changefreq>always</changefreq>
<priority>0.9</priority>
<image:image>
<image:loc>http://urltest.com&amp;&gt;&lt;'"/</image:loc>
</image:image>
</url>
</urlset>
38 changes: 38 additions & 0 deletions tests/sitemap-parser.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
ObjectStreamToJSON,
} from '../lib/sitemap-parser';
import { SitemapStreamOptions } from '../dist';
import { ErrorLevel } from '../lib/types';
const pipeline = promisify(pipe);
// eslint-disable-next-line @typescript-eslint/no-var-requires
const normalizedSample = require('./mocks/sampleconfig.normalized.json');
Expand Down Expand Up @@ -41,6 +42,43 @@ describe('XMLToSitemapItemStream', () => {
expect(sitemap).toEqual(normalizedSample.urls);
});

it('stream parses bad XML', async () => {
  const logger = jest.fn();

  // Runs the bad-tag fixture through a parser built with `opts` and returns
  // the emitted items. Each run gets its own result array so that one run's
  // output cannot leak into the assertions of another.
  const parseBadSitemap = async (
    opts: ConstructorParameters<typeof XMLToSitemapItemStream>[0]
  ): Promise<SitemapStreamOptions[]> => {
    const items: SitemapStreamOptions[] = [];
    await pipeline(
      createReadStream(resolve(__dirname, './mocks/bad-tag-sitemap.xml'), {
        encoding: 'utf8',
      }),
      new XMLToSitemapItemStream(opts),
      new Writable({
        objectMode: true,
        write(chunk, a, cb): void {
          items.push(chunk);
          cb();
        },
      })
    );
    return items;
  };

  // With a custom logger: the unknown tag is reported but parsing still succeeds.
  expect(await parseBadSitemap({ logger })).toEqual(normalizedSample.urls);
  expect(logger.mock.calls.length).toBe(2);
  expect(logger.mock.calls[0][1]).toBe('unhandled tag');
  expect(logger.mock.calls[0][2]).toBe('foo');

  // With SILENT: the same logger must not be called again (count stays at 2),
  // and the parsed output must be unaffected.
  expect(
    await parseBadSitemap({ logger, level: ErrorLevel.SILENT })
  ).toEqual(normalizedSample.urls);
  expect(logger.mock.calls.length).toBe(2);
});

it('stream parses XML with cdata', async () => {
const sitemap: SitemapStreamOptions[] = [];
await pipeline(
Expand Down

0 comments on commit 9502668

Please sign in to comment.