Skip to content

Commit

Permalink
Merge pull request #126 from curbengh/camaro
Browse files Browse the repository at this point in the history
test: use camaro to parse xml
  • Loading branch information
curbengh authored Dec 27, 2019
2 parents 98408e4 + 6c98ca4 commit 76c220b
Show file tree
Hide file tree
Showing 3 changed files with 135 additions and 67 deletions.
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
},
"devDependencies": {
"babel-eslint": "^10.0.1",
"camaro": "^4.1.2",
"chai": "^4.2.0",
"cheerio": "^0.22.0",
"eslint": "^6.0.1",
Expand Down
129 changes: 62 additions & 67 deletions test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ const { join } = require('path');
const { readFileSync } = require('fs');
const cheerio = require('cheerio');
const { encodeURL, full_url_for } = require('hexo-util');
const p = require('./parse');

env.addFilter('uriencode', str => {
return encodeURI(str);
Expand Down Expand Up @@ -41,15 +42,14 @@ describe('Feed generator', () => {
let posts = {};
let locals = {};

before(() => {
return Post.insert([
before(async () => {
await Post.insert([
{source: 'foo', slug: 'foo', content: '<h6>TestHTML</h6>', date: 1e8},
{source: 'bar', slug: 'bar', date: 1e8 + 1},
{source: 'baz', slug: 'baz', title: 'With Image', image: 'test.png', date: 1e8 - 1}
]).then(data => {
posts = Post.sort('-date');
locals = hexo.locals.toObject();
});
]);
posts = Post.sort('-date');
locals = hexo.locals.toObject();
});

it('type = atom', () => {
Expand Down Expand Up @@ -109,93 +109,87 @@ describe('Feed generator', () => {
}));
});

it('Preserves HTML in the content field', () => {
it('Preserves HTML in the content field - atom', async () => {
hexo.config.feed = {
type: 'rss2',
path: 'rss2.xml',
type: 'atom',
path: 'atom.xml',
content: true
};
let feedCfg = hexo.config.feed;
let result = generator(locals, feedCfg.type, feedCfg.path);
let $ = cheerio.load(result.data, {xmlMode: true});

let description = $('content\\:encoded').html()
.replace(/^<!\[CDATA\[/, '')
.replace(/\]\]>$/, '');
const feedCfg = hexo.config.feed;
const result = generator(locals, feedCfg.type, feedCfg.path);
const atom = await p(result.data);

description.should.be.equal('<h6>TestHTML</h6>');
atom.items[1].description.includes('<h6>TestHTML</h6>').should.eql(true);
});

it('Preserves HTML in the content field - rss2', async () => {
hexo.config.feed = {
type: 'atom',
path: 'atom.xml',
type: 'rss2',
path: 'rss2.xml',
content: true
};
feedCfg = hexo.config.feed;
result = generator(locals, feedCfg.type, feedCfg.path);
$ = cheerio.load(result.data, {xmlMode: true});
description = $('content[type="html"]').html()
.replace(/^<!\[CDATA\[/, '')
.replace(/\]\]>$/, '');

description.should.be.equal('<h6>TestHTML</h6>');
const feedCfg = hexo.config.feed;
const result = generator(locals, feedCfg.type, feedCfg.path);
const rss = await p(result.data);

rss.items[1].description.includes('<h6>TestHTML</h6>').should.eql(true);
});

it('Relative URL handling', () => {
it('Relative URL handling', async () => {
hexo.config.feed = {
type: 'atom',
path: 'atom.xml'
};

const checkURL = function(url, root, valid) {
const checkURL = async function(url, root, valid) {
hexo.config.url = url;
hexo.config.root = root;

const feedCfg = hexo.config.feed;
const result = generator(locals, feedCfg.type, feedCfg.path);
const $ = cheerio.load(result.data);

$('feed>id').text().should.eql(valid);
const atom = await p(result.data);
atom.id.should.eql(valid);
};

checkURL('http://localhost/', '/', 'http://localhost/');
await checkURL('http://localhost/', '/', 'http://localhost/');

const GOOD = 'http://localhost/blog/';

checkURL('http://localhost/blog', '/blog/', GOOD);
checkURL('http://localhost/blog', '/blog', GOOD);
checkURL('http://localhost/blog/', '/blog/', GOOD);
checkURL('http://localhost/blog/', '/blog', GOOD);
await checkURL('http://localhost/blog', '/blog/', GOOD);
await checkURL('http://localhost/blog', '/blog', GOOD);
await checkURL('http://localhost/blog/', '/blog/', GOOD);
await checkURL('http://localhost/blog/', '/blog', GOOD);

checkURL('http://localhost/b/l/o/g', '/', 'http://localhost/b/l/o/g/');
await checkURL('http://localhost/b/l/o/g', '/', 'http://localhost/b/l/o/g/');

});

it('IDN handling', () => {
it('IDN handling', async () => {
hexo.config.feed = {
type: 'atom',
path: 'atom.xml'
};

const checkURL = function(url, root) {
const checkURL = async function(url, root) {
hexo.config.url = url;
hexo.config.root = root;

const feedCfg = hexo.config.feed;
const result = generator(locals, feedCfg.type, feedCfg.path);
const $ = cheerio.load(result.data);

if (url[url.length - 1] !== '/') url += '/';
const punyIDN = encodeURL(url);
$('feed>id').text().should.eql(punyIDN);
const atom = await p(result.data);
atom.id.should.eql(punyIDN);
};

checkURL('http://gôg.com/', '/');
await checkURL('http://gôg.com/', '/');

checkURL('http://gôg.com/bár', '/bár/');
await checkURL('http://gôg.com/bár', '/bár/');
});

it('Root encoding', () => {
it('Root encoding', async () => {
const file = 'atom.xml';
hexo.config.feed = {
type: 'atom',
Expand All @@ -204,49 +198,50 @@ describe('Feed generator', () => {

const domain = 'http://example.com/';

const checkURL = function(root, valid) {
const checkURL = async function(root, valid) {
hexo.config.url = domain;
hexo.config.root = root;

const feedCfg = hexo.config.feed;
const result = generator(locals, feedCfg.type, feedCfg.path);
const $ = cheerio.load(result.data);

$('feed>link').attr('href').should.eql(valid);
const atom = await p(result.data);
atom.link.should.eql(valid);
};
checkURL('/', '/' + file);

checkURL('blo g/', 'blo%20g/' + file);
await checkURL('/', '/' + file);

await checkURL('blo g/', 'blo%20g/' + file);
});

it('Prints an enclosure on `image` metadata', () => {
it('Prints an enclosure on `image` metadata', async () => {
hexo.config.feed = {
type: 'atom',
path: 'atom.xml'
};

const checkURL = function(url, root, selector) {
const checkURL = async function(url, root, index) {
hexo.config.url = url;
hexo.config.root = root;

const feedCfg = hexo.config.feed;
const result = generator(locals, feedCfg.type, feedCfg.path);
const $ = cheerio.load(result.data);

$(selector).length.should.eq(1);
const feed = await p(result.data);
feed.items[index].image.should.not.eql('');
};

checkURL('http://localhost/', '/', 'feed>entry:nth-of-type(3)>content[type="image"]');
await checkURL('http://localhost/', '/', 2);

hexo.config.feed = {
type: 'rss2',
path: 'rss2.xml',
content: true
};
checkURL('http://localhost/', '/', 'item:nth-of-type(3)>enclosure');
await checkURL('http://localhost/', '/', 2);
});

it('Icon (atom)', () => {
it('Icon (atom)', async () => {
hexo.config.url = 'http://example.com';
hexo.config.root = '/';

Expand All @@ -258,12 +253,12 @@ describe('Feed generator', () => {

const feedCfg = hexo.config.feed;
const result = generator(locals, feedCfg.type, feedCfg.path);
const $ = cheerio.load(result.data);
const atom = await p(result.data);

$('feed>icon').text().should.eql(full_url_for.call(hexo, hexo.config.feed.icon));
atom.icon.should.eql(full_url_for.call(hexo, hexo.config.feed.icon));
});

it('Icon (atom) - no icon', () => {
it('Icon (atom) - no icon', async () => {
hexo.config.feed = {
type: 'atom',
path: 'atom.xml',
Expand All @@ -272,12 +267,12 @@ describe('Feed generator', () => {

const feedCfg = hexo.config.feed;
const result = generator(locals, feedCfg.type, feedCfg.path);
const $ = cheerio.load(result.data);
const atom = await p(result.data);

$('feed>icon').length.should.eql(0);
atom.icon.length.should.eql(0);
});

it('Icon (rss2)', () => {
it('Icon (rss2)', async () => {
hexo.config.url = 'http://example.com';
hexo.config.root = '/';

Expand All @@ -289,12 +284,12 @@ describe('Feed generator', () => {

const feedCfg = hexo.config.feed;
const result = generator(locals, feedCfg.type, feedCfg.path);
const $ = cheerio.load(result.data);
const rss = await p(result.data);

$('rss>channel>image>url').text().should.eql(full_url_for.call(hexo, hexo.config.feed.icon));
rss.icon.url.should.eql(full_url_for.call(hexo, hexo.config.feed.icon));
});

it('Icon (rss2) - no icon', () => {
it('Icon (rss2) - no icon', async () => {
hexo.config.feed = {
type: 'rss2',
path: 'rss2.xml',
Expand All @@ -303,9 +298,9 @@ describe('Feed generator', () => {

const feedCfg = hexo.config.feed;
const result = generator(locals, feedCfg.type, feedCfg.path);
const $ = cheerio.load(result.data);
const rss = await p(result.data);

$('rss>channel>image').length.should.eql(0);
rss.icon.url.length.should.eql(0);
});

it('path must follow order of type', () => {
Expand Down
72 changes: 72 additions & 0 deletions test/parse.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
'use strict';

/* !
* Ported from feed-furious 1.0.0 to support async-ed camaro v4+
* Licensed MIT (c) 2017 Tuan Anh Tran <https://tuananh.org/>
* https://github.com/tuananh/feed-furious
*/


const { transform } = require('camaro');

// Small XPath builders so the shared prefixes are spelled once.
const rssChannelPath = field => `rss/channel/${field}`;
const atomFeedPath = field => `feed/${field}`;

// Field map applied to every node matched by '//item' in an RSS 2.0 feed.
const rssItemFields = {
  title: 'title',
  link: 'link',
  description: 'description',
  content: 'content:encoded',
  image: 'enclosure[@type="image"]/@url',
  date: 'pubDate',
  id: 'guid',
  categories: ['category', '.']
};

// Field map applied to every node matched by '//entry' in an Atom feed.
const atomEntryFields = {
  id: 'id',
  title: 'title',
  date: 'published',
  description: 'summary',
  content: 'content[@type="html"]',
  image: 'content[@type="image"]/@src',
  link: 'link',
  categories: ['category', '@term']
};

/**
 * Templates handed to camaro's `transform`, keyed by feed type
 * ('rss' or 'atom'). Each leaf is an XPath expression; an array
 * `[xpath, fields]` describes a repeated node and the fields to read from it.
 */
const template = {
  rss: {
    title: rssChannelPath('title'),
    // Either the plain <link> or the <atom:link> of the channel.
    link: `${rssChannelPath('link')}|${rssChannelPath('atom:link')}`,
    icon: {
      url: rssChannelPath('image/url'),
      title: rssChannelPath('image/title'),
      link: rssChannelPath('image/link')
    },
    description: rssChannelPath('description'),
    language: rssChannelPath('language'),
    updated: rssChannelPath('lastBuildDate'),
    published: rssChannelPath('pubDate'),
    items: ['//item', rssItemFields]
  },
  atom: {
    title: atomFeedPath('title'),
    icon: atomFeedPath('icon'),
    updated: atomFeedPath('updated'),
    link: atomFeedPath('link/@href'),
    id: atomFeedPath('id'),
    items: ['//entry', atomEntryFields]
  }
};

/**
 * Work out whether a feed document is RSS or Atom by probing for its title.
 *
 * The document is run through `transform` with one title XPath per format;
 * the first format (RSS, then Atom) whose title comes back truthy wins.
 * A feed whose title is empty is therefore reported as unknown.
 *
 * @param {string} xml - feed document, passed unchanged to `transform`
 * @returns {Promise<string>} 'rss' or 'atom'
 * @throws {Error} 'unknown feed type' when neither probe yields a value
 */
const detectFeedType = async xml => {
  const probe = {
    rss: 'rss/channel/title',
    atom: 'feed/title'
  };
  const titles = await transform(xml, probe);

  // Order matters: RSS is checked before Atom.
  const feedType = ['rss', 'atom'].find(name => titles[name]);
  if (!feedType) throw new Error('unknown feed type');
  return feedType;
};

/**
 * Parse an RSS or Atom document into a plain object.
 *
 * The feed type is detected first, then the matching entry of `template`
 * is used to transform the same document.
 *
 * @param {string} xml - feed document, passed unchanged to the helpers
 * @returns {Promise<Object>} whatever `transform` produces for the template
 * @throws {Error} propagated from `detectFeedType` or `transform`
 */
const parseFeed = async xml => {
  const feedType = await detectFeedType(xml);
  return transform(xml, template[feedType]);
};

module.exports = parseFeed;

0 comments on commit 76c220b

Please sign in to comment.