Skip to content

Commit

Permalink
fix: Fix issue #21 by implementing a better parser for img and video …
Browse files Browse the repository at this point in the history
…blocks.
  • Loading branch information
ericof committed Mar 6, 2023
1 parent ebea336 commit b49614e
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 14 deletions.
39 changes: 35 additions & 4 deletions src/converters/fromHtml.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,41 @@ const parser = new DOMParser();

// Expose a JSDOM-backed document as the global `document`.
global.document = new JSDOM('...').window.document;

// DOM `nodeType` codes compared against `node.nodeType` in this module:
// 3 is a text node, 8 is a comment node.
// NOTE(review): TEXT and TEXT_NODE hold the same value; this looks like the
// before/after of a rename shown side by side by the diff view — confirm
// which name the real file keeps.
const TEXT = 3;
const TEXT_NODE = 3;
const COMMENT = 8;

// Upper-case tag names that (presumably) have a dedicated block converter;
// the wrapper-handling loop in convertFromHTML refers to this list.
const elementsWithConverters = ['IMG', 'VIDEO', 'TABLE', 'IFRAME'];

// Upper-case tag names whose wrapper is only worth keeping when the element
// carries some text content (see shouldKeepWrapper).
const elementsShouldHaveText = [
  'B',
  'BLOCKQUOTE',
  'BODY',
  'CODE',
  'DEL',
  'DIV',
  'EM',
  'H1',
  'H2',
  'H3',
  'H4',
  'H5',
  'H6',
  'I',
  'P',
  'PRE',
  'S',
  'SPAN',
  'STRONG',
  'SUB',
  'SUP',
  'U',
];

/**
 * Decide whether an element should be kept as a wrapper.
 *
 * Elements whose tag is listed in `elementsShouldHaveText` are kept only when
 * their `textContent` is non-empty; every other element is always kept.
 *
 * @param {{tagName?: string, textContent?: string}} el - Node to inspect.
 * @returns {boolean} `true` when the wrapper should be kept.
 */
const shouldKeepWrapper = (el) => {
  const expectsText = elementsShouldHaveText.includes(el.tagName);
  if (!expectsText) {
    return true;
  }
  return Boolean(el.textContent);
};

const blockFromElement = (el, defaultTextBlock) => {
let textBlock = slateTextBlock;
Expand Down Expand Up @@ -52,13 +83,13 @@ const skipCommentsAndWhitespace = (elements) => {
(node) =>
!(
node.nodeType === COMMENT ||
(node.nodeType === TEXT && isWhitespace(node.textContent))
(node.nodeType === TEXT_NODE && isWhitespace(node.textContent))
),
);
};

// A node counts as inline when it is a text node or when its lower-cased tag
// name is accepted by isGlobalInline.
// NOTE(review): the two condition lines below differ only in TEXT vs
// TEXT_NODE; they appear to be the old and new versions of the same line as
// rendered by the diff view — confirm that only one exists in the real file.
const isInline = (n) =>
n.nodeType === TEXT || isGlobalInline(n.tagName.toLowerCase());
n.nodeType === TEXT_NODE || isGlobalInline(n.tagName.toLowerCase());

const convertFromHTML = (input, defaultTextBlock) => {
const document = parser.parseFromString(input, 'text/html');
Expand Down Expand Up @@ -87,7 +118,7 @@ const convertFromHTML = (input, defaultTextBlock) => {
// convert to blocks
for (const el of elements) {
const children = el.childNodes;
let keepWrapper = el.textContent ? true : false;
let keepWrapper = shouldKeepWrapper(el);
for (const child of children) {
// With children nodes, we keep the wrapper only
// if at least one child is not in elementsWithConverters
Expand Down
71 changes: 62 additions & 9 deletions src/converters/fromHtml.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ describe('convertFromHTML parsing html with images nested in h2', () => {
const result = convertFromHTML(html, 'draftjs');

test('will return an array of blocks', () => {
expect(result).toHaveLength(8);
expect(result).toHaveLength(10);
});

test('will have a first block with an image', () => {
Expand Down Expand Up @@ -113,7 +113,7 @@ describe('convertFromHTML parsing html with images nested in h2', () => {
const result = convertFromHTML(html, 'slate');

test('will return an array of blocks', () => {
expect(result).toHaveLength(8);
expect(result).toHaveLength(10);
});

test('will have a first block with an image', () => {
Expand Down Expand Up @@ -330,10 +330,12 @@ describe('convertFromHTML parsing whitespace inside unknown tags', () => {
});
});

describe('convertFromHTML parsing image inside a p element', () => {
const html = '<p><img src="image.jpeg"></p>';
describe('convertFromHTML parsing image', () => {
// https://github.com/plone/blocks-conversion-tool/issues/21

describe('on its own', () => {
const html = '<img src="image.jpeg">';

describe('returns a block with an image', () => {
const result = convertFromHTML(html, 'slate');
expect(result).toHaveLength(1);
expect(result).toEqual([
Expand All @@ -347,12 +349,63 @@ describe('convertFromHTML parsing image inside a p element', () => {
},
]);
});
});

describe('convertFromHTML parsing image inside a span element', () => {
const html = '<p><span><img src="image.jpeg"></span></p>';
describe('inside a p element', () => {
  // An image wrapped in an otherwise empty paragraph.
  const html = '<p><img src="image.jpeg"></p>';
  const result = convertFromHTML(html, 'slate');

  // Assertions belong in a test() so Jest registers and reports them as a
  // test case instead of running them while the suite is being collected.
  test('will return a single image block', () => {
    expect(result).toHaveLength(1);
    expect(result).toEqual([
      {
        '@type': 'image',
        align: 'center',
        alt: '',
        size: 'l',
        title: '',
        url: 'image.jpeg',
      },
    ]);
  });
});

describe('inside a span element', () => {
  // An image nested in a span inside a paragraph, with no text around it.
  const html = '<p><span><img src="image.jpeg"></span></p>';
  const result = convertFromHTML(html, 'slate');

  // Assertions belong in a test() so Jest registers and reports them as a
  // test case instead of running them while the suite is being collected.
  test('will return a single image block', () => {
    expect(result).toHaveLength(1);
    expect(result).toEqual([
      {
        '@type': 'image',
        align: 'center',
        alt: '',
        size: 'l',
        title: '',
        url: 'image.jpeg',
      },
    ]);
  });
});

describe('inside a div element', () => {
  // https://github.com/plone/blocks-conversion-tool/issues/21#issuecomment-1455176066
  const html = '<div><img src="image.jpeg"></div>';
  const result = convertFromHTML(html, 'slate');

  // Assertions belong in a test() so Jest registers and reports them as a
  // test case instead of running them while the suite is being collected.
  test('will return a single image block', () => {
    expect(result).toHaveLength(1);
    expect(result).toEqual([
      {
        '@type': 'image',
        align: 'center',
        alt: '',
        size: 'l',
        title: '',
        url: 'image.jpeg',
      },
    ]);
  });
});

describe('inside a nested div element', () => {
// https://github.com/plone/blocks-conversion-tool/issues/21#issuecomment-1455176066
const html = '<div><div><img src="image.jpeg"></div></div>';

describe('returns valid result preserving the whitespace', () => {
const result = convertFromHTML(html, 'slate');
expect(result).toHaveLength(1);
expect(result).toEqual([
Expand Down
16 changes: 15 additions & 1 deletion src/converters/slate.js
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,21 @@ const bodyTagDeserializer = (el) => {
};

const divTagDeserializer = (el) => {
const children = Array.from(el.childNodes)
let children = el.childNodes;
if (children.length === 1) {
const child = children[0];
if (
// handle formatting from OpenOffice
child.nodeType === TEXT_NODE &&
child.textContent === '\n'
) {
return jsx('text', {}, ' ');
} else if (elementsWithConverters.hasOwnProperty(child.tagName)) {
// If we have a child element that has its own converter, use it
return elementsWithConverters[child.tagName](child);
}
}
children = Array.from(children)
.map((child) => {
if (child.nodeType === TEXT_NODE) {
let value = deserialize(child);
Expand Down

0 comments on commit b49614e

Please sign in to comment.