Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use HTML to store rich text content #373

Merged
merged 2 commits into from
Jan 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions datahub/server/lib/richtext.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from html import escape as htmlescape
import json
from bs4 import BeautifulSoup


def richtext_to_plaintext(text, default="", escape=False) -> str:
    """Convert stored rich text into plain text.

    The input is first offered to ``try_parse_draftjs``; when that reports
    the value was not Draft.js JSON, the value is run through
    ``html_to_plaintext`` instead.

    Args:
        text: The stored rich text value. Falsy values (``None``, ``""``)
            are replaced by ``default`` before parsing.
        default: Fallback used when ``text`` is falsy.
        escape: When true, the resulting plain text is HTML-escaped.

    Returns:
        The plain text, HTML-escaped if ``escape`` is set.
    """
    source = text or default
    parsed_as_draftjs, plaintext = try_parse_draftjs(source)

    if not parsed_as_draftjs:
        plaintext = html_to_plaintext(plaintext)

    return htmlescape(plaintext) if escape else plaintext


def try_parse_draftjs(text) -> tuple:
    """Try to read ``text`` as a JSON-serialized Draft.js content state.

    Args:
        text: The stored string value.

    Returns:
        A ``(is_draftjs, value)`` pair. When ``text`` decodes to a JSON
        object, the pair is ``(True, <plain text of its blocks>)``.
        Otherwise the pair is ``(False, text)`` with ``text`` unchanged so
        the caller can handle it as non-Draft.js content.
    """
    try:
        content_state = json.loads(text)
    except json.decoder.JSONDecodeError:
        # For old text cells the value was plain text
        return False, text

    if not isinstance(content_state, dict):
        # Strings such as "42", "true", "null" or "[1, 2]" are valid JSON
        # but not a content state; calling .get() on them would raise
        # AttributeError, so treat them as ordinary text.
        return False, text

    return True, draftjs_content_state_to_plaintext(content_state)


def draftjs_content_state_to_plaintext(content_state) -> str:
    """Join the text of every block in a content-state mapping.

    Args:
        content_state: A mapping that may carry a ``"blocks"`` list, where
            each block is a mapping that may carry a ``"text"`` entry.

    Returns:
        The block texts joined with newlines. A missing or ``None``
        ``"blocks"`` entry yields ``""``; a missing or ``None`` ``"text"``
        contributes an empty line.
    """
    # `or` (rather than a .get default) also covers keys that are present
    # but explicitly null, which would otherwise break iteration / join.
    blocks = content_state.get("blocks") or []
    return "\n".join(block.get("text") or "" for block in blocks)


def html_to_plaintext(html) -> str:
    """Strip markup from an HTML string and return its text content.

    Args:
        html: The HTML (or plain text) to convert.

    Returns:
        The text extracted by BeautifulSoup.
    """
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed (and warns), so output could differ
    # between environments. "html.parser" ships with the standard library.
    soup = BeautifulSoup(html, "html.parser")
    return soup.get_text()
32 changes: 6 additions & 26 deletions datahub/server/logic/elasticsearch.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from html import escape
import math
import json
import re

from const.impression import ImpressionItemType
Expand All @@ -14,6 +13,7 @@
from lib.utils.decorators import in_mem_memoized
from lib.logger import get_logger
from lib.config import get_config_value
from lib.richtext import richtext_to_plaintext
from app.db import (
# TODO: We should use follower db instead
with_session,
Expand Down Expand Up @@ -99,7 +99,7 @@ def datadocs_to_es(datadoc, session=None):
cells_as_text = []
for cell in datadoc.cells:
if cell.cell_type == DataCellType.text:
cells_as_text.append(simple_parse_draftjs_content_state(cell.context) or "")
cells_as_text.append(richtext_to_plaintext(cell.context))
elif cell.cell_type == DataCellType.query:
cell_title = cell.meta.get("title", "")
cell_text = (
Expand Down Expand Up @@ -136,23 +136,6 @@ def datadocs_to_es(datadoc, session=None):
return expand_datadoc


@with_exception
def simple_parse_draftjs_content_state(value):
    """Extract plain text from a value that may be serialized Draft.js JSON.

    Args:
        value: The stored cell/description value, or ``None``.

    Returns:
        ``""`` when ``value`` is ``None``; the newline-joined block texts
        when ``value`` decodes to a JSON object; otherwise ``value`` itself,
        unchanged.
    """
    if value is None:
        return ""

    try:
        content_state = json.loads(value)
    except Exception:
        # For old text cells the value was plain text
        LOG.debug("Text cell is not json, content: {}".format(value))
        return value

    if not isinstance(content_state, dict):
        # Valid JSON that is not an object (e.g. "42", "true", "[1]") has no
        # .get(); treat it like any other plain-text value.
        return value

    blocks = content_state.get("blocks", [])
    blocks_text = [block.get("text", "") for block in blocks]
    joined_blocks = "\n".join(blocks_text)
    return joined_blocks


@with_exception
def _bulk_insert_datadocs():
type_name = ES_CONFIG["datadocs"]["type_name"]
Expand Down Expand Up @@ -244,13 +227,10 @@ def table_to_es(table, session=None):
column_names = list(map(lambda c: c.name, table.columns))
schema_name = schema.name
table_name = table.name
description = escape(
(
simple_parse_draftjs_content_state(table.information.description)
if table.information
else ""
)
or ""
description = (
richtext_to_plaintext(table.information.description, escape=True)
if table.information
else ""
)

full_name = "{}.{}".format(schema_name, table_name)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import scrollIntoView from 'smooth-scroll-into-view-if-needed';

import { RichTextEditor } from 'ui/RichTextEditor/RichTextEditor';
import { ISearchAndReplaceContextType } from 'context/searchAndReplace';
import { LinkDecorator } from 'lib/draft-js-utils';
import { LinkDecorator } from 'lib/richtext';
import { makeSearchHighlightDecorator } from 'components/SearchAndReplace/SearchHighlightDecorator';

export const DraftJsSearchHighlighter: React.FC<{
Expand Down
14 changes: 6 additions & 8 deletions datahub/webapp/lib/batch/datadoc-save-manager.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import { convertToRaw, ContentState } from 'draft-js';
import type { ContentState } from 'draft-js';

import { IDataCellMeta } from 'const/datadoc';
import type { IDataCellMeta } from 'const/datadoc';
import ds from 'lib/datasource';
import { BatchManager, spreadMergeFunction } from 'lib/batch/batch-manager';
import dataDocSocket from 'lib/data-doc/datadoc-socketio';
import { convertIfContentStateToHTML } from 'lib/richtext/serialize';

export class DataDocSaveManager {
private dataDocSaverByDocId: Record<
Expand Down Expand Up @@ -65,12 +66,9 @@ export class DataCellSaveManager {
this.itemSaverByCellId[cellId] = new BatchManager({
mergeFunction: spreadMergeFunction,
processFunction: (data) => {
const stringifiedContext =
data.context != null
? typeof data.context === 'string'
? data.context
: JSON.stringify(convertToRaw(data.context))
: undefined;
const stringifiedContext = convertIfContentStateToHTML(
data.context
);

const fields = {
...(stringifiedContext != null && {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,29 +47,6 @@ export function isSoftNewLineEvent(event: React.KeyboardEvent) {
);
}

// Mainly used for chart descriptions, whose stored value may be either a
// serialized raw content state (JSON) or a plain string.
export function convertRawToContentState(raw: string): DraftJs.ContentState {
    try {
        // Empty input yields an empty content state; anything else is
        // expected to be JSON produced from a raw content state.
        return raw
            ? DraftJs.convertFromRaw(JSON.parse(raw))
            : DraftJs.ContentState.createFromText('');
    } catch (e) {
        // Parsing/conversion failed: interpret the string as HTML instead.
        const { contentBlocks, entityMap } = DraftJs.convertFromHTML(
            (raw as string) || ''
        );

        if (!contentBlocks) {
            return DraftJs.ContentState.createFromText('');
        }

        return DraftJs.ContentState.createFromBlockArray(
            contentBlocks,
            entityMap
        );
    }
}

export type RichTextEditorCommand =
| DraftJs.DraftEditorCommand
| 'show-link-input';
Expand Down
34 changes: 34 additions & 0 deletions datahub/webapp/lib/richtext/serialize.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import * as DraftJs from 'draft-js';
import { stateToHTML } from 'draft-js-export-html';
import { stateFromHTML } from 'draft-js-import-html';

// Mainly used for chart descriptions, whose stored value may be either a
// serialized raw content state (JSON) or a plain/HTML string.
//
// - empty `raw`      -> content state created from `defaultVal`
// - JSON `raw`       -> converted with DraftJs.convertFromRaw
// - anything else    -> parsed as HTML via stateFromHTML
export function convertRawToContentState(
    raw: string,
    defaultVal = ''
): DraftJs.ContentState {
    try {
        return raw
            ? DraftJs.convertFromRaw(JSON.parse(raw))
            : DraftJs.ContentState.createFromText(defaultVal);
    } catch (e) {
        // Parsing/conversion failed: interpret the string as HTML instead.
        return stateFromHTML(raw || '');
    }
}

/**
 * Serialize a Draft.js content state to an HTML string using
 * `stateToHTML` from draft-js-export-html.
 */
export function convertContentStateToHTML(contentState: DraftJs.ContentState) {
    const html = stateToHTML(contentState);
    return html;
}

/**
 * Normalize a value that is either already a string or still a content
 * state into a string.
 *
 * - `null` / `undefined` -> `undefined`
 * - string               -> returned unchanged
 * - content state        -> serialized with convertContentStateToHTML
 */
export function convertIfContentStateToHTML(
    context: DraftJs.ContentState | string
) {
    if (context == null) {
        return undefined;
    }
    if (typeof context === 'string') {
        return context;
    }
    return convertContentStateToHTML(context);
}
2 changes: 1 addition & 1 deletion datahub/webapp/redux/dataDoc/action.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import {
DataCellSaveManager,
} from 'lib/batch/datadoc-save-manager';
import { getQueryEngineId } from 'lib/utils';
import { convertRawToContentState } from 'lib/draft-js-utils';
import { convertRawToContentState } from 'lib/richtext/serialize';
import dataDocSocket from 'lib/data-doc/datadoc-socketio';
import {
IUpdateDataDocPollingAction,
Expand Down
12 changes: 7 additions & 5 deletions datahub/webapp/redux/dataSources/action.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { normalize, schema } from 'normalizr';
import { convertToRaw, ContentState } from 'draft-js';
import type { ContentState } from 'draft-js';
import JSONBig from 'json-bigint';

import {
Expand All @@ -16,7 +16,10 @@ import {
IDataTableOwnership,
ITableStats,
} from 'const/metastore';
import { convertRawToContentState } from 'lib/draft-js-utils';
import {
convertContentStateToHTML,
convertRawToContentState,
} from 'lib/richtext/serialize';
import ds from 'lib/datasource';
import {
IReceiveDataTableAction,
Expand Down Expand Up @@ -182,7 +185,7 @@ export function updateDataTable(
const params: Partial<IDataTable> = {};

if (description != null) {
params.description = JSON.stringify(convertToRaw(description));
params.description = convertContentStateToHTML(description);
}
if (golden != null) {
params.golden = golden;
Expand All @@ -209,9 +212,8 @@ export function updateDataColumnDescription(
description: ContentState
): ThunkResult<Promise<void>> {
return async (dispatch) => {
const raw = JSON.stringify(convertToRaw(description));
const params = {
description: raw,
description: convertContentStateToHTML(description),
};
try {
const { data } = await ds.update(`/column/${columnId}/`, params);
Expand Down
2 changes: 1 addition & 1 deletion datahub/webapp/ui/RichTextEditor/RichTextEditor.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import {
isListBlock,
RichTextEditorCommand,
RichTextEditorStyleMap,
} from 'lib/draft-js-utils';
} from 'lib/richtext';
import * as Utils from 'lib/utils';
import { matchKeyPress } from 'lib/utils/keyboard';

Expand Down
4 changes: 3 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "datahub",
"version": "2.4.2",
"version": "2.5.0",
"description": "A Big Data Webapp",
"private": true,
"scripts": {
Expand Down Expand Up @@ -48,6 +48,8 @@
"d3": "5.7.0",
"dagre-d3": "0.6.1",
"draft-js": "0.11.5",
"draft-js-export-html": "^1.4.1",
"draft-js-import-html": "^1.4.1",
"fast-json-stable-stringify": "2.0.0",
"feather-icons": "4.28.0",
"formik": "2.1.4",
Expand Down
32 changes: 32 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -7563,6 +7563,33 @@ downshift@^6.0.6:
prop-types "^15.7.2"
react-is "^17.0.1"

draft-js-export-html@^1.4.1:
version "1.4.1"
resolved "https://registry.yarnpkg.com/draft-js-export-html/-/draft-js-export-html-1.4.1.tgz#7cdad970c6f7f2cdd19ce4c1f5073fdf0f313b4d"
integrity sha512-G4VGBSalPowktIE4wp3rFbhjs+Ln9IZ2FhXeHjsZDSw0a2+h+BjKu5Enq+mcsyVb51RW740GBK8Xbf7Iic51tw==
dependencies:
draft-js-utils "^1.4.0"

draft-js-import-element@^1.4.0:
version "1.4.0"
resolved "https://registry.yarnpkg.com/draft-js-import-element/-/draft-js-import-element-1.4.0.tgz#8760acbfeb60ed824a1c8319ec049f702681df66"
integrity sha512-WmYT5PrCm47lGL5FkH6sRO3TTAcn7qNHsD3igiPqLG/RXrqyKrqN4+wBgbcT2lhna/yfWTRtgzAbQsSJoS1Meg==
dependencies:
draft-js-utils "^1.4.0"
synthetic-dom "^1.4.0"

draft-js-import-html@^1.4.1:
version "1.4.1"
resolved "https://registry.yarnpkg.com/draft-js-import-html/-/draft-js-import-html-1.4.1.tgz#c222a3a40ab27dee5874fcf78526b64734fe6ea4"
integrity sha512-KOZmtgxZriCDgg5Smr3Y09TjubvXe7rHPy/2fuLSsL+aSzwUDwH/aHDA/k47U+WfpmL4qgyg4oZhqx9TYJV0tg==
dependencies:
draft-js-import-element "^1.4.0"

draft-js-utils@^1.4.0:
version "1.4.0"
resolved "https://registry.yarnpkg.com/draft-js-utils/-/draft-js-utils-1.4.0.tgz#c60af198108f69b0f1df3572555b23836819d1cf"
integrity sha512-8s9FFuKC+lOWGwJ0b3om2PF+uXrqQPaEQlPJI7UxdzxTYGMeKouMPA9+YlPn52zcAVElIZtd2tXj6eQmvlKelw==

draft-js@0.11.5:
version "0.11.5"
resolved "https://registry.yarnpkg.com/draft-js/-/draft-js-0.11.5.tgz#b5dd30c30c9316801ab9766d45a8f88b1cd43b2c"
Expand Down Expand Up @@ -16541,6 +16568,11 @@ synchronous-promise@^2.0.6:
resolved "https://registry.yarnpkg.com/synchronous-promise/-/synchronous-promise-2.0.7.tgz#3574b3d2fae86b145356a4b89103e1577f646fe3"
integrity sha512-16GbgwTmFMYFyQMLvtQjvNWh30dsFe1cAW5Fg1wm5+dg84L9Pe36mftsIRU95/W2YsISxsz/xq4VB23sqpgb/A==

synthetic-dom@^1.4.0:
version "1.4.0"
resolved "https://registry.yarnpkg.com/synthetic-dom/-/synthetic-dom-1.4.0.tgz#d988d7a4652458e2fc8706a875417af913e4dd34"
integrity sha512-mHv51ZsmZ+ShT/4s5kg+MGUIhY7Ltq4v03xpN1c8T1Krb5pScsh/lzEjyhrVD0soVDbThbd2e+4dD9vnDG4rhg==

table@^5.2.3:
version "5.4.6"
resolved "https://registry.yarnpkg.com/table/-/table-5.4.6.tgz#1292d19500ce3f86053b05f0e8e7e4a3bb21079e"
Expand Down