Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parse text to entities with commonmark #11

Merged
merged 5 commits into from
Feb 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions jest.config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Jest configuration for this package.
module.exports = {
// Only JavaScript files are picked up: names of the form `spec`/`test` or
// `<anything>.spec`/`<anything>.test`, with a `.js` or `.jsx` extension.
// NOTE(review): a `.ts` test file does not match this glob, so TypeScript
// tests presumably have to be compiled to `.js` before jest sees them — confirm.
testMatch: ['**/?(*.)+(spec|test).js?(x)'],
}
8 changes: 6 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
"prepare": "yarn tsc",
"postinstall": "yarn tsc || true",
"build": "yarn tsc",
"lint": "eslint src --ext ts,tsx,js,jsx --cache"
"lint": "eslint src --ext ts,tsx,js,jsx --cache",
"test": "jest"
},
"husky": {
"hooks": {
Expand All @@ -15,12 +16,14 @@
},
"dependencies": {
"bluebird": "^3.7.2",
"commonmark": "^0.29.3",
"image-size": "^0.9.3",
"jest": "^26.6.3",
"lodash": "^4.17.20",
"matrix-js-sdk": "^9.6.0",
"node-localstorage": "^2.1.6",
"react": "https://github.com/TextsHQ/react-global-shim#3aa861a",
"olm": "https://packages.matrix.org/npm/olm/olm-3.2.1.tgz",
"react": "https://github.com/TextsHQ/react-global-shim#3aa861a",
"rimraf": "^3.0.2"
},
"peerDependencies": {
Expand All @@ -29,6 +32,7 @@
"devDependencies": {
"@textshq/eslint-config": "https://github.com/TextsHQ/eslint-config",
"@textshq/platform-sdk": "link:../platform-sdk",
"@types/jest": "^26.0.20",
"eslint": "^7.20.0",
"husky": "^4.3.0",
"typescript": "^4.1.5"
Expand Down
68 changes: 33 additions & 35 deletions src/mappers.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { Message, Thread, MessageActionType, MessageAttachmentType, texts } from '@textshq/platform-sdk'
import MatrixClient from './matrix-client'
import { mapTextAttributes } from './text-attributes'

/** Drops one leading '@' from `name`; names without that prefix are returned untouched. */
const stripAtMark = (name) => {
  if (!name.startsWith('@')) return name
  return name.slice(1)
}

Expand Down Expand Up @@ -80,39 +81,43 @@ export function mapMessage(
event,
fresh = false,
): Message {
let text
let action = null
let attachments = []
let reactions = []
let isDeleted = false
let linkedMessageID
const senderID = event.getSender()
const mapped: Message = {
_original: JSON.stringify(event),
id: event.getId(),
timestamp: event.getDate(),
senderID,
text: '',
isSender: userID === senderID,
attachments: [],
reactions: [],
}

const eventType = event.getType()
texts.log(eventType, event)
switch (eventType) {
case 'm.room.encryption': {
action = {
mapped.action = {
type: MessageActionType.THREAD_TITLE_UPDATED,
title: 'Encrypted',
actorParticipantID: senderID,
}
text = 'Encryption enabled'
mapped.text = 'Encryption enabled'
break
}
case 'm.room.member': {
const { membership } = event.getContent()
let type
if (membership === 'join') {
type = MessageActionType.THREAD_PARTICIPANTS_ADDED
text = `${senderID} joined the room`
mapped.text = `${senderID} joined the room`
} else if (membership === 'leave') {
type = MessageActionType.THREAD_PARTICIPANTS_REMOVED
text = `${senderID} left the room`
mapped.text = `${senderID} left the room`
} else {
return null
}
action = {
mapped.action = {
type,
title: event.getContent().membership,
actorParticipantID: senderID,
Expand All @@ -126,15 +131,15 @@ export function mapMessage(
if (!redactedBy) {
return
}
isDeleted = true
text = `Message deleted by ${redactedBy}`
mapped.isDeleted = true
mapped.text = `Message deleted by ${redactedBy}`
break
}
const annotationRelations = room
.getUnfilteredTimelineSet()
.getRelationsForEvent(event.getId(), 'm.annotation', 'm.reaction')
if (annotationRelations) {
reactions = annotationRelations.getRelations().map(ev => ({
mapped.reactions = annotationRelations.getRelations().map(ev => ({
id: ev.getId(),
reactionKey: ev.getRelation().key,
participantID: ev.getSender(),
Expand All @@ -146,23 +151,28 @@ export function mapMessage(
case 'm.bad.encrypted':
case 'm.notice':
case 'm.text': {
text = content.body
mapped.text = content.body
if (
content['m.relates_to']
&& content['m.relates_to']['m.in_reply_to']
) {
text = stripQuotedMessage(content.body)
linkedMessageID = content['m.relates_to']['m.in_reply_to'].event_id
mapped.text = stripQuotedMessage(content.body)
mapped.linkedMessageID = content['m.relates_to']['m.in_reply_to'].event_id
}

if (content.format === 'org.matrix.custom.html') {
const { text, textAttributes } = mapTextAttributes(mapped.text)
mapped.text = text
mapped.textAttributes = textAttributes
}
break
}
case 'm.audio':
case 'm.file':
case 'm.image':
case 'm.video': {
const srcURL = matrixClient.client.mxcUrlToHttp(content.url)
attachments = [
mapped.attachments = [
{
id: event.getId(),
type: getAttachmentTypeFromContentType(content.msgtype),
Expand All @@ -184,14 +194,14 @@ export function mapMessage(
let type
if (prevContent.name) {
type = MessageActionType.THREAD_TITLE_UPDATED
text = `${senderID} changed the room name from ${prevContent.name} to ${
mapped.text = `${senderID} changed the room name from ${prevContent.name} to ${
event.getContent().name
}`
} else {
type = MessageActionType.GROUP_THREAD_CREATED
text = `${senderID} created and configured the room`
mapped.text = `${senderID} created and configured the room`
}
action = {
mapped.action = {
type,
title: event.getContent().name,
actorParticipantID: senderID,
Expand Down Expand Up @@ -242,18 +252,6 @@ export function mapMessage(
}
}

return {
_original: JSON.stringify(event),
id: event.getId(),
timestamp: event.getDate(),
senderID,
text,
isSender: userID === senderID,
attachments,
isAction: !!action,
action,
isDeleted,
reactions,
linkedMessageID,
}
mapped.isAction = !!mapped.action
return mapped
}
84 changes: 84 additions & 0 deletions src/tests/text-attributes.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import { mapTextAttributes } from '../text-attributes'

// Fixtures for mapTextAttributes: `text` is the markdown input and `result`
// is the exact object the function is expected to return for it.
const cases = [
// Inline styles across a soft break and a paragraph break: emphasis, strong,
// an inline-HTML <del> pair and an inline code span.
{
text: '\n*italic*\n **bold**\n\n plain <del>st</del> `code`.',
result: {
text: 'italic\nbold\n\nplain st code.',
textAttributes: {
entities: [
{
from: 0,
to: 6,
italic: true,
},
{
from: 7,
to: 11,
bold: true,
},
{
from: 19,
to: 21,
strikethrough: true,
},
{
from: 22,
to: 26,
code: true,
},
],
},
},
},
// A link nested inside bold text, followed by a second link. Entities are
// listed in the order their spans close, so the inner link precedes the bold.
{
text:
'**[[tulir/mautrix-wsproxy](https://github.com/tulir/mautrix-wsproxy)]** [Yumekui](https://github.com/Yumekui) opened pull request #1',
result: {
text: '[tulir/mautrix-wsproxy] Yumekui opened pull request #1',
textAttributes: {
entities: [
{
from: 1,
to: 22,
link: 'https://github.com/tulir/mautrix-wsproxy',
},
{
from: 0,
to: 23,
bold: true,
},
{
from: 24,
to: 31,
link: 'https://github.com/Yumekui',
},
],
},
},
},
// Fenced code block with an info string. The expected text is 21 characters
// long while the entity ends at 22: the range is measured on the untrimmed
// output (presumably including the block's trailing newline), and the text is
// trimmed afterwards.
{
text:
'```js\nconsole.log("matrix")\n```',
result: {
text: 'console.log("matrix")',
textAttributes: {
entities: [
{
from: 0,
to: 22,
pre: true,
code: true,
codeLanguage: 'js'
},
],
},
},
},
]

// Each fixture must map to exactly its expected text and entity list.
test('text attributes', () => {
  cases.forEach(({ text, result }) => {
    expect(mapTextAttributes(text)).toEqual(result)
  })
})
100 changes: 100 additions & 0 deletions src/text-attributes.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import { TextEntity } from '@textshq/platform-sdk'
import { Parser } from 'commonmark'

// Module-level CommonMark parser, shared by every mapTextAttributes call.
// NOTE(review): commonmark.js documents `safe` as an HtmlRenderer option;
// confirm it has any effect when passed to Parser.
const reader = new Parser({ safe: true })

// All types: text, softbreak, linebreak, emph, strong, html_inline, link,
// image, code, document, paragraph, block_quote, item, list, heading,
// code_block, html_block, thematic_break
//
// Of those, these are the node types that mapTextAttributes tracks as spans:
// a token is pushed when the node is entered and turned into an entity when
// the node is exited.
const TOKEN_TYPES = ['emph', 'strong', 'link']

/**
 * Converts CommonMark-formatted text into plain text plus formatting entities.
 *
 * The parsed document is walked once. Text-bearing nodes are appended to the
 * output, and styled spans are recorded as entities whose `from`/`to` are
 * offsets into that output:
 * - `emph` -> italic, `strong` -> bold, `link` -> link (node destination)
 * - inline `<del>`…`</del>` HTML -> strikethrough
 * - inline code -> code; code blocks -> pre + code + codeLanguage (info string)
 * Soft and hard line breaks become `\n`; every paragraph end appends `\n\n`.
 *
 * @param src Markdown source to convert.
 * @returns `text`, the flattened output with surrounding whitespace trimmed,
 *   and `textAttributes`, which holds the collected entities or is `undefined`
 *   when there are none. Offsets are measured on the output before trimming.
 *   Empty input yields `{ text: '', textAttributes: undefined }` so callers
 *   can always destructure the result.
 */
export function mapTextAttributes(src: string) {
  if (!src) return { text: '', textAttributes: undefined }

  const walker = reader.parse(src).walker()
  let output = ''
  const entities: TextEntity[] = []
  const tokenStack: { type: string, from: number }[] = []

  // Removes and returns the innermost open token of the given type. Matching
  // by type (instead of blindly popping the top) keeps a stray or crossing
  // <del> from being paired with the wrong closing event.
  const closeToken = (type: string) => {
    const index = tokenStack.map(token => token.type).lastIndexOf(type)
    return index === -1 ? undefined : tokenStack.splice(index, 1)[0]
  }

  let event
  while ((event = walker.next())) {
    const { node } = event
    // Container nodes produce an entering and an exiting event; leaf nodes
    // only produce an entering one.
    if (event.entering) {
      if (TOKEN_TYPES.includes(node.type)) {
        tokenStack.push({ type: node.type, from: output.length })
      } else if (node.type === 'html_inline') {
        const tag = node.literal.toLowerCase()
        if (tag === '<del>') {
          tokenStack.push({ type: 'del', from: output.length })
        } else if (tag === '</del>') {
          const opened = closeToken('del')
          if (opened) {
            entities.push({
              from: opened.from,
              to: output.length,
              strikethrough: true,
            })
          }
        }
      } else if (node.type === 'code') {
        entities.push({
          from: output.length,
          to: output.length + node.literal.length,
          code: true,
        })
        output += node.literal
      } else if (node.type === 'code_block') {
        entities.push({
          from: output.length,
          to: output.length + node.literal.length,
          pre: true,
          code: true,
          codeLanguage: node.info,
        })
        output += node.literal
      } else if (['softbreak', 'linebreak'].includes(node.type)) {
        output += '\n'
      } else if (node.type === 'text') {
        output += node.literal
      }
      continue
    }

    if (node.type === 'paragraph') {
      output += '\n\n'
      continue
    }
    // Other containers (document, list, heading, image, ...) also emit exit
    // events but never pushed a token, so they must not consume one.
    if (!TOKEN_TYPES.includes(node.type)) continue
    const opened = closeToken(node.type)
    if (!opened) continue

    const entity: TextEntity = {
      from: opened.from,
      to: output.length,
    }
    switch (node.type) {
      case 'emph':
        entity.italic = true
        break
      case 'strong':
        entity.bold = true
        break
      case 'link':
        entity.link = node.destination
        break
    }
    entities.push(entity)
  }

  return {
    text: output.trim(),
    textAttributes: entities.length ? { entities } : undefined,
  }
}
Loading