Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🎉 Native gdocs tables #2827

Merged
merged 12 commits into from
Nov 1, 2023
1 change: 1 addition & 0 deletions db/model/Gdoc/Gdoc.ts
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,7 @@ export class Gdoc extends BaseEntity implements OwidGdocInterface {
"simple-text",
"sticky-left",
"sticky-right",
"table",
"text"
),
},
Expand Down
20 changes: 20 additions & 0 deletions db/model/Gdoc/enrichedToRaw.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import {
RawBlockAlign,
RawBlockEntrySummary,
RawBlockVideo,
RawBlockTable,
} from "@ourworldindata/utils"
import { spanToHtmlString } from "./gdocUtils.js"
import { match, P } from "ts-pattern"
Expand Down Expand Up @@ -400,5 +401,24 @@ export function enrichedBlockToRawBlock(
},
}
})
.with({ type: "table" }, (b): RawBlockTable => {
return {
type: b.type,
value: {
template: b.template,
rows: b.rows.map((row) => ({
type: row.type,
value: {
cells: row.cells.map((cell) => ({
type: cell.type,
value: cell.content.map(
enrichedBlockToRawBlock
),
})),
},
})),
},
}
})
.exhaustive()
}
88 changes: 88 additions & 0 deletions db/model/Gdoc/exampleEnrichedBlocks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -441,4 +441,92 @@ export const enrichedBlockExamples: Record<
items: [{ text: "Hello", slug: "#link-to-something" }],
parseErrors: [],
},
table: {
type: "table",
template: "header-row",
size: "narrow",
rows: [
{
type: "table-row",
cells: [
{
type: "table-cell",
content: [
{
type: "text",
value: [
{
spanType: "span-simple-text",
text: "City",
},
],
parseErrors: [],
},
{
type: "text",
value: [
{
spanType: "span-simple-text",
text: "Continent",
},
],
parseErrors: [],
},
],
},
{
type: "table-cell",
content: [
{
type: "text",
value: [
{
spanType: "span-simple-text",
text: "Wellington",
},
],
parseErrors: [],
},
{
type: "text",
value: [
{
spanType: "span-simple-text",
text: "Zealandia",
},
],
parseErrors: [],
},
],
},
{
type: "table-cell",
content: [
{
type: "text",
value: [
{
spanType: "span-simple-text",
text: "Addis Ababa",
},
],
parseErrors: [],
},
{
type: "text",
value: [
{
spanType: "span-simple-text",
text: "Africa",
},
],
parseErrors: [],
},
],
},
],
},
],
parseErrors: [],
},
}
190 changes: 120 additions & 70 deletions db/model/Gdoc/gdocToArchie.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,103 +6,153 @@ import {
RawBlockHorizontalRule,
RawBlockHeading,
isNil,
RawBlockTableRow,
RawBlockTableCell,
RawBlockText,
} from "@ourworldindata/utils"
import { spanToHtmlString } from "./gdocUtils.js"
import { OwidRawGdocBlockToArchieMLString } from "./rawToArchie.js"
import { match, P } from "ts-pattern"

/**
 * Serializes one Google Docs paragraph to ArchieML text.
 *
 * `context.isInList` is read to decide whether a list has to be opened or
 * closed before this paragraph, and is updated in place so the next call
 * sees the new state.
 *
 * @param paragraph - the Google Docs paragraph to serialize
 * @param context - mutable list-tracking state shared between calls
 * @returns the list marker (if the list state changed) followed by the
 * paragraph body, or by a serialized heading block when the paragraph's
 * named style contains "HEADING"
 */
function paragraphToString(
    paragraph: docs_v1.Schema$Paragraph,
    context: { isInList: boolean }
): string {
    const isBulleted = !isNil(paragraph.bullet)

    // A marker is only needed when the bullet state differs from the state
    // left behind by the previous paragraph: open on entry, close on exit.
    let listMarker = ""
    if (isBulleted !== context.isInList) {
        listMarker = isBulleted ? `\n[.list]\n` : `[]\n`
        context.isInList = isBulleted
    }

    const elements = paragraph.elements
    if (!elements) return listMarker

    const body = elements
        .map((element, position) => {
            const fragment = match(parseParagraph(element))
                .with(
                    { type: P.union("horizontal-rule") },
                    OwidRawGdocBlockToArchieMLString
                )
                .with({ spanType: P.any }, (span) => spanToHtmlString(span))
                .with(P.nullish, () => "")
                .exhaustive()
            // only the first element of a list item carries the asterisk
            const bullet = isBulleted && position === 0 ? "* " : ""
            return `${bullet}${fragment}`
        })
        .join("")

    const styleName = paragraph.paragraphStyle?.namedStyleType
    if (styleName?.includes("HEADING")) {
        // headings are emitted as a heading block instead of inline text
        const heading: RawBlockHeading = {
            type: "heading",
            value: {
                text: body.trim(),
                level: styleName.replace("HEADING_", ""),
            },
        }
        return `${listMarker}\n${OwidRawGdocBlockToArchieMLString(heading)}`
    }

    return listMarker + body
}

/**
 * Serializes a Google Docs table to an ArchieML `[.+rows]` section.
 *
 * Every paragraph found in a cell becomes a raw text block; cell content
 * that is not a paragraph is skipped. Each row is then serialized with
 * `OwidRawGdocBlockToArchieMLString`.
 *
 * @param table - the `table` member of a structural element, possibly unset
 * @returns "" when `table` is falsy, otherwise the serialized rows wrapped
 * between "\n[.+rows]" and "\n[]"
 */
function tableToString(
    table: docs_v1.Schema$StructuralElement["table"]
): string {
    if (!table) return ""

    // a single list-tracking state is shared by all paragraphs of the table
    const listState = { isInList: false }
    const { tableRows = [] } = table

    const rawRows = tableRows.map((gdocRow): RawBlockTableRow => {
        const { tableCells = [] } = gdocRow

        const cells = tableCells.map((gdocCell): RawBlockTableCell => {
            const { content = [] } = gdocCell
            const blocks: RawBlockText[] = []

            for (const structuralElement of content) {
                if (!structuralElement.paragraph) continue
                blocks.push({
                    type: "text",
                    value: paragraphToString(
                        structuralElement.paragraph,
                        listState
                    ),
                })
            }

            return { type: "table-cell", value: blocks }
        })

        return { type: "table-row", value: { cells } }
    })

    const serializedRows = rawRows.map(
        (row) => `\n${OwidRawGdocBlockToArchieMLString(row)}`
    )
    return ["\n[.+rows]", ...serializedRows, "\n[]"].join("")
}

/**
 * Converts a Google Docs document into ArchieML source text.
 *
 * Walks the structural elements of the document body in order. Paragraphs
 * are serialized with `paragraphToString` and tables with `tableToString`;
 * any other kind of structural element contributes nothing to the output.
 *
 * Paragraph serialization is delegated entirely to `paragraphToString`.
 * Serializing the paragraph inline as well would append every paragraph to
 * the output twice and track list state in two unrelated variables.
 *
 * @param document - the document as returned by the Google Docs API
 * @returns an object whose `text` is the concatenated ArchieML; `text` is
 * "" when the document has no body or the body has no content
 */
export async function gdocToArchie(
    document: docs_v1.Schema$Document
): Promise<{ text: string }> {
    // prepare the text holder
    let text = ""
    // list state carried from one body paragraph to the next
    const context = { isInList: false }

    // check if the body key and content key exist, and give up if not
    const content = document.body?.content
    if (!content) return { text }

    // loop through each content element in the body
    for (const element of content) {
        if (element.paragraph) {
            text += paragraphToString(element.paragraph, context)
        } else if (element.table) {
            text += tableToString(element.table)
        }
    }

    return { text }
}

function parseParagraph(
element: docs_v1.Schema$ParagraphElement
): Span | RawBlockHorizontalRule | null {
// pull out the text

const textRun = element.textRun

// sometimes it's not there, skip this all if so
Expand Down
Loading