Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(api): parse Team Member data from Google Doc #676

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions src/lib/components/BodyParser/blocks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,17 @@ type Testimonial = {
data: TestimonialElement;
};

/** Profile fields carried by a single team member block. */
type TeamMemberData = {
  /** Display name of the team member. */
  name: string;
  /** Role text shown for the team member. */
  role: string;
  /** Free-form description text. */
  description: string;
  /** Image reference for the member, stored as a string. */
  image: string;
  /** Optional link associated with the member. */
  link?: string;
};

/** Content block describing one team member, tagged with `type: 'teamMember'`. */
export type TeamMemberBlock = {
  type: 'teamMember';
  data: TeamMemberData;
};

export type Author = {
name: string;
url: string;
Expand Down Expand Up @@ -402,6 +413,19 @@ const parseToc = (block: TocBlock) => {
};
};

/** One entry of a `teamMembers` block; same fields as `TeamMemberBlock['data']`. */
type TeamMemberEntry = {
  name: string;
  role: string;
  description: string;
  image: string;
  link?: string;
};

/**
 * Parser handler for `teamMembers` blocks.
 *
 * Copies only the known member fields (`name`, `role`, `description`, `image`, `link`)
 * from every entry of `block.data.members`, dropping any other properties.
 *
 * The parameter is typed structurally because no `TeamMembersBlock` type is declared
 * alongside `TeamMemberBlock`; any object exposing `data.members` is accepted.
 *
 * NOTE(review): the Google Doc conversion emits blocks with `type: 'teamMember'`
 * (singular, one member directly in `data`), while this handler is registered under
 * `teamMembers` and reads `data.members` — confirm which shape is intended.
 *
 * @param block - Block whose `data.members` lists the team members to normalise.
 * @returns An object tagged `teamMembers` whose `data` is the normalised member list.
 */
const parseTeamMembers = (block: { data: { members: TeamMemberEntry[] } }) => {
  return {
    type: 'teamMembers',
    data: block.data.members.map(({ name, role, description, image, link }) => ({
      name,
      role,
      description,
      image,
      link,
    })),
  };
};
lmontero18 marked this conversation as resolved.
Show resolved Hide resolved

const parseList = (block: ListBlock) => {
const tokens: any[] = [];
block.data.items.forEach((item) => {
Expand Down Expand Up @@ -590,6 +614,7 @@ const htmlParser = HTMLParser({
toc: parseToc,
cta: parseCTA,
testimonial: parseTestimonial,
teamMembers: parseTeamMembers,
});

/**
 * Runs the module's `htmlParser` over a list of blocks.
 *
 * @param blocks - Blocks to parse; they are wrapped as `{ blocks }` before being handed to the parser.
 * @returns Whatever `htmlParser.parse` returns for that payload.
 */
const parseBlocks = (blocks: any[]) => {
  const payload = { blocks };
  return htmlParser.parse(payload);
};
Expand Down
2 changes: 1 addition & 1 deletion src/routes/(pages)/about/template.pug
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ include ../c/mixins
+crumbItemText("About")
h1.text-title-l.font-satoshi.text-center(class="smDown:text-left xs:text-h1-s" id="{message.id}") About Holdex
+content-wrapper
BodyRenderer(blocks="{message.blocks}")
BodyRenderer(blocks="{message.blocks}")
160 changes: 149 additions & 11 deletions src/routes/api/google-conversion/+server.ts
georgeciubotaru marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,17 @@ import type {
Schema$TextStyle,
} from '$lib/types/googleDoc';
import type { RequestHandler } from './$types';
import type { Author, CTAElement, TestimonialElement } from '$components/BodyParser/blocks';
import type { Parsed$Paragraph, Parsed$ParagraphElement, Parsed$ParagraphItems } from '$lib/types/googleConversion';
import type {
Author,
CTAElement,
TeamMemberBlock,
TestimonialElement,
} from '$components/BodyParser/blocks';
import type {
Parsed$Paragraph,
Parsed$ParagraphElement,
Parsed$ParagraphItems,
} from '$lib/types/googleConversion';
import { trimJoinArray } from '$lib/utils';

export const POST: RequestHandler = async ({ request }) => {
Expand All @@ -38,6 +47,24 @@ export const POST: RequestHandler = async ({ request }) => {
);
};

/**
* Converts a Google Document (Schema$Document) into a Holdex JSON format.
*
* This function processes a provided Google Document by extracting header and body content to produce
* a JSON structure compatible with Holdex. It extracts author details from headers by looking for
* paragraphs containing "Authors:". It then iterates through the document body, handling different
* content types:
* - For paragraphs, it delegates processing to the `parseParagraph` function.
* - For tables, it builds a nested content structure from table cells and attempts to extract team member
* data via `parseTeamMemberSection`, call-to-action (CTA) details via `parseCTASection`, or testimonials
* via `parseTestimonialSection`. The checks are prioritized: a team member result is used before a testimonial.
* - For table of contents, it processes each paragraph entry to form a TOC block.
*
* The function returns a newly assembled array of content blocks structured for Holdex consumption.
*
* @param document - The Google Document object conforming to the Schema$Document interface.
* @returns An array containing the transformed content blocks in Holdex JSON format.
*/
function convertToHoldexJson(document: Schema$Document) {
const { body, headers } = document;

Expand Down Expand Up @@ -92,10 +119,13 @@ function convertToHoldexJson(document: Schema$Document) {
tableContent.push(trowContent);
});

const teamMember = parseTeamMemberSection(tableContent);
const cta: CTAElement = parseCTASection(tableContent);
const testimonial: TestimonialElement = parseTestimonialSection(tableContent);

if (!_.isEmpty(testimonial)) {
if (!_.isEmpty(teamMember)) {
newContent.push(teamMember);
} else if (!_.isEmpty(testimonial)) {
newContent.push({
type: 'testimonial',
data: testimonial,
Expand All @@ -113,9 +143,7 @@ function convertToHoldexJson(document: Schema$Document) {
},
});
}
}
// Table Of Contents
else if (tableOfContents) {
} else if (tableOfContents) {
const { content } = tableOfContents;

if (content && content.length > 0) {
Expand Down Expand Up @@ -175,6 +203,24 @@ function parseCTASection(content: any[]) {
return cta;
}

/**
* Parses a testimonial section from the provided content array.
*
* This function extracts testimonial data from a specific two-dimensional array structure. It expects the content array to have exactly 5 rows,
* each containing 2 elements. The header row must include a paragraph with the text "type" as its first element and a paragraph with the text "testimonial" as its second element.
* For each subsequent row, the function interprets the first element as the key and the second element as the corresponding value.
*
* The extracted key-value pairs are used to populate a TestimonialElement object with the following properties:
* - name: The name of the person giving the testimonial.
* - title: The title or role of the person.
* - content: The testimonial text.
* - picture: An object containing the person's name (as text) and the URL of their picture.
*
* If the content does not meet the expected structure, an empty TestimonialElement is returned.
*
* @param content - An array representing rows of testimonial data, where each row is an array of two elements with paragraph objects.
* @returns A populated TestimonialElement object if the expected structure is met; otherwise, an empty TestimonialElement.
*/
function parseTestimonialSection(content: any[]) {
const testimonial: TestimonialElement = {} as TestimonialElement;
if (content.length === 5 && (content[0] as any[]).length === 2) {
Expand Down Expand Up @@ -202,6 +248,81 @@ function parseTestimonialSection(content: any[]) {
}
return testimonial;
}

/**
 * Parses a single team member out of a two-column key/value table.
 *
 * Expected input: an array of table rows, each row an array of two cells, each cell an array whose
 * first entry is a paragraph object (`{ type: 'paragraph', data: { text } }`).
 * - The first row is the header: its first cell must read "type" and its second cell "teamMember".
 * - Every following row is a key/value pair. The key (first cell) is trimmed and lower-cased;
 *   the value (second cell) is taken as-is, defaulting to an empty string.
 * - "name", "role", "description" and "image" are required; "link" is optional.
 *
 * Rows that are malformed (fewer than two cells, non-paragraph cells, missing key text) are skipped
 * instead of throwing. When a key appears more than once, the last occurrence wins.
 *
 * @param content - Table rows as described above.
 * @returns A `teamMember` block when the header matches and all required fields are non-empty;
 * otherwise `null`.
 */
function parseTeamMemberSection(content: any[]): TeamMemberBlock | null {
  // Header row + four required fields = at least 5 rows (an optional "link" row makes 6).
  if (!Array.isArray(content) || content.length < 5) return null;

  const [header, ...rows] = content;
  if (!Array.isArray(header) || header.length !== 2) return null;

  const headKey = header[0]?.[0];
  const headValue = header[1]?.[0];
  const isTeamMemberTable =
    headKey?.type === 'paragraph' &&
    headKey?.data?.text === 'type' &&
    headValue?.type === 'paragraph' &&
    headValue?.data?.text === 'teamMember';
  if (!isTeamMemberTable) return null;

  // A Map keeps arbitrary document-supplied keys away from object prototype properties.
  const fields = new Map<string, string>();

  for (const row of rows) {
    // Index access instead of nested destructuring: a row with a missing cell must not throw.
    if (!Array.isArray(row) || row.length < 2) continue;

    const keyCell = row[0]?.[0];
    const valueCell = row[1]?.[0];
    if (keyCell?.type !== 'paragraph' || valueCell?.type !== 'paragraph') continue;

    const rawKey = keyCell.data?.text;
    if (typeof rawKey !== 'string') continue;

    fields.set(rawKey.trim().toLowerCase(), valueCell.data?.text || '');
  }

  const name = fields.get('name');
  const role = fields.get('role');
  const description = fields.get('description');
  const image = fields.get('image');

  // Empty strings count as missing, so a row with a blank value does not satisfy a required field.
  if (!name || !role || !description || !image) return null;

  return {
    type: 'teamMember',
    data: {
      name,
      role,
      description,
      image,
      // An absent or blank link is normalised to undefined.
      link: fields.get('link') || undefined,
    },
  };
}
lmontero18 marked this conversation as resolved.
Show resolved Hide resolved

/**
* Extracts the author information from a paragraph header element.
*
* This function inspects the provided paragraph element for a linked text run. If a link exists,
* it cleans the text content to derive the author's name and retrieves the URL from the text style link.
* An Author object with the extracted `name` and `url` is returned. If no link is present, an empty Author object is returned.
*
* @param content - A Schema$ParagraphElement that may contain author details embedded in a link.
* @returns An Author object with `name` and `url` properties if available; otherwise, an empty Author object.
*/
function getHeaderRowAuthor(content: Schema$ParagraphElement) {
const author: Author = {} as Author;
if (content && content.textRun?.textStyle?.link) {
Expand Down Expand Up @@ -314,6 +435,20 @@ function getImage(document: Schema$Document, element: Schema$ParagraphElement) {
return null;
}

/**
* Converts a paragraph element's content into HTML formatted text based on its styling.
*
* This function extracts and cleans text from the given paragraph element, then applies HTML tags
* to represent text styles such as underline, italic, bold, and strikethrough. When a link is present,
* it wraps the text in an anchor tag, unless it's a call-to-action link, in which case it returns the URL directly.
* Bold formatting is omitted if the text is marked as a header.
*
* @param element - The paragraph element containing text and style information.
* @param options - Optional flags to modify text processing.
* @param options.isHeader - Indicates if the text is a header. Bold formatting will be skipped if true.
* @param options.isCtaLink - Indicates if the text is linked as a call-to-action; if true, returns the link's URL.
* @returns The HTML formatted text derived from the paragraph element.
*/
function getText(element: Schema$ParagraphElement, { isHeader = false, isCtaLink = false } = {}) {
let text = cleanText(element.textRun?.content as string);
const { link, underline, strikethrough, bold, italic } = element?.textRun
Expand All @@ -339,7 +474,7 @@ function getText(element: Schema$ParagraphElement, { isHeader = false, isCtaLink

if (link) {
if (isCtaLink) {
return (link.url || text) as string
return (link.url || text) as string;
}
return `<a href="${link.url}">${text}</a>`;
}
Expand Down Expand Up @@ -383,7 +518,7 @@ const parseParagraphElement = (
parentContent.push({
[tag]: getText(element, {
isHeader: tag !== 'p',
isCtaLink: wrappingTable
isCtaLink: wrappingTable,
}),
});
}
Expand Down Expand Up @@ -423,7 +558,8 @@ const parseParagraph = (
const listStyle = listTag === 'ol' ? 'ordered' : 'unordered';

if (prevListId === listId) {
const list: Parsed$ParagraphItems[] = (_.last(contents)?.data as Parsed$ParagraphItems).items ?? [];
const list: Parsed$ParagraphItems[] =
(_.last(contents)?.data as Parsed$ParagraphItems).items ?? [];

if (nestingLevel !== undefined) {
const lastIndex = list.length - 1;
Expand Down Expand Up @@ -529,7 +665,7 @@ const parseParagraph = (
(paragraph?.paragraphStyle?.indentFirstLine?.magnitude
? paragraph?.paragraphStyle?.indentFirstLine?.magnitude
: 0) /
18 +
18 +
2;

tagContent.push({
Expand Down Expand Up @@ -562,7 +698,9 @@ const parseParagraph = (
},
});
} else {
elements?.forEach((element) => parseParagraphElement(document, tag, tagContent, element, wrappingTable));
elements?.forEach((element) =>
parseParagraphElement(document, tag, tagContent, element, wrappingTable)
);
}
}

Expand Down