Skip to content

Commit

Permalink
feat: Support anchor linking.
Browse files Browse the repository at this point in the history
  • Loading branch information
ShogunPanda committed Aug 7, 2023
1 parent c8a0fa0 commit 5000d1d
Show file tree
Hide file tree
Showing 9 changed files with 96 additions and 30 deletions.
2 changes: 2 additions & 0 deletions packages/docs/pages/plugins/plugin-parsedoc.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ An asynchronous function that takes three arguments:
- `globPath`: a string representing a glob path to reading the files from.
- `options`: an object containing the following properties:
- `transformFn` (optional): a function that passes an object as its only argument. It contains the raw HTML/Markdown chunk, tag name, parsed content and html attributes.
If the function adds an `additionalProperties` object to the transformed node, it will be merged with the original node's properties.
- `mergeStrategy` (optional): a value that defines how to handle consecutive chunks of the same tag. The default value is `merge`. Accepted values are:
- `merge`: consecutive chunks with the same tag will be merged into one document for the index.
- `split`: consecutive chunks with the same tag will be split into separate documents for the index.
Expand All @@ -67,6 +68,7 @@ A asynchronous function that takes three arguments. Should be used internally by
- `fileType`: a string representing the file type. Accepted values are `html` and `md`.
- `options`: an object containing the following properties:
- `transformFn` (optional): a function that passes an object as its only argument. It contains the raw HTML/Markdown chunk, tag name, parsed content and html attributes.
If the function adds an `additionalProperties` object to the transformed node, it will be merged with the original node's properties.
- `mergeStrategy` (optional): a value that defines how to handle consecutive chunks of the same tag. The default value is `merge`. Accepted values are:
- `merge`: consecutive chunks with the same tag will be merged into one document for the index.
- `split`: consecutive chunks with the same tag will be split into separate documents for the index.
Expand Down
1 change: 1 addition & 0 deletions packages/plugin-docusaurus/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
"@orama/orama": "workspace:*",
"@orama/plugin-match-highlight": "workspace:*",
"@orama/plugin-parsedoc": "workspace:*",
"github-slugger": "^2.0.0",
"pako": "^2.1.0",
"vfile-message": "^3.1.3"
},
Expand Down
40 changes: 34 additions & 6 deletions packages/plugin-docusaurus/src/server/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ import type { LoadContext, Plugin } from '@docusaurus/types'
import { create, insertMultiple, save } from '@orama/orama'
import { documentsStore } from '@orama/orama/components'
import { OramaWithHighlight, afterInsert as highlightAfterInsert } from '@orama/plugin-match-highlight'
import type { DefaultSchemaElement, NodeContent } from '@orama/plugin-parsedoc'
import type { DefaultSchemaElement, NodeContent, PopulateFnContext } from '@orama/plugin-parsedoc'
import { defaultHtmlSchema, populate } from '@orama/plugin-parsedoc'
import * as githubSlugger from 'github-slugger'
import { cp, readFile, writeFile } from 'node:fs/promises'
import { resolve } from 'node:path'
import { fileURLToPath } from 'node:url'
Expand All @@ -23,7 +24,9 @@ function indexPath(outDir: string, version: string): string {
return resolve(outDir, INDEX_FILE.replace('@VERSION@', version))
}

function transformFn(node: NodeContent): NodeContent {
function transformFn(node: NodeContent, context: PopulateFnContext): NodeContent {
let raw

switch (node.tag) {
case 'strong':
case 'a':
Expand All @@ -34,10 +37,30 @@ function transformFn(node: NodeContent): NodeContent {
case 'b':
case 'p':
case 'ul':
return { ...node, raw: `<p>${node.content}</p>` }
default:
return node
raw = `<p>${node.content}</p>`
break
case 'h1':
case 'h2':
case 'h3':
case 'h4':
case 'h5':
case 'h6':
context.lastLink = node.properties?.id ?? githubSlugger.slug(node.content)
break
}

const transformed = {
...node,
additionalProperties: {
hash: context.lastLink
}
}

if (raw) {
transformed.raw = raw
}

return transformed
}

function defaultToSectionSchema(
Expand All @@ -46,7 +69,7 @@ function defaultToSectionSchema(
sectionTitle: string,
version: string
): SectionSchema {
const { content, type } = node
const { content, type, properties } = node

if (!sectionTitle) {
sectionTitle = (pageRoute.split('/').pop() ?? '')
Expand All @@ -58,6 +81,7 @@ function defaultToSectionSchema(

return {
pageRoute,
hash: (properties?.hash as string) ?? '',
sectionTitle: pageRoute ? sectionTitle : 'Home',
sectionContent: content,
type,
Expand Down Expand Up @@ -92,6 +116,10 @@ async function generateDocument(
if (!section.pageRoute.startsWith('/')) {
section.pageRoute = '/' + section.pageRoute
}

if (section.hash) {
section.pageRoute += `#${section.hash}`
}
}

return sections
Expand Down
1 change: 1 addition & 0 deletions packages/plugin-docusaurus/src/server/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ export interface SectionSchema extends Document {
pageRoute: string
sectionTitle: string
version: string
hash: string
}

export type RawDataWithPositions = RawData & { positions: Record<string, Record<string, Record<string, Position[]>>> }
Expand Down
6 changes: 3 additions & 3 deletions packages/plugin-docusaurus/test/integration.ts
Original file line number Diff line number Diff line change
Expand Up @@ -101,20 +101,20 @@ await test('generated DBs have indexed pages content', async () => {
// Search results seem reasonable
const indexSearchResult = await search(database, 'index')
assert.ok(indexSearchResult.count === 1)
assert.ok(indexSearchResult.hits[0].document.pageRoute === '/')
assert.ok(indexSearchResult.hits[0].document.pageRoute === '/#main')

const catSearchResult = await search(database, 'cat')
assert.ok(catSearchResult.count === 1)
assert.ok(catSearchResult.hits[0].document.pageRoute === '/animals_cat')

const dogSearchResult = await search(database, 'dog')
assert.ok(dogSearchResult.count === 2)
assert.ok(dogSearchResult.hits[0].document.pageRoute === '/animals_dog')
assert.ok(dogSearchResult.hits[0].document.pageRoute === '/animals_dog#dog')

const domesticSearchResult = await search(database, 'domestic')
assert.ok(domesticSearchResult.count === 2)
assert.ok(domesticSearchResult.hits[0].document.pageRoute === '/animals_cat')
assert.ok(domesticSearchResult.hits[1].document.pageRoute === '/animals_dog')
assert.ok(domesticSearchResult.hits[1].document.pageRoute === '/animals_dog#dog')

// We do not have content about turtles
const turtleSearchResult = await search(database, 'turtle')
Expand Down
67 changes: 49 additions & 18 deletions packages/plugin-parsedoc/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,12 @@ export interface DefaultSchemaElement extends Document {
properties?: Properties
}

/**
 * Free-form bag of values handed to the transform function as its second
 * argument. Keys and value types are not constrained by this alias.
 */
export type PopulateFnContext = Record<string, any>

/**
 * Options accepted by the populate helpers. All fields are optional.
 */
interface PopulateFromGlobOptions {
// Callback applied to element nodes while the tree is visited.
transformFn?: TransformFn
// How consecutive chunks are handled; treated as 'merge' when omitted.
mergeStrategy?: MergeStrategy
// Initial context for transformFn; it is copied with structuredClone before use,
// so the object passed here is not the one the transform function receives.
context?: PopulateFnContext
}

type PopulateOptions = PopulateFromGlobOptions & { basePath?: string }
Expand All @@ -54,12 +57,11 @@ const populateFromFile = async (db: Orama, filename: string, options?: PopulateF
return populate(db, data, fileType, { ...options, basePath: `${filename}/` })
}

export const populate = async (
db: Orama,
export const parseFile = async (
data: Buffer | string,
fileType: FileType,
options?: PopulateOptions
): Promise<string[]> => {
): Promise<DefaultSchemaElement[]> => {
const records: DefaultSchemaElement[] = []
switch (fileType) {
case 'md':
Expand All @@ -80,13 +82,34 @@ export const populate = async (
return fileType
/* c8 ignore stop */
}
return insertMultiple(db, records)

return records
}

/**
 * Parses `data` into records with `parseFile` and inserts them into `db`
 * with `insertMultiple`.
 *
 * @param db - Orama instance that receives the parsed records.
 * @param data - Raw document contents, as a Buffer or a string.
 * @param fileType - Type of the document being parsed; forwarded to `parseFile`.
 * @param options - Optional parsing options; forwarded unchanged to `parseFile`.
 * @returns The value resolved by `insertMultiple` (presumably the ids of the
 * inserted documents — confirm against the Orama API).
 */
export const populate = async (
db: Orama,
data: Buffer | string,
fileType: FileType,
options?: PopulateOptions
): Promise<string[]> => {
return insertMultiple(db, await parseFile(data, fileType, options))
}

function rehypeOrama(records: DefaultSchemaElement[], options?: PopulateOptions): (tree: Root) => void {
if (!options) {
options = {}
}

return (tree: Root) => {
tree.children.forEach((child, i) => {
visitChildren(child, tree, `${options?.basePath /* c8 ignore next */ ?? ''}root[${i}]`, records, options)
visitChildren(
child,
tree,
`${options?.basePath /* c8 ignore next */ ?? ''}root[${i}]`,
records,
options!,
structuredClone(options?.context ?? {})
)
})
}
}
Expand All @@ -96,7 +119,8 @@ function visitChildren(
parent: Parent,
path: string,
records: DefaultSchemaElement[],
options?: PopulateOptions
options: PopulateOptions,
context: PopulateFnContext
): void {
if (node.type === 'text') {
addRecords(
Expand All @@ -105,23 +129,24 @@ function visitChildren(
path,
(parent as Element).properties,
records,
options?.mergeStrategy ?? 'merge'
options.mergeStrategy ?? 'merge'
)
return
}

if (!('tagName' in node)) return

const transformedNode = typeof options?.transformFn === 'function' ? applyTransform(node, options.transformFn) : node
const transformedNode =
typeof options?.transformFn === 'function' ? applyTransform(node, options.transformFn, context) : node

transformedNode.children.forEach((child, i) => {
visitChildren(child, transformedNode, `${path}.${transformedNode.tagName}[${i}]`, records, options)
visitChildren(child, transformedNode, `${path}.${transformedNode.tagName}[${i}]`, records, options, context)
})
}

function applyTransform(node: Element, transformFn: TransformFn): Element {
function applyTransform(node: Element, transformFn: TransformFn, context: PopulateFnContext): Element {
const preparedNode = prepareNode(node)
const transformedNode = transformFn(preparedNode)
const transformedNode = transformFn(preparedNode, context)
return applyChanges(node, transformedNode)
}

Expand All @@ -134,14 +159,19 @@ function prepareNode(node: Element): NodeContent {
}

function applyChanges(node: Element, transformedNode: NodeContent): Element {
let changed = node

if (toHtml(node) !== transformedNode.raw) {
return fromHtml(transformedNode.raw, { fragment: true }).children[0] as Element
changed = fromHtml(transformedNode.raw, { fragment: true }).children[0] as Element
} else {
node.tagName = transformedNode.tag
if (toString(node) !== transformedNode.content) {
changed = fromString(node, transformedNode.content)
}
}
node.tagName = transformedNode.tag
if (toString(node) !== transformedNode.content) {
return fromString(node, transformedNode.content)
}
return node

changed.properties = { ...changed.properties, ...transformedNode.additionalProperties }
return changed
}

function addRecords(
Expand Down Expand Up @@ -203,6 +233,7 @@ export interface NodeContent {
raw: string
content: string
properties?: Properties
additionalProperties?: Properties
}

export type TransformFn = (node: NodeContent) => NodeContent
export type TransformFn = (node: NodeContent, context: PopulateFnContext) => NodeContent
2 changes: 1 addition & 1 deletion packages/stemmers/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -193,4 +193,4 @@
"engines": {
"node": ">= 16.0.0"
}
}
}
2 changes: 1 addition & 1 deletion packages/stopwords/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -186,4 +186,4 @@
"engines": {
"node": ">= 16.0.0"
}
}
}
5 changes: 4 additions & 1 deletion pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 5000d1d

Please sign in to comment.