diff --git a/__mocks__/content/index.mdx b/__mocks__/content/index.mdx index 46d4d2e..9d40e9e 100644 --- a/__mocks__/content/index.mdx +++ b/__mocks__/content/index.mdx @@ -1,5 +1,7 @@ --- title: Homepage +tags: tag1, tag2, tag3 --- -# Welcome \ No newline at end of file +# Welcome +[link](blog0.mdx) \ No newline at end of file diff --git a/__mocks__/content/news/index.md b/__mocks__/content/news/index.md deleted file mode 100644 index e7e6e7a..0000000 --- a/__mocks__/content/news/index.md +++ /dev/null @@ -1,6 +0,0 @@ ---- -type: news ---- - -# Document Title - diff --git a/package-lock.json b/package-lock.json index 3fdee79..9502058 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "mddb", - "version": "0.1.9", + "version": "0.3.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "mddb", - "version": "0.1.9", + "version": "0.3.0", "license": "MIT", "dependencies": { "@portaljs/remark-wiki-link": "^1.0.4", @@ -24,6 +24,7 @@ "@changesets/changelog-github": "^0.4.8", "@changesets/cli": "^2.26.1", "@types/jest": "^29.5.1", + "@types/node": "^20.8.7", "@typescript-eslint/eslint-plugin": "^5.59.5", "@typescript-eslint/parser": "^5.59.5", "eslint": "^8.40.0", @@ -2239,10 +2240,13 @@ "integrity": "sha512-iiUgKzV9AuaEkZqkOLDIvlQiL6ltuZd9tGcW3gwpnX8JbuiuhFlEGmmFXEXkN50Cvq7Os88IY2v0dkDqXYWVgA==" }, "node_modules/@types/node": { - "version": "20.1.0", - "resolved": "https://registry.npmjs.org/@types/node/-/node-20.1.0.tgz", - "integrity": "sha512-O+z53uwx64xY7D6roOi4+jApDGFg0qn6WHcxe5QeqjMaTezBO/mxdfFXIVAVVyNWKx84OmPB3L8kbVYOTeN34A==", - "dev": true + "version": "20.8.7", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.8.7.tgz", + "integrity": "sha512-21TKHHh3eUHIi2MloeptJWALuCu5H7HQTdTrWIFReA8ad+aggoX+lRes3ex7/FtpC+sVUpFMQ+QTfYr74mruiQ==", + "dev": true, + "dependencies": { + "undici-types": "~5.25.1" + } }, "node_modules/@types/normalize-package-data": { "version": "2.4.1", @@ -9745,6 +9749,12 @@ "url": 
"https://github.com/sponsors/ljharb" } }, + "node_modules/undici-types": { + "version": "5.25.3", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.25.3.tgz", + "integrity": "sha512-Ga1jfYwRn7+cP9v8auvEXN1rX3sWqlayd4HP7OKk4mZWylEmu3KzXDUGrQUN6Ol7qo1gPvB2e5gX6udnyEPgdA==", + "dev": true + }, "node_modules/unified": { "version": "10.1.2", "resolved": "https://registry.npmjs.org/unified/-/unified-10.1.2.tgz", diff --git a/package.json b/package.json index ae3ad7b..7dab7f5 100644 --- a/package.json +++ b/package.json @@ -13,6 +13,7 @@ "pretest": "npm run build && node ./dist/src/bin/index.js ./__mocks__/content", "test": "node --experimental-vm-modules node_modules/jest/bin/jest.js", "build": "tsc --p tsconfig.lib.json", + "dev": "tsc --p tsconfig.lib.json --watch", "changeset": "changeset", "prepublishOnly": "npm run build", "release": "changeset publish" @@ -55,6 +56,7 @@ "@changesets/changelog-github": "^0.4.8", "@changesets/cli": "^2.26.1", "@types/jest": "^29.5.1", + "@types/node": "^20.8.7", "@typescript-eslint/eslint-plugin": "^5.59.5", "@typescript-eslint/parser": "^5.59.5", "eslint": "^8.40.0", diff --git a/src/bin/index.js b/src/bin/index.ts old mode 100755 new mode 100644 similarity index 100% rename from src/bin/index.js rename to src/bin/index.ts diff --git a/src/lib/DbQueryManager.ts b/src/lib/DbQueryManager.ts new file mode 100644 index 0000000..17f2507 --- /dev/null +++ b/src/lib/DbQueryManager.ts @@ -0,0 +1,116 @@ +import { Knex } from "knex"; +import { MddbFile, MddbTag, MddbLink } from "./schema.js"; +import { FilesQuery } from "./types/DbQueryTypes.js"; + +export class DbQueryManager { + private db: Knex; + + constructor(db: Knex) { + this.db = db; + } + + async getFileById(id: string): Promise { + const file = await this.db.from("files").where("_id", id).first(); + return new MddbFile(file); + } + + async getFileByUrl(url: string): Promise { + const file = await this.db + .from("files") + .where("url_path", encodeURI(url)) + 
.first(); + return new MddbFile(file); + } + + async getFiles(query?: FilesQuery): Promise { + const { filetypes, tags, extensions, folder, frontmatter } = query || {}; + + const files = await this.db + // TODO join only if tags are specified ? + .leftJoin("file_tags", "files._id", "file_tags.file") + .where((builder) => { + // TODO temporary solution before we have a proper way to filter files by and assign file types + if (folder) { + builder.whereLike("url_path", `${folder}/%`); + } + + if (tags) { + builder.whereIn("tag", tags); + } + + if (extensions) { + builder.whereIn("extension", extensions); + } + + if (filetypes) { + builder.whereIn("filetype", filetypes); + } + + if (frontmatter) { + Object.entries(frontmatter).forEach(([key, value]) => { + if (typeof value === "string" || typeof value === "number") { + builder.whereRaw(`json_extract(metadata, '$.${key}') = ?`, [ + value, + ]); + } else if (typeof value === "boolean") { + if (value) { + builder.whereRaw(`json_extract(metadata, '$.${key}') = ?`, [ + true, + ]); + } else { + builder.where(function () { + this.whereRaw(`json_extract(metadata, '$.${key}') = ?`, [ + false, + ]).orWhereRaw(`json_extract(metadata, '$.${key}') IS NULL`); + }); + } + } + + // To check if the provided value exists in an array inside the JSON + else { + builder.whereRaw(`json_extract(metadata, '$.${key}') LIKE ?`, [ + `%${value}%`, + ]); + } + }); + } + }) + .select("files.*") + .from("files") + .groupBy("_id"); + + return files.map((file) => new MddbFile(file)); + } + + async getTags(): Promise { + const tags = await this.db("tags").select(); + return tags.map((tag) => new MddbTag(tag)); + } + + async getLinks(query?: { + fileId: string; + linkType?: "normal" | "embed"; + direction?: "forward" | "backward"; + }): Promise { + const { fileId, direction = "forward", linkType } = query || {}; + const joinKey = direction === "forward" ? 
"from" : "to"; + const where = { + [joinKey]: fileId, + }; + if (linkType) { + where["link_type"] = linkType; + } + const dbLinks = await this.db + .select("links.*") + .from("links") + .rightJoin("files", `links.${joinKey}`, "=", "files._id") + .where(where); + + const links = dbLinks.map((link) => new MddbLink(link)); + return links; + } + + _destroyDb() { + this.db.destroy(); + } +} diff --git a/src/lib/indexFolderToObjects.ts b/src/lib/indexFolderToObjects.ts new file mode 100644 index 0000000..e0c8963 --- /dev/null +++ b/src/lib/indexFolderToObjects.ts @@ -0,0 +1,79 @@ +import { getUniqueValues, recursiveWalkDir } from "../utils/index.js"; +import type { WikiLink } from "../utils/index.js"; +import { extractFileSchemeFromObject } from "../utils/extractFileSchemeFromObject.js"; +import { processFile } from "./processFile.js"; +import type { File, FileTag, Link, Tag } from "./types/schemaTypes.js"; + +export function indexFolderToObjects( + folderPath: string, + pathToUrlResolver: (filePath: string) => string, + ignorePatterns?: RegExp[] +) { + const filePathsToIndex = recursiveWalkDir(folderPath); + const files: File[] = []; + const tags: string[] = []; + const fileTags: FileTag[] = []; + const links: Link[] = []; + const filteredFilePathsToIndex = filePathsToIndex.filter((filePath) => + shouldIncludeFile(filePath, ignorePatterns) + ); + + for (const filePath of filteredFilePathsToIndex) { + const fileObject = processFile( + filePath, + folderPath, + filePathsToIndex, + pathToUrlResolver + ); + + const file = extractFileSchemeFromObject(fileObject); + files.push(file); + + tags.push(...fileObject.tags); + + const fileTagsToInsert = fileObject.tags.map((tag) => ({ + tag: tag, + file: fileObject._id, + })); + fileTags.push(...fileTagsToInsert); + + const linksToInsert: Link[] = processWikiLinks( + fileObject.links, + fileObject._id, + filePathsToIndex + ); + links.push(...linksToInsert); + } + + const uniqueTags = getUniqueValues(tags); + const TagsToInsert = 
uniqueTags.map((tag) => ({ name: tag })); + return { + files: files, + tags: TagsToInsert, + fileTags: fileTags, + links: links, + }; +} + +function processWikiLinks( + links: WikiLink[], + fileId: string, + filePathsToIndex: string[] +): Link[] { + return links + .map((link) => ({ + from: fileId, + to: filePathsToIndex.find((file) => file === link.linkSrc)!, + link_type: link.linkType, + })) + .filter((link) => link.to !== undefined); +} + +function shouldIncludeFile( + filePath: string, + ignorePatterns?: RegExp[] +): boolean { + return !( + ignorePatterns && ignorePatterns.some((pattern) => pattern.test(filePath)) + ); +} diff --git a/src/lib/markdowndb.spec.ts b/src/lib/markdowndb.spec.ts index 5066512..4aaa22b 100644 --- a/src/lib/markdowndb.spec.ts +++ b/src/lib/markdowndb.spec.ts @@ -1,7 +1,8 @@ // import knex from "knex"; import { MarkdownDB } from "./markdowndb"; -import { File, MddbFile, Table } from "./schema"; +import { MddbFile } from "./schema"; import { recursiveWalkDir } from "../utils"; +import { File, Table } from "./types/schemaTypes"; /** * @jest-environment node diff --git a/src/lib/markdowndb.ts b/src/lib/markdowndb.ts index d65a31d..8e9eb87 100644 --- a/src/lib/markdowndb.ts +++ b/src/lib/markdowndb.ts @@ -1,45 +1,15 @@ -import crypto from "crypto"; -import fs from "fs"; -import path from "path"; import knex, { Knex } from "knex"; -import { recursiveWalkDir, parseFile, WikiLink } from "../utils/index.js"; -import { - File, - MddbFile, - Link, - Tag, - FileTag, - MddbTag, - MddbFileTag, - MddbLink, -} from "./schema.js"; - -const defaultFilePathToUrl = (filePath: string) => { - let url = filePath - .replace(/\.(mdx|md)/, "") - .replace(/\\/g, "/") // replace windows backslash with forward slash - .replace(/(\/)?index$/, ""); // remove index from the end of the permalink - url = url.length > 0 ? 
url : "/"; // for home page - return encodeURI(url); -}; - -const resolveLinkToUrlPath = (link: string, sourceFilePath?: string) => { - if (!sourceFilePath) { - return link; - } - // needed to make path.resolve work correctly - // becuase we store urls without leading slash - const sourcePath = "/" + sourceFilePath; - const dir = path.dirname(sourcePath); - const resolved = path.resolve(dir, link); - // remove leading slash - return resolved.slice(1); -}; +import { MddbFile, MddbTag, MddbFileTag, MddbLink } from "./schema.js"; +import { DbQueryManager } from "./DbQueryManager.js"; +import type { FilesQuery, LinkQuery } from "./types/DbQueryTypes.js"; +import { indexFolderToObjects } from "./indexFolderToObjects.js"; +import { defaultFilePathToUrl } from "../utils/defaultFilePathToUrl.js"; export class MarkdownDB { config: Knex.Config; db: Knex; + dbQueryManager: DbQueryManager; constructor(config: Knex.Config) { this.config = config; @@ -47,6 +17,7 @@ export class MarkdownDB { async init() { this.db = knex({ ...this.config, useNullAsDefault: true }); + this.dbQueryManager = new DbQueryManager(this.db); return this; } @@ -60,272 +31,49 @@ export class MarkdownDB { ignorePatterns?: RegExp[]; pathToUrlResolver?: (filePath: string) => string; }) { - // Temporary, we don't want to handle updates now - // so database is refreshed every time the folder - // is indexed - await MddbFile.deleteTable(this.db); - await MddbTag.deleteTable(this.db); - await MddbFileTag.deleteTable(this.db); - await MddbLink.deleteTable(this.db); - - await MddbFile.createTable(this.db); - await MddbTag.createTable(this.db); - await MddbFileTag.createTable(this.db); - await MddbLink.createTable(this.db); - - const filePathsToIndex = recursiveWalkDir(folderPath); - - const filesToInsert: File[] = []; - const fileTagsToInsert: FileTag[] = []; - // TODO shouldn't available tags be explicitly defined in some config file - // instead of being extracted from all files? 
I think it's better even from user perspective - // as he can easily manage and see all the tags he is using - // (he can qickly look up tag if he's not sure what term he was using in other files) - // + it's easier to implement - const tagsToInsert: Tag[] = []; - const linksToInsert: Link[] = []; - - // TODO is there a better way to do this? - // Temporary containter for storing links extracted from each file - // as a map of file id -> extracted links. - // This is used after all files have been parsed and added to filesToInsert - // to resolve paths in links to target file ids - const filesLinksMap: { - [fileId: string]: { - url: string; - links: WikiLink[]; - }; - } = {}; - - for (const filePath of filePathsToIndex) { - if (ignorePatterns.some((pattern) => pattern.test(filePath))) { - continue; - } - - // id - // TODO this can be autogenerated by database - const encodedPath = Buffer.from(filePath, "utf-8").toString(); - const id = crypto.createHash("sha1").update(encodedPath).digest("hex"); - - // extension - const [, extension] = filePath.match(/.(\w+)$/) || []; - - if (!MddbFile.supportedExtensions.includes(extension)) { - filesToInsert.push({ - _id: id, - file_path: filePath, - extension, - url_path: null, - filetype: null, - metadata: null, - }); - continue; - } - - // url_path - const pathRelativeToFolder = path.relative(folderPath, filePath); - const urlPath = pathToUrlResolver(pathRelativeToFolder); - - // metadata, tags, links - const source: string = fs.readFileSync(filePath, { - encoding: "utf8", - flag: "r", - }); - - const { metadata, links } = parseFile(source, { - permalinks: filePathsToIndex, - }); - const filetype = metadata?.type || null; - - // TODO is there a better way to do this? 
- filesLinksMap[id] = { - url: urlPath, - links, - }; - - const tags = metadata?.tags || []; - tags.forEach((tag: string) => { - if (!tagsToInsert.some((t) => t.name === tag)) { - tagsToInsert.push({ name: tag }); - } - fileTagsToInsert.push({ file: id, tag }); - }); - - filesToInsert.push({ - _id: id, - file_path: filePath, - extension, - url_path: urlPath, - filetype, - metadata, - }); - } - - Object.entries(filesLinksMap).forEach(([fileId, { url, links }]) => { - links.forEach(({ linkSrc, linkType }) => { - const destPath = resolveLinkToUrlPath(linkSrc, url); - const destFile = filesToInsert.find( - (file) => file.url_path === destPath - ); - if (!destFile) { - return; - } - const linkToInsert = { - // _id: id, - from: fileId, - to: destFile._id, - link_type: linkType, - }; - linksToInsert.push(linkToInsert); - }); - }); - - if (filesToInsert.length >= 500) { - for (let i = 0; i < filesToInsert.length; i += 500) { - await MddbFile.batchInsert(this.db, filesToInsert.slice(i, i + 500)); - } - } else { - await MddbFile.batchInsert(this.db, filesToInsert); - } - - // TODO what happens if some of the files were not inserted? 
- // I guess inserting tags or links with such files used as foreign keys will fail too, - // but need to check - - if (tagsToInsert.length >= 500) { - for (let i = 0; i < tagsToInsert.length; i += 500) { - await MddbTag.batchInsert(this.db, tagsToInsert.slice(i, i + 500)); - } - } else { - await MddbTag.batchInsert(this.db, tagsToInsert); - } - - if (fileTagsToInsert.length >= 500) { - for (let i = 0; i < fileTagsToInsert.length; i += 500) { - await MddbFileTag.batchInsert( - this.db, - fileTagsToInsert.slice(i, i + 500) - ); - } - } else { - await MddbFileTag.batchInsert(this.db, fileTagsToInsert); - } - - if (linksToInsert.length >= 500) { - for (let i = 0; i < linksToInsert.length; i += 500) { - await MddbLink.batchInsert(this.db, linksToInsert.slice(i, i + 500)); - } - } else { - await MddbLink.batchInsert(this.db, linksToInsert); - } + await resetDatabaseTables(this.db); + const { files, tags, fileTags, links } = indexFolderToObjects( + folderPath, + pathToUrlResolver, + ignorePatterns + ); + + await MddbFile.batchInsert(this.db, files); + await MddbTag.batchInsert(this.db, tags); + await MddbFileTag.batchInsert(this.db, fileTags); + await MddbLink.batchInsert(this.db, links); } async getFileById(id: string): Promise { - const file = await this.db.from("files").where("_id", id).first(); - return new MddbFile(file); + return this.dbQueryManager.getFileById(id); } async getFileByUrl(url: string): Promise { - const file = await this.db - .from("files") - .where("url_path", encodeURI(url)) - .first(); - return new MddbFile(file); + return this.dbQueryManager.getFileByUrl(url); } - async getFiles(query?: { - folder?: string; - filetypes?: string[]; - tags?: string[]; - extensions?: string[]; - frontmatter?: Record; - }): Promise { - const { filetypes, tags, extensions, folder, frontmatter } = query || {}; - - const files = await this.db - // TODO join only if tags are specified ? 
- .leftJoin("file_tags", "files._id", "file_tags.file") - .where((builder) => { - // TODO temporary solution before we have a proper way to filter files by and assign file types - if (folder) { - builder.whereLike("url_path", `${folder}/%`); - } - if (tags) { - builder.whereIn("tag", tags); - } - - if (extensions) { - builder.whereIn("extension", extensions); - } - - if (filetypes) { - builder.whereIn("filetype", filetypes); - } - - if (frontmatter) { - Object.entries(frontmatter).forEach(([key, value]) => { - if (typeof value === "string" || typeof value === "number") { - builder.whereRaw(`json_extract(metadata, '$.${key}') = ?`, [ - value, - ]); - } else if (typeof value === "boolean") { - if (value) { - builder.whereRaw(`json_extract(metadata, '$.${key}') = ?`, [ - true, - ]); - } else { - builder.where(function () { - this.whereRaw(`json_extract(metadata, '$.${key}') = ?`, [ - false, - ]).orWhereRaw(`json_extract(metadata, '$.${key}') IS NULL`); - }); - } - } - // To check if the provided value exists in an array inside the JSON - else { - builder.whereRaw(`json_extract(metadata, '$.${key}') LIKE ?`, [ - `%${value}%`, - ]); - } - }); - } - }) - .select("files.*") - .from("files") - .groupBy("_id"); - - return files.map((file) => new MddbFile(file)); + async getFiles(query?: FilesQuery): Promise { + return this.dbQueryManager.getFiles(query); } async getTags(): Promise { - const tags = await this.db("tags").select(); - return tags.map((tag) => new MddbTag(tag)); + return this.dbQueryManager.getTags(); } - async getLinks(query?: { - fileId: string; - linkType?: "normal" | "embed"; - direction?: "forward" | "backward"; - }): Promise { - const { fileId, direction = "forward", linkType } = query || {}; - const joinKey = direction === "forward" ? 
"from" : "to"; - const where = { - [joinKey]: fileId, - }; - if (linkType) { - where["link_type"] = linkType; - } - const dbLinks = await this.db - .select("links.*") - .from("links") - .rightJoin("files", `links.${joinKey}`, "=", "files._id") - .where(where); - - const links = dbLinks.map((link) => new MddbLink(link)); - return links; + async getLinks(query?: LinkQuery): Promise { + return this.dbQueryManager.getLinks(query); } _destroyDb() { - this.db.destroy(); + this.dbQueryManager._destroyDb(); + } +} + +async function resetDatabaseTables(db: Knex) { + const tableNames = [MddbFile, MddbTag, MddbFileTag, MddbLink]; + // Drop and Create tables + for (const table of tableNames) { + await table.deleteTable(db); + await table.createTable(db); } } diff --git a/src/lib/processFile.spec.ts b/src/lib/processFile.spec.ts new file mode 100644 index 0000000..87a2aac --- /dev/null +++ b/src/lib/processFile.spec.ts @@ -0,0 +1,28 @@ +import path from "path"; +import { processFile } from "./processFile"; + +describe("Can parse a file and get file info", () => { + const pathToContentFixture = "__mocks__/content"; + + test("can parse a file", async () => { + const filePath = "index.mdx"; + const fullPath = path.join(pathToContentFixture, filePath); + const fileInfo = processFile( + fullPath, + pathToContentFixture, + [], + (filePath: string) => filePath + ); + + expect(fileInfo.file_path).toBe(fullPath); + expect(fileInfo.extension).toBe("mdx"); + expect(fileInfo.tags).toEqual(["tag1", "tag2", "tag3"]); + expect(fileInfo.metadata).toEqual({ + title: "Homepage", + tags: ["tag1", "tag2", "tag3"], + }); + expect(fileInfo.links).toEqual([ + { linkSrc: "blog0.mdx", linkType: "normal" }, + ]); + }); +}); diff --git a/src/lib/processFile.ts b/src/lib/processFile.ts new file mode 100644 index 0000000..c2087e8 --- /dev/null +++ b/src/lib/processFile.ts @@ -0,0 +1,63 @@ +import fs from "fs"; +import path from "path"; +import { parseMarkdownContent } from "../utils/index.js"; +import type 
{ FileObject } from "./types/FileObject.js"; +import crypto from "crypto"; + +export function processFile( + filePath: string, + folderPath: string, + filePathsToIndex: string[], + pathToUrlResolver: (filePath: string) => string +): FileObject { + const encodedPath = Buffer.from(filePath, "utf-8").toString(); + const id = crypto.createHash("sha1").update(encodedPath).digest("hex"); + const extension = path.extname(filePath).slice(1); + + const fileObject: FileObject = { + _id: id, + file_path: filePath, + extension, + url_path: null, + filetype: null, + metadata: null, + tags: [], + links: [], + }; + + // if not md or mdx return this + const isExtensionSupported = ["md", "mdx"].includes(extension); + if (!isExtensionSupported) { + return fileObject; + } + + // url_path + const pathRelativeToFolder = path.relative(folderPath, filePath); + const urlPath = pathToUrlResolver(pathRelativeToFolder); + + // metadata, tags, links + let source: string, metadata, links; + try { + source = fs.readFileSync(filePath, { + encoding: "utf8", + flag: "r", + }); + + ({ metadata, links } = parseMarkdownContent(source, { + permalinks: filePathsToIndex, + })); + + fileObject.url_path = urlPath; + fileObject.metadata = metadata; + fileObject.filetype = metadata?.type || null; + fileObject.links = links; + if (metadata.tags) { + fileObject.tags = metadata.tags; + } + + return fileObject; + } catch (e) { + console.error(`Error processing file ${filePath}: ${e}`); + return fileObject; + } +} diff --git a/src/lib/schema.ts b/src/lib/schema.ts index b1f3d77..8fb20c8 100644 --- a/src/lib/schema.ts +++ b/src/lib/schema.ts @@ -1,31 +1,6 @@ import { Knex } from "knex"; import { areUniqueObjectsByKey } from "./validate.js"; - -/* - * Types - */ -export enum Table { - Files = "files", - Tags = "tags", - FileTags = "file_tags", - Links = "links", -} - -type MetaData = { - [key: string]: any; -}; - -/* - * Schema - */ -interface File { - _id: string; - file_path: string; - extension: string; - 
url_path: string | null; - filetype: string | null; - metadata: MetaData | null; -} +import { Table, File, MetaData, Link, Tag, FileTag } from "./types/schemaTypes.js"; class MddbFile { static table = Table.Files; @@ -82,6 +57,10 @@ class MddbFile { } static batchInsert(db: Knex, files: File[]) { + if (files.length === 0) { + return; + } + if (!areUniqueObjectsByKey(files, "_id")) { throw new Error("Files must have unique _id"); } @@ -99,12 +78,6 @@ class MddbFile { } } -interface Link { - link_type: "normal" | "embed"; - from: string; - to: string; -} - class MddbLink { static table = Table.Links; @@ -150,14 +123,14 @@ class MddbLink { } static batchInsert(db: Knex, links: Link[]) { + if (links.length === 0) { + return; + } + return db.batchInsert(Table.Links, links); } } -interface Tag { - name: string; -} - class MddbTag { static table = Table.Tags; @@ -194,6 +167,9 @@ class MddbTag { } static batchInsert(db: Knex, tags: Tag[]) { + if (tags.length === 0) { + return; + } if (!areUniqueObjectsByKey(tags, "name")) { throw new Error("Tags must have unique name"); } @@ -201,11 +177,6 @@ class MddbTag { } } -interface FileTag { - tag: string; - file: string; -} - class MddbFileTag { static table = Table.FileTags; // _id: string; @@ -238,8 +209,12 @@ class MddbFileTag { } static batchInsert(db: Knex, fileTags: FileTag[]) { + if (fileTags.length === 0) { + return; + } + return db.batchInsert(Table.FileTags, fileTags); } } -export { File, MddbFile, Link, MddbLink, Tag, MddbTag, FileTag, MddbFileTag }; +export { MddbFile, MddbLink, MddbTag, MddbFileTag }; diff --git a/src/lib/types/DbQueryTypes.ts b/src/lib/types/DbQueryTypes.ts new file mode 100644 index 0000000..31d38c5 --- /dev/null +++ b/src/lib/types/DbQueryTypes.ts @@ -0,0 +1,13 @@ +export type FilesQuery = { + folder?: string; + filetypes?: string[]; + tags?: string[]; + extensions?: string[]; + frontmatter?: Record; +}; + +export type LinkQuery = { + fileId: string; + linkType?: "normal" | "embed"; + direction?: 
"forward" | "backward"; +}; diff --git a/src/lib/types/FileObject.ts b/src/lib/types/FileObject.ts new file mode 100644 index 0000000..ab28a59 --- /dev/null +++ b/src/lib/types/FileObject.ts @@ -0,0 +1,7 @@ +import { WikiLink } from "../../utils"; +import { File } from "./schemaTypes"; + +export interface FileObject extends File { + tags: string[]; + links: WikiLink[]; +} diff --git a/src/lib/types/schemaTypes.ts b/src/lib/types/schemaTypes.ts new file mode 100644 index 0000000..5595be8 --- /dev/null +++ b/src/lib/types/schemaTypes.ts @@ -0,0 +1,34 @@ +export interface File { + _id: string; + file_path: string; + extension: string; + url_path: string | null; + filetype: string | null; + metadata: MetaData | null; +} + +export type MetaData = { + [key: string]: any; +}; + +export interface Link { + link_type: "normal" | "embed"; + from: string; + to: string; +} + +export interface Tag { + name: string; +} + +export interface FileTag { + tag: string; + file: string; +} + +export enum Table { + Files = "files", + Tags = "tags", + FileTags = "file_tags", + Links = "links", +} diff --git a/src/utils/defaultFilePathToUrl.ts b/src/utils/defaultFilePathToUrl.ts new file mode 100644 index 0000000..8e68990 --- /dev/null +++ b/src/utils/defaultFilePathToUrl.ts @@ -0,0 +1,8 @@ +export const defaultFilePathToUrl = (filePath: string) => { + let url = filePath + .replace(/\.(mdx|md)/, "") + .replace(/\\/g, "/") // replace windows backslash with forward slash + .replace(/(\/)?index$/, ""); // remove index from the end of the permalink + url = url.length > 0 ? 
url : "/"; // for home page + return encodeURI(url); +}; diff --git a/src/utils/extractFileSchemeFromObject.ts b/src/utils/extractFileSchemeFromObject.ts new file mode 100644 index 0000000..3e4675c --- /dev/null +++ b/src/utils/extractFileSchemeFromObject.ts @@ -0,0 +1,12 @@ +import { FileObject } from "../lib/types/FileObject.js"; + +export function extractFileSchemeFromObject(fileObject: FileObject) { + return { + _id: fileObject._id, + file_path: fileObject.file_path, + extension: fileObject.extension, + url_path: fileObject.url_path, + filetype: fileObject.filetype, + metadata: fileObject.metadata, + }; +} diff --git a/src/utils/getUniqueValues.ts b/src/utils/getUniqueValues.ts new file mode 100644 index 0000000..1b07699 --- /dev/null +++ b/src/utils/getUniqueValues.ts @@ -0,0 +1,32 @@ +function deepEqual(obj1: any, obj2: any): boolean { + if (typeof obj1 !== "object" || typeof obj2 !== "object") { + return obj1 === obj2; + } + + const keys1 = Object.keys(obj1); + const keys2 = Object.keys(obj2); + + if (keys1.length !== keys2.length) { + return false; + } + + for (const key of keys1) { + if (!keys2.includes(key) || !deepEqual(obj1[key], obj2[key])) { + return false; + } + } + + return true; +} + +export function getUniqueValues<T>(array: T[]): T[] { + const uniqueArray: T[] = []; + + for (const item of array) { + if (!uniqueArray.some((existingItem) => deepEqual(existingItem, item))) { + uniqueArray.push(item); + } + } + + return uniqueArray; +} diff --git a/src/utils/index.ts b/src/utils/index.ts index 25a8e00..6f4e7da 100644 --- a/src/utils/index.ts +++ b/src/utils/index.ts @@ -1,3 +1,6 @@ export { recursiveWalkDir } from "./recursiveWalkDir.js"; export { extractWikiLinks, WikiLink } from "./extractWikiLinks.js"; -export { parseFile } from "./parseFile.js"; +export { parseMarkdownContent } from "./parseMarkdownContent.js"; +export { getUniqueValues } from "./getUniqueValues.js"; +export { defaultFilePathToUrl } from "./defaultFilePathToUrl.js"; +export { 
extractFileSchemeFromObject } from "./extractFileSchemeFromObject.js"; diff --git a/src/utils/parseFile.spec.ts b/src/utils/parseMarkdownContent.spec.ts similarity index 89% rename from src/utils/parseFile.spec.ts rename to src/utils/parseMarkdownContent.spec.ts index 28de0e3..c324990 100644 --- a/src/utils/parseFile.spec.ts +++ b/src/utils/parseMarkdownContent.spec.ts @@ -1,4 +1,4 @@ -import { parseFile } from "./parseFile"; +import { parseMarkdownContent } from "./parseMarkdownContent"; const source = `--- title: Hello World @@ -25,7 +25,7 @@ describe("parseFile", () => { { linkType: "normal", linkSrc: "blog/Some Other Link" }, { linkType: "embed", linkSrc: "Some Image.png" }, ]; - const { metadata, links } = parseFile(source); + const { metadata, links } = parseMarkdownContent(source); expect(metadata).toEqual(expectedMetadata); expect(links).toEqual(expectedLinks); }); @@ -48,7 +48,7 @@ describe("parseFile", () => { "/some/folder/blog/Some Other Link", "/some/folder/Some Image.png", ]; - const { metadata, links } = parseFile(source, { permalinks }); + const { metadata, links } = parseMarkdownContent(source, { permalinks }); expect(metadata).toEqual(expectedMetadata); expect(links).toEqual(expectedLinks); }); diff --git a/src/utils/parseFile.ts b/src/utils/parseMarkdownContent.ts similarity index 86% rename from src/utils/parseFile.ts rename to src/utils/parseMarkdownContent.ts index 6a2d21a..21095de 100644 --- a/src/utils/parseFile.ts +++ b/src/utils/parseMarkdownContent.ts @@ -1,13 +1,18 @@ import matter from "gray-matter"; import { extractWikiLinks } from "./extractWikiLinks.js"; +import { getUniqueValues } from "./getUniqueValues.js"; -export function parseFile(source: string, options?: { permalinks?: string[] }) { +export function parseMarkdownContent( + source: string, + options?: { permalinks?: string[] } +) { // Metadata const { data: metadata } = matter(source); // Obsidian style tags i.e. 
tags: tag1, tag2, tag3 if (metadata.tags && typeof metadata.tags === "string") { metadata.tags = metadata.tags.split(",").map((tag: string) => tag.trim()); + metadata.tags = getUniqueValues(metadata.tags); } // Links diff --git a/src/utils/resolveLinkToUrlPath.ts b/src/utils/resolveLinkToUrlPath.ts new file mode 100644 index 0000000..e6ec20d --- /dev/null +++ b/src/utils/resolveLinkToUrlPath.ts @@ -0,0 +1,14 @@ +import path from "path"; + +export const resolveLinkToUrlPath = (link: string, sourceFilePath?: string) => { + if (!sourceFilePath) { + return link; + } + // needed to make path.resolve work correctly + // because we store urls without leading slash + const sourcePath = "/" + sourceFilePath; + const dir = path.dirname(sourcePath); + const resolved = path.resolve(dir, link); + // remove leading slash + return resolved.slice(1); +}; diff --git a/tsconfig.json b/tsconfig.json index 04b2fbc..633a72f 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -13,5 +13,6 @@ { "path": "./tsconfig.spec.json" } - ] + ], + "exclude": ["node_modules", "dist/**/*"] } diff --git a/tsconfig.lib.json b/tsconfig.lib.json index e578407..fc675e5 100644 --- a/tsconfig.lib.json +++ b/tsconfig.lib.json @@ -6,7 +6,7 @@ "strict": true, "strictPropertyInitialization": false, "target": "es2020", - "module": "es2020", + "module": "esnext", "moduleResolution": "node", "esModuleInterop": true, "types": ["node"] diff --git a/tsconfig.spec.json b/tsconfig.spec.json index 7453ec9..cfccbea 100644 --- a/tsconfig.spec.json +++ b/tsconfig.spec.json @@ -15,6 +15,7 @@ "src/**/*.spec.js", "src/**/*.test.jsx", "src/**/*.spec.jsx", - "src/**/*.d.ts" + "src/**/*.d.ts", + "src/lib/processFile.spec.ts" ] }