From c95b01c664cd63d89c72ca9c20de1530a8c90ba8 Mon Sep 17 00:00:00 2001 From: Vlad Frangu <kingdgrizzle@gmail.com> Date: Wed, 14 Sep 2022 17:24:17 +0300 Subject: [PATCH 1/3] feat: make the CLI download directly from GitHub --- .../cli/src/commands/CreateProjectCommand.ts | 58 ++++++++++-- packages/templates/manifest.json | 90 +++++++++++++++++-- packages/templates/package.json | 6 +- packages/templates/scripts/copy-templates.mjs | 18 ---- .../templates/scripts/validate-manifest.mjs | 49 ++++++++++ packages/templates/src/index.ts | 49 +++++++++- 6 files changed, 229 insertions(+), 41 deletions(-) delete mode 100644 packages/templates/scripts/copy-templates.mjs create mode 100644 packages/templates/scripts/validate-manifest.mjs diff --git a/packages/cli/src/commands/CreateProjectCommand.ts b/packages/cli/src/commands/CreateProjectCommand.ts index f4c626616d73..364e5ec64e71 100644 --- a/packages/cli/src/commands/CreateProjectCommand.ts +++ b/packages/cli/src/commands/CreateProjectCommand.ts @@ -1,13 +1,15 @@ import { mkdirSync } from 'node:fs'; -import { join } from 'node:path'; +import { dirname, join } from 'node:path'; import { execSync } from 'node:child_process'; import type { ArgumentsCamelCase, Argv, CommandModule } from 'yargs'; import { prompt } from 'inquirer'; import colors from 'ansi-colors'; +import type { Template } from '@crawlee/templates'; import { fetchManifest } from '@crawlee/templates'; -import { copy } from 'fs-extra'; -import { readFileSync, writeFileSync } from 'fs'; import { resolve } from 'path'; +import { readFile, writeFile } from 'node:fs/promises'; +import { get } from 'node:https'; +import { ensureDir } from 'fs-extra'; interface CreateProjectArgs { projectName?: string; @@ -20,11 +22,11 @@ function validateProjectName(name: string) { } } -function rewrite(path: string, replacer: (from: string) => string): void { +async function rewrite(path: string, replacer: (from: string) => string) { try { - const file = readFileSync(path).toString(); + const file = await readFile(path, 'utf8'); const replaced = replacer(file); - writeFileSync(path, replaced); + await writeFile(path, replaced); } catch { // not found } @@ -105,14 +107,52 @@ export class CreateProjectCommand<T> implements CommandModule<T, CreateProjectAr throw err; } - await copy(require.resolve('@crawlee/templates').replace('index.js', `templates/${template}`), projectDir); - rewrite(resolve(projectDir, 'package.json'), (pkg) => pkg.replace(/"name": "[\w-]+"/, `"name": "${projectName}"`)); + const templateData = manifest.templates.find((item) => item.name === template)!; - // Run npm install in project dir. + await this.downloadTemplateFilesToDisk(templateData, projectDir); + await rewrite(resolve(projectDir, 'package.json'), (pkg) => pkg.replace(/"name": "[\w-]+"/, `"name": "${projectName}"`)); + + // // Run npm install in project dir. const npm = /^win/.test(process.platform) ? 'npm.cmd' : 'npm'; execSync(`${npm} install`, { cwd: projectDir, stdio: 'inherit' }); // eslint-disable-next-line no-console console.log(colors.green(`Project ${projectName} was created. To run it, run "cd ${projectName}" and "npm start".`)); } + + private async downloadTemplateFilesToDisk(template: Template, destinationDirectory: string) { + const promises: Promise<void>[] = []; + + for (const file of template.files) { + const promise = this.downloadFile(file.url).then(async (buffer) => { + // Make sure the folder for the file exists + const fileDirName = dirname(file.path); + const fileFolder = resolve(destinationDirectory, fileDirName); + await ensureDir(fileFolder); + + // Write the actual file + await writeFile(resolve(destinationDirectory, file.path), buffer); + }); + + promises.push(promise); + } + + await Promise.all(promises); + } + + private downloadFile(url: string) { + return new Promise<Buffer>((promiseResolve, reject) => { + get(url, async (res) => { + const bytes: Buffer[] = []; + + res.on('error', (err) => reject(err)); + + for await (const byte of res) { + bytes.push(byte); + } + + promiseResolve(Buffer.concat(bytes)); + }).on('error', (err) => reject(err)); + }); + } } diff --git a/packages/templates/manifest.json b/packages/templates/manifest.json index 2a1f81744971..710a9be350b9 100644 --- a/packages/templates/manifest.json +++ b/packages/templates/manifest.json @@ -2,35 +2,109 @@ "templates": [ { "name": "getting-started-ts", - "description": "Getting started example [TypeScript]" + "description": "Getting started example [TypeScript]", + "files": [ + "src/main.ts", + ".dockerignore", + ".gitignore", + "Dockerfile", + "package.json", + "README.md", + "tsconfig.json" + ] }, { "name": "getting-started-js", - "description": "Getting started example [JavaScript]" + "description": "Getting started example [JavaScript]", + "files": [ + "src/main.js", + ".dockerignore", + ".gitignore", + "Dockerfile", + "package.json", + "README.md" + ] }, { "name": "cheerio-ts", - "description": "CheerioCrawler template project [TypeScript]" + "description": "CheerioCrawler template project [TypeScript]", + "files": [ + "src/main.ts", + "src/routes.ts", + ".dockerignore", + ".gitignore", + "Dockerfile", + "package.json", + "README.md", + "tsconfig.json" + ] }, { "name": "playwright-ts", - "description": "PlaywrightCrawler template project [TypeScript]" + "description": "PlaywrightCrawler template project [TypeScript]", + "files": [ + "src/main.ts", + "src/routes.ts", + ".dockerignore", + ".gitignore", + "Dockerfile", + "package.json", + "README.md", + "tsconfig.json" + ] }, { "name": "puppeteer-ts", - "description": "PuppeteerCrawler template project [TypeScript]" + "description": "PuppeteerCrawler template project [TypeScript]", + "files": [ + "src/main.ts", + "src/routes.ts", + ".dockerignore", + ".gitignore", + "Dockerfile", + "package.json", + "README.md", + "tsconfig.json" + ] }, { "name": "cheerio-js", - "description": "CheerioCrawler template project [JavaScript]" + "description": "CheerioCrawler template project [JavaScript]", + "files": [ + "src/main.js", + "src/routes.js", + ".dockerignore", + ".gitignore", + "Dockerfile", + "package.json", + "README.md" + ] }, { "name": "playwright-js", - "description": "PlaywrightCrawler template project [JavaScript]" + "description": "PlaywrightCrawler template project [JavaScript]", + "files": [ + "src/main.js", + "src/routes.js", + ".dockerignore", + ".gitignore", + "Dockerfile", + "package.json", + "README.md" + ] }, { "name": "puppeteer-js", - "description": "PuppeteerCrawler template project [JavaScript]" + "description": "PuppeteerCrawler template project [JavaScript]", + "files": [ + "src/main.js", + "src/routes.js", + ".dockerignore", + ".gitignore", + "Dockerfile", + "package.json", + "README.md" + ] } ] } diff --git a/packages/templates/package.json b/packages/templates/package.json index 8af50029d00c..3e818a598531 100644 --- a/packages/templates/package.json +++ b/packages/templates/package.json @@ -39,11 +39,11 @@ }, "homepage": "https://crawlee.dev", "scripts": { - "build": "npm run clean && npm run compile && npm run copy-templates && npm run copy", + "build": "npm run clean && npm run validate && npm run compile && npm run copy", "clean": "rimraf ./dist", "compile": "tsc -p tsconfig.build.json && gen-esm-wrapper ./dist/index.js ./dist/index.mjs", - "copy-templates": "node ./scripts/copy-templates.mjs", - "copy": "ts-node -T ../../scripts/copy.ts" + "copy": "ts-node -T ../../scripts/copy.ts", + "validate": "node ./scripts/validate-manifest.mjs" }, "publishConfig": { "access": "public" diff --git a/packages/templates/scripts/copy-templates.mjs b/packages/templates/scripts/copy-templates.mjs deleted file mode 100644 index 298df8a37b28..000000000000 --- a/packages/templates/scripts/copy-templates.mjs +++ /dev/null @@ -1,18 +0,0 @@ -import { readdir } from 'node:fs/promises'; -import { copy } from 'fs-extra'; - -const templates = await readdir('./templates'); - -await copy('./manifest.json', './dist/manifest.json', { override: true }); -console.info(`Successfully copied 'manifest.json' to dist`); - -const ignoreFolders = ['node_modules', 'dist', 'crawlee_storage', 'storage', 'apify_storage', 'package-lock.json']; - -for (const tpl of templates) { - console.info(tpl); - await copy(`./templates/${tpl}`, `./dist/templates/${tpl}`, { - override: true, - filter: (src) => !ignoreFolders.some(f => src.includes(f)), - }); - console.info(`Successfully copied '${tpl}' template to dist`); -} diff --git a/packages/templates/scripts/validate-manifest.mjs b/packages/templates/scripts/validate-manifest.mjs new file mode 100644 index 000000000000..608ef9c27fa2 --- /dev/null +++ b/packages/templates/scripts/validate-manifest.mjs @@ -0,0 +1,49 @@ +import { readFile, readdir, access } from 'node:fs/promises'; +import { URL } from 'node:url'; + +const colors = { + red: (text) => `\x1B[31m${text}\x1B[39m`, + green: (text) => `\x1B[32m${text}\x1B[39m`, + grey: (text) => `\x1B[90m${text}\x1B[39m`, + yellow: (text) => `\x1B[33m${text}\x1B[39m`, +}; + +const templatesDirectory = new URL('../templates/', import.meta.url); +const templateNames = await readdir(templatesDirectory); +/** @type {{ templates: Array<{ name: string; description: string; files: string[] }>; }} */ +const manifest = JSON.parse(await readFile(new URL('../manifest.json', import.meta.url), 'utf8')); + +console.log(`Validating ${colors.green(manifest.templates.length)} templates`); + +let hasError = false; + +for (const manifestTemplate of manifest.templates) { + // Check if the folder it points to actually exists + if (!templateNames.includes(manifestTemplate.name)) { + console.error(colors.red(`Failed to find folder for template called ${colors.yellow(manifestTemplate.name)}`)); + hasError = true; + // Skipping the rest of the validation as the template is missing + continue; + } + + console.log(colors.grey(`Validating template ${colors.yellow(manifestTemplate.name)}`)); + + // Check that all files it requires exist + for (const requiredFile of manifestTemplate.files) { + try { + await access(new URL(`./${manifestTemplate.name}/${requiredFile}`, templatesDirectory)); + } catch (err) { + if (err.code === 'ENOENT') { + hasError = true; + console.error(`${colors.grey(`[${colors.yellow(manifestTemplate.name)}]:`)} Failed to find file ${colors.yellow(requiredFile)}`); + console.error(err); + } else { + console.warn(`${colors.grey(`[${colors.yellow(manifestTemplate.name)}]:`)} Failed to read file ${colors.yellow(requiredFile)}`, err); + } + } + } + + console.log(colors.green(`Finished validating ${colors.yellow(manifestTemplate.name)}`)); +} + +process.exitCode = hasError ? 1 : 0; diff --git a/packages/templates/src/index.ts b/packages/templates/src/index.ts index 44605e330dcf..5287f16a069b 100644 --- a/packages/templates/src/index.ts +++ b/packages/templates/src/index.ts @@ -2,19 +2,47 @@ import https from 'https'; export const MANIFEST_URL = 'https://raw.githubusercontent.com/apify/crawlee/master/packages/templates/manifest.json'; +function templateFileUrl(templateName: string, path: string) { + return `https://raw.githubusercontent.com/apify/crawlee/master/packages/templates/templates/${templateName}/${path}`; +} + +interface SharedTemplateData { + name: string; + description: string; +} + +// Data received from the github file +interface RawTemplate extends SharedTemplateData { + files: string[]; +} + +interface RawManifest { + templates: RawTemplate[]; +} + +// Data returned for the CLI or users to consume export interface Manifest { - templates: { name: string; description: string }[]; + templates: Template[]; +} + +export interface Template extends SharedTemplateData { + files: TemplateFile[]; +} + +export interface TemplateFile { + path: string; + url: string; } export async function fetchManifest(): Promise<Manifest> { - return new Promise((resolve, reject) => { + const rawManifest = await new Promise<RawManifest>((resolve, reject) => { https.get(MANIFEST_URL, (res) => { let json = ''; res .on('data', (chunk) => { json += chunk; }) - .on('end', () => { + .once('end', () => { if (res.statusCode === 200) { try { const data = JSON.parse(json); @@ -30,4 +58,19 @@ export async function fetchManifest(): Promise<Manifest> { }) .on('error', (err) => reject(err)); }); + + const newTemplates: Template[] = rawManifest.templates.map((original) => { + return { + name: original.name, + description: original.description, + files: original.files.map((file) => ({ + path: file, + url: templateFileUrl(original.name, file), + })), + }; + }); + + return { + templates: newTemplates, + }; } From 01299459eedb1b92c61594a05357afab5586e876 Mon Sep 17 00:00:00 2001 From: Vlad Frangu <kingdgrizzle@gmail.com> Date: Wed, 14 Sep 2022 17:28:33 +0300 Subject: [PATCH 2/3] fix: methods need to go outside the class --- .../cli/src/commands/CreateProjectCommand.ts | 74 +++++++++---------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/packages/cli/src/commands/CreateProjectCommand.ts b/packages/cli/src/commands/CreateProjectCommand.ts index 364e5ec64e71..4b2338051a01 100644 --- a/packages/cli/src/commands/CreateProjectCommand.ts +++ b/packages/cli/src/commands/CreateProjectCommand.ts @@ -32,6 +32,42 @@ async function rewrite(path: string, replacer: (from: string) => string) { } } +async function downloadTemplateFilesToDisk(template: Template, destinationDirectory: string) { + const promises: Promise<void>[] = []; + + for (const file of template.files) { + const promise = downloadFile(file.url).then(async (buffer) => { + // Make sure the folder for the file exists + const fileDirName = dirname(file.path); + const fileFolder = resolve(destinationDirectory, fileDirName); + await ensureDir(fileFolder); + + // Write the actual file + await writeFile(resolve(destinationDirectory, file.path), buffer); + }); + + promises.push(promise); + } + + await Promise.all(promises); +} + +async function downloadFile(url: string) { + return new Promise<Buffer>((promiseResolve, reject) => { + get(url, async (res) => { + const bytes: Buffer[] = []; + + res.on('error', (err) => reject(err)); + + for await (const byte of res) { + bytes.push(byte); + } + + promiseResolve(Buffer.concat(bytes)); + }).on('error', (err) => reject(err)); + }); +} + export class CreateProjectCommand<T> implements CommandModule<T, CreateProjectArgs> { command = 'create [project-name]'; describe = 'Creates a new Crawlee project directory from a selected boilerplate template.'; @@ -109,7 +145,7 @@ export class CreateProjectCommand<T> implements CommandModule<T, CreateProjectAr const templateData = manifest.templates.find((item) => item.name === template)!; - await this.downloadTemplateFilesToDisk(templateData, projectDir); + await downloadTemplateFilesToDisk(templateData, projectDir); await rewrite(resolve(projectDir, 'package.json'), (pkg) => pkg.replace(/"name": "[\w-]+"/, `"name": "${projectName}"`)); // // Run npm install in project dir. @@ -119,40 +155,4 @@ export class CreateProjectCommand<T> implements CommandModule<T, CreateProjectAr // eslint-disable-next-line no-console console.log(colors.green(`Project ${projectName} was created. To run it, run "cd ${projectName}" and "npm start".`)); } - - private async downloadTemplateFilesToDisk(template: Template, destinationDirectory: string) { - const promises: Promise<void>[] = []; - - for (const file of template.files) { - const promise = this.downloadFile(file.url).then(async (buffer) => { - // Make sure the folder for the file exists - const fileDirName = dirname(file.path); - const fileFolder = resolve(destinationDirectory, fileDirName); - await ensureDir(fileFolder); - - // Write the actual file - await writeFile(resolve(destinationDirectory, file.path), buffer); - }); - - promises.push(promise); - } - - await Promise.all(promises); - } - - private downloadFile(url: string) { - return new Promise<Buffer>((promiseResolve, reject) => { - get(url, async (res) => { - const bytes: Buffer[] = []; - - res.on('error', (err) => reject(err)); - - for await (const byte of res) { - bytes.push(byte); - } - - promiseResolve(Buffer.concat(bytes)); - }).on('error', (err) => reject(err)); - }); - } } From 469804b8af574deffb07f94d1dacb1096c8feea6 Mon Sep 17 00:00:00 2001 From: Vlad Frangu <kingdgrizzle@gmail.com> Date: Wed, 14 Sep 2022 17:40:58 +0300 Subject: [PATCH 3/3] chore: fix accidental double // comment --- packages/cli/src/commands/CreateProjectCommand.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/cli/src/commands/CreateProjectCommand.ts b/packages/cli/src/commands/CreateProjectCommand.ts index 4b2338051a01..b87e9ef1db56 100644 --- a/packages/cli/src/commands/CreateProjectCommand.ts +++ b/packages/cli/src/commands/CreateProjectCommand.ts @@ -148,7 +148,7 @@ export class CreateProjectCommand<T> implements CommandModule<T, CreateProjectAr await downloadTemplateFilesToDisk(templateData, projectDir); await rewrite(resolve(projectDir, 'package.json'), (pkg) => pkg.replace(/"name": "[\w-]+"/, `"name": "${projectName}"`)); - // // Run npm install in project dir. + // Run npm install in project dir. const npm = /^win/.test(process.platform) ? 'npm.cmd' : 'npm'; execSync(`${npm} install`, { cwd: projectDir, stdio: 'inherit' });