From c95b01c664cd63d89c72ca9c20de1530a8c90ba8 Mon Sep 17 00:00:00 2001
From: Vlad Frangu <kingdgrizzle@gmail.com>
Date: Wed, 14 Sep 2022 17:24:17 +0300
Subject: [PATCH 1/3] feat: make the CLI download directly from GitHub

---
 .../cli/src/commands/CreateProjectCommand.ts  | 58 ++++++++++--
 packages/templates/manifest.json              | 90 +++++++++++++++++--
 packages/templates/package.json               |  6 +-
 packages/templates/scripts/copy-templates.mjs | 18 ----
 .../templates/scripts/validate-manifest.mjs   | 49 ++++++++++
 packages/templates/src/index.ts               | 49 +++++++++-
 6 files changed, 229 insertions(+), 41 deletions(-)
 delete mode 100644 packages/templates/scripts/copy-templates.mjs
 create mode 100644 packages/templates/scripts/validate-manifest.mjs

diff --git a/packages/cli/src/commands/CreateProjectCommand.ts b/packages/cli/src/commands/CreateProjectCommand.ts
index f4c626616d73..364e5ec64e71 100644
--- a/packages/cli/src/commands/CreateProjectCommand.ts
+++ b/packages/cli/src/commands/CreateProjectCommand.ts
@@ -1,13 +1,15 @@
 import { mkdirSync } from 'node:fs';
-import { join } from 'node:path';
+import { dirname, join } from 'node:path';
 import { execSync } from 'node:child_process';
 import type { ArgumentsCamelCase, Argv, CommandModule } from 'yargs';
 import { prompt } from 'inquirer';
 import colors from 'ansi-colors';
+import type { Template } from '@crawlee/templates';
 import { fetchManifest } from '@crawlee/templates';
-import { copy } from 'fs-extra';
-import { readFileSync, writeFileSync } from 'fs';
 import { resolve } from 'path';
+import { readFile, writeFile } from 'node:fs/promises';
+import { get } from 'node:https';
+import { ensureDir } from 'fs-extra';
 
 interface CreateProjectArgs {
     projectName?: string;
@@ -20,11 +22,11 @@ function validateProjectName(name: string) {
     }
 }
 
-function rewrite(path: string, replacer: (from: string) => string): void {
+async function rewrite(path: string, replacer: (from: string) => string) {
     try {
-        const file = readFileSync(path).toString();
+        const file = await readFile(path, 'utf8');
         const replaced = replacer(file);
-        writeFileSync(path, replaced);
+        await writeFile(path, replaced);
     } catch {
         // not found
     }
@@ -105,14 +107,52 @@ export class CreateProjectCommand<T> implements CommandModule<T, CreateProjectAr
             throw err;
         }
 
-        await copy(require.resolve('@crawlee/templates').replace('index.js', `templates/${template}`), projectDir);
-        rewrite(resolve(projectDir, 'package.json'), (pkg) => pkg.replace(/"name": "[\w-]+"/, `"name": "${projectName}"`));
+        const templateData = manifest.templates.find((item) => item.name === template)!;
 
-        // Run npm install in project dir.
+        await this.downloadTemplateFilesToDisk(templateData, projectDir);
+        await rewrite(resolve(projectDir, 'package.json'), (pkg) => pkg.replace(/"name": "[\w-]+"/, `"name": "${projectName}"`));
+
+        // // Run npm install in project dir.
         const npm = /^win/.test(process.platform) ? 'npm.cmd' : 'npm';
         execSync(`${npm} install`, { cwd: projectDir, stdio: 'inherit' });
 
         // eslint-disable-next-line no-console
         console.log(colors.green(`Project ${projectName} was created. To run it, run "cd ${projectName}" and "npm start".`));
     }
+
+    private async downloadTemplateFilesToDisk(template: Template, destinationDirectory: string) {
+        const promises: Promise<void>[] = [];
+
+        for (const file of template.files) {
+            const promise = this.downloadFile(file.url).then(async (buffer) => {
+                // Make sure the folder for the file exists
+                const fileDirName = dirname(file.path);
+                const fileFolder = resolve(destinationDirectory, fileDirName);
+                await ensureDir(fileFolder);
+
+                // Write the actual file
+                await writeFile(resolve(destinationDirectory, file.path), buffer);
+            });
+
+            promises.push(promise);
+        }
+
+        await Promise.all(promises);
+    }
+
+    private downloadFile(url: string) {
+        return new Promise<Buffer>((promiseResolve, reject) => {
+            get(url, async (res) => {
+                const bytes: Buffer[] = [];
+
+                res.on('error', (err) => reject(err));
+
+                for await (const byte of res) {
+                    bytes.push(byte);
+                }
+
+                promiseResolve(Buffer.concat(bytes));
+            }).on('error', (err) => reject(err));
+        });
+    }
 }
diff --git a/packages/templates/manifest.json b/packages/templates/manifest.json
index 2a1f81744971..710a9be350b9 100644
--- a/packages/templates/manifest.json
+++ b/packages/templates/manifest.json
@@ -2,35 +2,109 @@
     "templates": [
         {
             "name": "getting-started-ts",
-            "description": "Getting started example [TypeScript]"
+            "description": "Getting started example [TypeScript]",
+            "files": [
+                "src/main.ts",
+                ".dockerignore",
+                ".gitignore",
+                "Dockerfile",
+                "package.json",
+                "README.md",
+                "tsconfig.json"
+            ]
         },
         {
             "name": "getting-started-js",
-            "description": "Getting started example [JavaScript]"
+            "description": "Getting started example [JavaScript]",
+            "files": [
+                "src/main.js",
+                ".dockerignore",
+                ".gitignore",
+                "Dockerfile",
+                "package.json",
+                "README.md"
+            ]
         },
         {
             "name": "cheerio-ts",
-            "description": "CheerioCrawler template project [TypeScript]"
+            "description": "CheerioCrawler template project [TypeScript]",
+            "files": [
+                "src/main.ts",
+                "src/routes.ts",
+                ".dockerignore",
+                ".gitignore",
+                "Dockerfile",
+                "package.json",
+                "README.md",
+                "tsconfig.json"
+            ]
         },
         {
             "name": "playwright-ts",
-            "description": "PlaywrightCrawler template project [TypeScript]"
+            "description": "PlaywrightCrawler template project [TypeScript]",
+            "files": [
+                "src/main.ts",
+                "src/routes.ts",
+                ".dockerignore",
+                ".gitignore",
+                "Dockerfile",
+                "package.json",
+                "README.md",
+                "tsconfig.json"
+            ]
         },
         {
             "name": "puppeteer-ts",
-            "description": "PuppeteerCrawler template project [TypeScript]"
+            "description": "PuppeteerCrawler template project [TypeScript]",
+            "files": [
+                "src/main.ts",
+                "src/routes.ts",
+                ".dockerignore",
+                ".gitignore",
+                "Dockerfile",
+                "package.json",
+                "README.md",
+                "tsconfig.json"
+            ]
         },
         {
             "name": "cheerio-js",
-            "description": "CheerioCrawler template project [JavaScript]"
+            "description": "CheerioCrawler template project [JavaScript]",
+            "files": [
+                "src/main.js",
+                "src/routes.js",
+                ".dockerignore",
+                ".gitignore",
+                "Dockerfile",
+                "package.json",
+                "README.md"
+            ]
         },
         {
             "name": "playwright-js",
-            "description": "PlaywrightCrawler template project [JavaScript]"
+            "description": "PlaywrightCrawler template project [JavaScript]",
+            "files": [
+                "src/main.js",
+                "src/routes.js",
+                ".dockerignore",
+                ".gitignore",
+                "Dockerfile",
+                "package.json",
+                "README.md"
+            ]
         },
         {
             "name": "puppeteer-js",
-            "description": "PuppeteerCrawler template project [JavaScript]"
+            "description": "PuppeteerCrawler template project [JavaScript]",
+            "files": [
+                "src/main.js",
+                "src/routes.js",
+                ".dockerignore",
+                ".gitignore",
+                "Dockerfile",
+                "package.json",
+                "README.md"
+            ]
         }
     ]
 }
diff --git a/packages/templates/package.json b/packages/templates/package.json
index 8af50029d00c..3e818a598531 100644
--- a/packages/templates/package.json
+++ b/packages/templates/package.json
@@ -39,11 +39,11 @@
     },
     "homepage": "https://crawlee.dev",
     "scripts": {
-        "build": "npm run clean && npm run compile && npm run copy-templates && npm run copy",
+        "build": "npm run clean && npm run validate && npm run compile && npm run copy",
         "clean": "rimraf ./dist",
         "compile": "tsc -p tsconfig.build.json && gen-esm-wrapper ./dist/index.js ./dist/index.mjs",
-        "copy-templates": "node ./scripts/copy-templates.mjs",
-        "copy": "ts-node -T ../../scripts/copy.ts"
+        "copy": "ts-node -T ../../scripts/copy.ts",
+        "validate": "node ./scripts/validate-manifest.mjs"
     },
     "publishConfig": {
         "access": "public"
diff --git a/packages/templates/scripts/copy-templates.mjs b/packages/templates/scripts/copy-templates.mjs
deleted file mode 100644
index 298df8a37b28..000000000000
--- a/packages/templates/scripts/copy-templates.mjs
+++ /dev/null
@@ -1,18 +0,0 @@
-import { readdir } from 'node:fs/promises';
-import { copy } from 'fs-extra';
-
-const templates = await readdir('./templates');
-
-await copy('./manifest.json', './dist/manifest.json', { override: true });
-console.info(`Successfully copied 'manifest.json' to dist`);
-
-const ignoreFolders = ['node_modules', 'dist', 'crawlee_storage', 'storage', 'apify_storage', 'package-lock.json'];
-
-for (const tpl of templates) {
-    console.info(tpl);
-    await copy(`./templates/${tpl}`, `./dist/templates/${tpl}`, {
-        override: true,
-        filter: (src) => !ignoreFolders.some(f => src.includes(f)),
-    });
-    console.info(`Successfully copied '${tpl}' template to dist`);
-}
diff --git a/packages/templates/scripts/validate-manifest.mjs b/packages/templates/scripts/validate-manifest.mjs
new file mode 100644
index 000000000000..608ef9c27fa2
--- /dev/null
+++ b/packages/templates/scripts/validate-manifest.mjs
@@ -0,0 +1,65 @@
+// Build-time validation of `manifest.json` against the `templates/` folder.
+// The CLI downloads exactly the files listed in the manifest, so every listed template must have a
+// folder and every listed file must exist. Sets a non-zero exit code when a problem is found.
+import { readFile, readdir, access } from 'node:fs/promises';
+import { URL } from 'node:url';
+
+// Minimal ANSI color helpers (escape code to set the color, `\x1B[39m` to reset it)
+const colors = {
+    red: (text) => `\x1B[31m${text}\x1B[39m`,
+    green: (text) => `\x1B[32m${text}\x1B[39m`,
+    grey: (text) => `\x1B[90m${text}\x1B[39m`,
+    yellow: (text) => `\x1B[33m${text}\x1B[39m`,
+};
+
+const templatesDirectory = new URL('../templates/', import.meta.url);
+const templateNames = await readdir(templatesDirectory);
+/** @type {{ templates: Array<{ name: string; description: string; files: string[] }>; }} */
+const manifest = JSON.parse(await readFile(new URL('../manifest.json', import.meta.url), 'utf8'));
+
+console.log(`Validating ${colors.green(manifest.templates.length)} templates`);
+
+let hasError = false;
+
+for (const manifestTemplate of manifest.templates) {
+    const logPrefix = colors.grey(`[${colors.yellow(manifestTemplate.name)}]:`);
+
+    // Check if the folder it points to actually exists
+    if (!templateNames.includes(manifestTemplate.name)) {
+        console.error(colors.red(`Failed to find folder for template called ${colors.yellow(manifestTemplate.name)}`));
+        hasError = true;
+        // Skipping the rest of the validation as the template is missing
+        continue;
+    }
+
+    // A missing, non-array or empty `files` entry cannot be iterated below and would leave the CLI
+    // with nothing (or garbage) to download, so report it as a manifest error instead of crashing
+    if (!Array.isArray(manifestTemplate.files) || manifestTemplate.files.length === 0) {
+        console.error(`${logPrefix} Expected ${colors.yellow('files')} to be a non-empty array of file paths`);
+        hasError = true;
+        continue;
+    }
+
+    console.log(colors.grey(`Validating template ${colors.yellow(manifestTemplate.name)}`));
+
+    // Check that all files it requires exist
+    for (const requiredFile of manifestTemplate.files) {
+        try {
+            await access(new URL(`./${manifestTemplate.name}/${requiredFile}`, templatesDirectory));
+        } catch (err) {
+            if (err.code === 'ENOENT') {
+                // The manifest lists a file that is not in the template folder: fail the validation
+                hasError = true;
+                console.error(`${logPrefix} Failed to find file ${colors.yellow(requiredFile)}`);
+                console.error(err);
+            } else {
+                // Any other failure is reported as a warning and does not fail the validation
+                console.warn(`${logPrefix} Failed to read file ${colors.yellow(requiredFile)}`, err);
+            }
+        }
+    }
+
+    console.log(colors.green(`Finished validating ${colors.yellow(manifestTemplate.name)}`));
+}
+
+process.exitCode = hasError ? 1 : 0;
diff --git a/packages/templates/src/index.ts b/packages/templates/src/index.ts
index 44605e330dcf..5287f16a069b 100644
--- a/packages/templates/src/index.ts
+++ b/packages/templates/src/index.ts
@@ -2,19 +2,47 @@ import https from 'https';
 
 export const MANIFEST_URL = 'https://raw.githubusercontent.com/apify/crawlee/master/packages/templates/manifest.json';
 
+function templateFileUrl(templateName: string, path: string) {
+    return `https://raw.githubusercontent.com/apify/crawlee/master/packages/templates/templates/${templateName}/${path}`;
+}
+
+interface SharedTemplateData {
+    name: string;
+    description: string;
+}
+
+// Shape of the manifest as stored in the GitHub repository (plain file paths, no URLs)
+interface RawTemplate extends SharedTemplateData {
+    files: string[];
+}
+
+interface RawManifest {
+    templates: RawTemplate[];
+}
+
+// Data returned for the CLI or users to consume
 export interface Manifest {
-    templates: { name: string; description: string }[];
+    templates: Template[];
+}
+
+export interface Template extends SharedTemplateData {
+    files: TemplateFile[];
+}
+
+export interface TemplateFile {
+    path: string;
+    url: string;
 }
 
 export async function fetchManifest(): Promise<Manifest> {
-    return new Promise((resolve, reject) => {
+    const rawManifest = await new Promise<RawManifest>((resolve, reject) => {
         https.get(MANIFEST_URL, (res) => {
             let json = '';
             res
                 .on('data', (chunk) => {
                     json += chunk;
                 })
-                .on('end', () => {
+                .once('end', () => {
                     if (res.statusCode === 200) {
                         try {
                             const data = JSON.parse(json);
@@ -30,4 +58,19 @@ export async function fetchManifest(): Promise<Manifest> {
         })
             .on('error', (err) => reject(err));
     });
+
+    const newTemplates: Template[] = rawManifest.templates.map((original) => {
+        return {
+            name: original.name,
+            description: original.description,
+            files: original.files.map((file) => ({
+                path: file,
+                url: templateFileUrl(original.name, file),
+            })),
+        };
+    });
+
+    return {
+        templates: newTemplates,
+    };
 }

From 01299459eedb1b92c61594a05357afab5586e876 Mon Sep 17 00:00:00 2001
From: Vlad Frangu <kingdgrizzle@gmail.com>
Date: Wed, 14 Sep 2022 17:28:33 +0300
Subject: [PATCH 2/3] fix: methods need to go outside the class

---
 .../cli/src/commands/CreateProjectCommand.ts  | 74 +++++++++----------
 1 file changed, 37 insertions(+), 37 deletions(-)

diff --git a/packages/cli/src/commands/CreateProjectCommand.ts b/packages/cli/src/commands/CreateProjectCommand.ts
index 364e5ec64e71..4b2338051a01 100644
--- a/packages/cli/src/commands/CreateProjectCommand.ts
+++ b/packages/cli/src/commands/CreateProjectCommand.ts
@@ -32,6 +32,42 @@ async function rewrite(path: string, replacer: (from: string) => string) {
     }
 }
 
+/**
+ * Fetches every file listed in `template` and writes it below `destinationDirectory`.
+ * Downloads are started together and awaited as a group, so the returned promise
+ * rejects as soon as any single download or write fails.
+ */
+async function downloadTemplateFilesToDisk(template: Template, destinationDirectory: string) {
+    const pendingWrites = template.files.map(async ({ path: relativePath, url }) => {
+        const contents = await downloadFile(url);
+
+        // Template files may live in sub-folders (e.g. `src/`), so create the parent folder first
+        const parentFolder = resolve(destinationDirectory, dirname(relativePath));
+        await ensureDir(parentFolder);
+
+        const targetFile = resolve(destinationDirectory, relativePath);
+        await writeFile(targetFile, contents);
+    });
+
+    await Promise.all(pendingWrites);
+}
+
+// Downloads `url` fully into memory. Throws (rejects) on network/stream errors and on any non-200
+// response, so an HTTP error page is never saved to disk in place of a template file.
+async function downloadFile(url: string) {
+    const res = await new Promise<import('node:http').IncomingMessage>((ok, fail) => get(url, ok).on('error', fail));
+    if (res.statusCode !== 200) {
+        res.resume(); // discard the unread body so the underlying socket is released
+        throw new Error(`Failed to download "${url}" (HTTP status ${res.statusCode})`);
+    }
+
+    const chunks: Buffer[] = [];
+    for await (const chunk of res) {
+        chunks.push(chunk);
+    }
+    return Buffer.concat(chunks);
+}
+
 export class CreateProjectCommand<T> implements CommandModule<T, CreateProjectArgs> {
     command = 'create [project-name]';
     describe = 'Creates a new Crawlee project directory from a selected boilerplate template.';
@@ -109,7 +145,7 @@ export class CreateProjectCommand<T> implements CommandModule<T, CreateProjectAr
 
         const templateData = manifest.templates.find((item) => item.name === template)!;
 
-        await this.downloadTemplateFilesToDisk(templateData, projectDir);
+        await downloadTemplateFilesToDisk(templateData, projectDir);
         await rewrite(resolve(projectDir, 'package.json'), (pkg) => pkg.replace(/"name": "[\w-]+"/, `"name": "${projectName}"`));
 
         // // Run npm install in project dir.
@@ -119,40 +155,4 @@ export class CreateProjectCommand<T> implements CommandModule<T, CreateProjectAr
         // eslint-disable-next-line no-console
         console.log(colors.green(`Project ${projectName} was created. To run it, run "cd ${projectName}" and "npm start".`));
     }
-
-    private async downloadTemplateFilesToDisk(template: Template, destinationDirectory: string) {
-        const promises: Promise<void>[] = [];
-
-        for (const file of template.files) {
-            const promise = this.downloadFile(file.url).then(async (buffer) => {
-                // Make sure the folder for the file exists
-                const fileDirName = dirname(file.path);
-                const fileFolder = resolve(destinationDirectory, fileDirName);
-                await ensureDir(fileFolder);
-
-                // Write the actual file
-                await writeFile(resolve(destinationDirectory, file.path), buffer);
-            });
-
-            promises.push(promise);
-        }
-
-        await Promise.all(promises);
-    }
-
-    private downloadFile(url: string) {
-        return new Promise<Buffer>((promiseResolve, reject) => {
-            get(url, async (res) => {
-                const bytes: Buffer[] = [];
-
-                res.on('error', (err) => reject(err));
-
-                for await (const byte of res) {
-                    bytes.push(byte);
-                }
-
-                promiseResolve(Buffer.concat(bytes));
-            }).on('error', (err) => reject(err));
-        });
-    }
 }

From 469804b8af574deffb07f94d1dacb1096c8feea6 Mon Sep 17 00:00:00 2001
From: Vlad Frangu <kingdgrizzle@gmail.com>
Date: Wed, 14 Sep 2022 17:40:58 +0300
Subject: [PATCH 3/3] chore: fix accidental double // comment

---
 packages/cli/src/commands/CreateProjectCommand.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/cli/src/commands/CreateProjectCommand.ts b/packages/cli/src/commands/CreateProjectCommand.ts
index 4b2338051a01..b87e9ef1db56 100644
--- a/packages/cli/src/commands/CreateProjectCommand.ts
+++ b/packages/cli/src/commands/CreateProjectCommand.ts
@@ -148,7 +148,7 @@ export class CreateProjectCommand<T> implements CommandModule<T, CreateProjectAr
         await downloadTemplateFilesToDisk(templateData, projectDir);
         await rewrite(resolve(projectDir, 'package.json'), (pkg) => pkg.replace(/"name": "[\w-]+"/, `"name": "${projectName}"`));
 
-        // // Run npm install in project dir.
+        // Run npm install in project dir.
         const npm = /^win/.test(process.platform) ? 'npm.cmd' : 'npm';
         execSync(`${npm} install`, { cwd: projectDir, stdio: 'inherit' });