Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: make the CLI download directly from GitHub #1540

Merged
merged 3 commits into from
Sep 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 48 additions & 8 deletions packages/cli/src/commands/CreateProjectCommand.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
import { mkdirSync } from 'node:fs';
import { join } from 'node:path';
import { dirname, join } from 'node:path';
import { execSync } from 'node:child_process';
import type { ArgumentsCamelCase, Argv, CommandModule } from 'yargs';
import { prompt } from 'inquirer';
import colors from 'ansi-colors';
import type { Template } from '@crawlee/templates';
import { fetchManifest } from '@crawlee/templates';
import { copy } from 'fs-extra';
import { readFileSync, writeFileSync } from 'fs';
import { resolve } from 'path';
import { readFile, writeFile } from 'node:fs/promises';
import { get } from 'node:https';
import { ensureDir } from 'fs-extra';

interface CreateProjectArgs {
projectName?: string;
Expand All @@ -20,16 +22,52 @@ function validateProjectName(name: string) {
}
}

function rewrite(path: string, replacer: (from: string) => string): void {
async function rewrite(path: string, replacer: (from: string) => string) {
try {
const file = readFileSync(path).toString();
const file = await readFile(path, 'utf8');
const replaced = replacer(file);
writeFileSync(path, replaced);
await writeFile(path, replaced);
} catch {
// not found
}
}

/**
 * Downloads every file listed in `template.files` and writes it below
 * `destinationDirectory`, creating intermediate folders as needed.
 *
 * All downloads are started at once and awaited together, so the returned
 * promise rejects as soon as any single download or write fails.
 *
 * @param template Template whose `files` (path + url pairs) should be fetched.
 * @param destinationDirectory Directory the file paths are resolved against.
 */
async function downloadTemplateFilesToDisk(template: Template, destinationDirectory: string) {
    const pendingWrites = template.files.map(async ({ url, path }) => {
        const contents = await downloadFile(url);

        // The parent folder has to exist before the file can be written into it
        const parentFolder = resolve(destinationDirectory, dirname(path));
        await ensureDir(parentFolder);

        const targetPath = resolve(destinationDirectory, path);
        await writeFile(targetPath, contents);
    });

    await Promise.all(pendingWrites);
}

/**
 * Downloads the resource at `url` over HTTPS and buffers the whole response
 * body in memory.
 *
 * @param url Absolute `https:` URL to fetch.
 * @returns The complete response body.
 * @throws Rejects when the request fails, when the response stream errors, or
 * when the server answers with a non-2xx status code (otherwise an error page
 * such as a 404 body would silently be treated as the file contents).
 */
async function downloadFile(url: string) {
    return new Promise<Buffer>((promiseResolve, reject) => {
        get(url, (res) => {
            const { statusCode } = res;

            if (statusCode === undefined || statusCode < 200 || statusCode >= 300) {
                // Drain the response so the underlying socket is released
                res.resume();
                reject(new Error(`Failed to download ${url}: received status code ${statusCode}`));
                return;
            }

            const readBody = async () => {
                const chunks: Buffer[] = [];

                for await (const chunk of res) {
                    chunks.push(chunk);
                }

                return Buffer.concat(chunks);
            };

            // Stream errors surface as a rejection of readBody(), so routing it through
            // `reject` guarantees they never end up as an unhandled rejection.
            readBody().then(promiseResolve, reject);
        }).on('error', (err) => reject(err));
    });
}

export class CreateProjectCommand<T> implements CommandModule<T, CreateProjectArgs> {
command = 'create [project-name]';
describe = 'Creates a new Crawlee project directory from a selected boilerplate template.';
Expand Down Expand Up @@ -105,8 +143,10 @@ export class CreateProjectCommand<T> implements CommandModule<T, CreateProjectAr
throw err;
}

await copy(require.resolve('@crawlee/templates').replace('index.js', `templates/${template}`), projectDir);
rewrite(resolve(projectDir, 'package.json'), (pkg) => pkg.replace(/"name": "[\w-]+"/, `"name": "${projectName}"`));
const templateData = manifest.templates.find((item) => item.name === template)!;

await downloadTemplateFilesToDisk(templateData, projectDir);
await rewrite(resolve(projectDir, 'package.json'), (pkg) => pkg.replace(/"name": "[\w-]+"/, `"name": "${projectName}"`));

// Run npm install in project dir.
const npm = /^win/.test(process.platform) ? 'npm.cmd' : 'npm';
Expand Down
90 changes: 82 additions & 8 deletions packages/templates/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,109 @@
"templates": [
{
"name": "getting-started-ts",
"description": "Getting started example [TypeScript]"
"description": "Getting started example [TypeScript]",
"files": [
"src/main.ts",
".dockerignore",
".gitignore",
"Dockerfile",
"package.json",
"README.md",
"tsconfig.json"
]
},
{
"name": "getting-started-js",
"description": "Getting started example [JavaScript]"
"description": "Getting started example [JavaScript]",
"files": [
"src/main.js",
".dockerignore",
".gitignore",
"Dockerfile",
"package.json",
"README.md"
]
},
{
"name": "cheerio-ts",
"description": "CheerioCrawler template project [TypeScript]"
"description": "CheerioCrawler template project [TypeScript]",
"files": [
"src/main.ts",
"src/routes.ts",
".dockerignore",
".gitignore",
"Dockerfile",
"package.json",
"README.md",
"tsconfig.json"
]
},
{
"name": "playwright-ts",
"description": "PlaywrightCrawler template project [TypeScript]"
"description": "PlaywrightCrawler template project [TypeScript]",
"files": [
"src/main.ts",
"src/routes.ts",
".dockerignore",
".gitignore",
"Dockerfile",
"package.json",
"README.md",
"tsconfig.json"
]
},
{
"name": "puppeteer-ts",
"description": "PuppeteerCrawler template project [TypeScript]"
"description": "PuppeteerCrawler template project [TypeScript]",
"files": [
"src/main.ts",
"src/routes.ts",
".dockerignore",
".gitignore",
"Dockerfile",
"package.json",
"README.md",
"tsconfig.json"
]
},
{
"name": "cheerio-js",
"description": "CheerioCrawler template project [JavaScript]"
"description": "CheerioCrawler template project [JavaScript]",
"files": [
"src/main.js",
"src/routes.js",
".dockerignore",
".gitignore",
"Dockerfile",
"package.json",
"README.md"
]
},
{
"name": "playwright-js",
"description": "PlaywrightCrawler template project [JavaScript]"
"description": "PlaywrightCrawler template project [JavaScript]",
"files": [
"src/main.js",
"src/routes.js",
".dockerignore",
".gitignore",
"Dockerfile",
"package.json",
"README.md"
]
},
{
"name": "puppeteer-js",
"description": "PuppeteerCrawler template project [JavaScript]"
"description": "PuppeteerCrawler template project [JavaScript]",
"files": [
"src/main.js",
"src/routes.js",
".dockerignore",
".gitignore",
"Dockerfile",
"package.json",
"README.md"
]
}
]
}
6 changes: 3 additions & 3 deletions packages/templates/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@
},
"homepage": "https://crawlee.dev",
"scripts": {
"build": "npm run clean && npm run compile && npm run copy-templates && npm run copy",
"build": "npm run clean && npm run validate && npm run compile && npm run copy",
"clean": "rimraf ./dist",
"compile": "tsc -p tsconfig.build.json && gen-esm-wrapper ./dist/index.js ./dist/index.mjs",
"copy-templates": "node ./scripts/copy-templates.mjs",
"copy": "ts-node -T ../../scripts/copy.ts"
"copy": "ts-node -T ../../scripts/copy.ts",
"validate": "node ./scripts/validate-manifest.mjs"
},
"publishConfig": {
"access": "public"
Expand Down
18 changes: 0 additions & 18 deletions packages/templates/scripts/copy-templates.mjs

This file was deleted.

49 changes: 49 additions & 0 deletions packages/templates/scripts/validate-manifest.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import { readFile, readdir, access } from 'node:fs/promises';
import { URL } from 'node:url';

// Minimal ANSI colour helpers: wrap the text in the given foreground colour
// code and reset the foreground colour afterwards.
const ansi = (code) => (text) => `\x1B[${code}m${text}\x1B[39m`;

const colors = {
    red: ansi(31),
    green: ansi(32),
    grey: ansi(90),
    yellow: ansi(33),
};

// Both locations are resolved relative to this script file
const templatesDirectory = new URL('../templates/', import.meta.url);
const manifestLocation = new URL('../manifest.json', import.meta.url);

const templateNames = await readdir(templatesDirectory);
/** @type {{ templates: Array<{ name: string; description: string; files: string[] }>; }} */
const manifest = JSON.parse(await readFile(manifestLocation, 'utf8'));

console.log(`Validating ${colors.green(manifest.templates.length)} templates`);

// Flipped to true as soon as a template folder or one of its files is missing
let hasError = false;

for (const { name, files } of manifest.templates) {
    const prettyName = colors.yellow(name);

    // A manifest entry without a matching folder cannot be validated any further
    if (!templateNames.includes(name)) {
        console.error(colors.red(`Failed to find folder for template called ${prettyName}`));
        hasError = true;
        continue;
    }

    console.log(colors.grey(`Validating template ${prettyName}`));

    // Every file the manifest lists must be present inside the template folder
    for (const requiredFile of files) {
        try {
            await access(new URL(`./${name}/${requiredFile}`, templatesDirectory));
        } catch (err) {
            const prettyFile = colors.yellow(requiredFile);

            // Anything other than "file does not exist" is only reported as a warning
            if (err.code !== 'ENOENT') {
                console.warn(`${colors.grey(`[${prettyName}]:`)} Failed to read file ${prettyFile}`, err);
                continue;
            }

            hasError = true;
            console.error(`${colors.grey(`[${prettyName}]:`)} Failed to find file ${prettyFile}`);
            console.error(err);
        }
    }

    console.log(colors.green(`Finished validating ${prettyName}`));
}

// A non-zero exit code makes the calling npm script fail
process.exitCode = Number(hasError);
49 changes: 46 additions & 3 deletions packages/templates/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,47 @@ import https from 'https';

export const MANIFEST_URL = 'https://raw.githubusercontent.com/apify/crawlee/master/packages/templates/manifest.json';

/**
 * Builds the raw.githubusercontent.com URL of a single template file.
 *
 * @param templateName Name of the template folder.
 * @param path Path of the file inside that template folder.
 * @returns The URL string; the inputs are inserted as-is, without URL encoding.
 */
function templateFileUrl(templateName: string, path: string) {
    const templatesRoot = 'https://raw.githubusercontent.com/apify/crawlee/master/packages/templates/templates';

    return [templatesRoot, templateName, path].join('/');
}

/** Fields shared by the raw manifest entries and the templates handed to consumers. */
interface SharedTemplateData {
// Template identifier; also passed as the folder name when building file URLs
name: string;
// Human-readable summary of the template
description: string;
}

// Data received from the github file
/** A template entry as it is listed in the manifest fetched from GitHub. */
interface RawTemplate extends SharedTemplateData {
// File paths of the template; each one is later combined with the template name into a download URL
files: string[];
}

/** Shape the JSON fetched from `MANIFEST_URL` is expected to have. */
interface RawManifest {
templates: RawTemplate[];
}

// Data returned for the CLI or users to consume
export interface Manifest {
templates: { name: string; description: string }[];
templates: Template[];
}

/** A template as returned by `fetchManifest`, with every file paired with its download URL. */
export interface Template extends SharedTemplateData {
files: TemplateFile[];
}

/** A single file belonging to a template. */
export interface TemplateFile {
// Path of the file exactly as it is listed in the manifest
path: string;
// URL built from the template name and `path`
url: string;
}

export async function fetchManifest(): Promise<Manifest> {
return new Promise((resolve, reject) => {
const rawManifest = await new Promise<RawManifest>((resolve, reject) => {
https.get(MANIFEST_URL, (res) => {
let json = '';
res
.on('data', (chunk) => {
json += chunk;
})
.on('end', () => {
.once('end', () => {
if (res.statusCode === 200) {
try {
const data = JSON.parse(json);
Expand All @@ -30,4 +58,19 @@ export async function fetchManifest(): Promise<Manifest> {
})
.on('error', (err) => reject(err));
});

const newTemplates: Template[] = rawManifest.templates.map((original) => {
return {
name: original.name,
description: original.description,
files: original.files.map((file) => ({
path: file,
url: templateFileUrl(original.name, file),
})),
};
});

return {
templates: newTemplates,
};
}