Skip to content

Commit

Permalink
Add option fileContent to bypass read ops
Browse files Browse the repository at this point in the history
Allows file content to be provided manually instead of reading from a folder on disk.

Resolves #9 and #10
  • Loading branch information
Nixinova committed Feb 12, 2022
1 parent c4fc99e commit 205c4ef
Show file tree
Hide file tree
Showing 6 changed files with 49 additions and 14 deletions.
3 changes: 3 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Changelog

## Next
- Added option `fileContent` to supply file content manually for analysis instead of reading a folder from disk.

## 2.2.1
*2022-02-11*
- Fixed files marked as `text=auto` being classified as text.
Expand Down
3 changes: 3 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ Running Linguist on this folder will return the following JSON:
### Notes

- File paths in the output use only forward slashes as delimiters, even on Windows.
- This tool does not work when offline.
- Do not rely on any language classification output from Linguist being unchanged between runs.
Language data is fetched each run from the latest classifications of [`github-linguist`](https://github.com/github/linguist).
This data is subject to change at any time and may change the results of a run even when using the same version of Linguist.
Expand All @@ -99,6 +100,8 @@ const { files, languages, unknown } = linguist(folder, options);
Analyse multiple folders using the syntax `"{folder1,folder2,...}"`.
- `opts` (optional; object):
An object containing analyser options.
- `fileContent` (string or string array):
Supplies the content of the file name(s) given as `entry` directly, so that this content is analysed instead of being read from a folder on disk.
- `ignoredFiles` (string array):
A list of file path globs to explicitly ignore.
- `ignoredLanguages` (string array):
Expand Down
2 changes: 1 addition & 1 deletion src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ if (args.analyze) (async () => {
const data = await linguist(root, args);
const { files, languages, unknown } = data;
// Get file count
let totalFiles = walk(root).files.length;
const totalFiles = walk(root).files.length;
// Print output
if (!args.json) {
const sortedEntries = Object.entries(languages.results).sort((a, b) => a[1].bytes < b[1].bytes ? +1 : -1);
Expand Down
36 changes: 26 additions & 10 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,17 @@ async function analyse(input?: string | string[], opts: T.Options = {}): Promise
opts.keepVendored ? [] : vendorData.map(path => RegExp(path)),
opts.ignoredFiles?.map(path => globToRegexp('*' + path + '*', { extended: true })) ?? [],
].flat();
let { files, folders } = walk(input ?? '.', ignoredFiles);

let files, folders;
if (opts.fileContent) {
opts.fileContent = Array.isArray(opts.fileContent) ? opts.fileContent : [opts.fileContent];
files = [`${input}`];
folders = [''];
}
else {
const data = walk(input ?? '.', ignoredFiles);
({ files, folders } = data);
}

// Apply aliases
opts = { checkIgnored: !opts.quick, checkAttributes: !opts.quick, checkHeuristics: !opts.quick, checkShebang: !opts.quick, ...opts };
Expand All @@ -55,7 +65,7 @@ async function analyse(input?: string | string[], opts: T.Options = {}): Promise
const customIgnored: string[] = [];
const customBinary: string[] = [];
const customText: string[] = [];
if (!opts.quick) {
if (!opts.fileContent && !opts.quick) {
for (const folder of folders) {

// Skip if folder is marked in gitattributes
Expand Down Expand Up @@ -102,7 +112,7 @@ async function analyse(input?: string | string[], opts: T.Options = {}): Promise
}
}
// Check vendored files
if (!opts.keepVendored) {
if (!opts.fileContent && !opts.keepVendored) {
// Filter out any files that match a vendor file path
const matcher = (match: string) => RegExp(match.replace(/\/$/, '/.+$').replace(/^\.\//, ''));
files = files.filter(file => !customIgnored.some(pattern => matcher(pattern).test(file)));
Expand All @@ -121,15 +131,21 @@ async function analyse(input?: string | string[], opts: T.Options = {}): Promise
const overridesArray = Object.entries(overrides);
// List all languages that could be associated with a given file
for (const file of files) {
if (!fs.existsSync(file) || fs.lstatSync(file).isDirectory()) continue;
const firstLine = await readFile(file, true).catch(() => null);
let firstLine: string | null;
if (opts.fileContent) {
firstLine = opts.fileContent?.[files.indexOf(file)]?.split('\n')[0] ?? null;
}
else {
if (!fs.existsSync(file) || fs.lstatSync(file).isDirectory()) continue;
firstLine = await readFile(file, true).catch(() => null);
}
// Skip if file is unreadable
if (firstLine === null) continue;
// Check shebang line for explicit classification
if (!opts.quick && opts.checkShebang && firstLine.startsWith('#!')) {
// Find matching interpreters
const matches = Object.entries(langData).filter(([, data]) =>
data.interpreters?.some(interpreter => firstLine.match('\\b' + interpreter + '\\b'))
data.interpreters?.some(interpreter => firstLine!.match('\\b' + interpreter + '\\b'))
);
if (matches.length) {
// Add explicitly-identified language
Expand All @@ -138,7 +154,7 @@ async function analyse(input?: string | string[], opts: T.Options = {}): Promise
}
}
// Check override for manual language classification
if (!opts.quick && opts.checkAttributes) {
if (!opts.fileContent && !opts.quick && opts.checkAttributes) {
const match = overridesArray.find(item => RegExp(item[0]).test(file));
if (match) {
const forcedLang = match[1];
Expand Down Expand Up @@ -167,7 +183,7 @@ async function analyse(input?: string | string[], opts: T.Options = {}): Promise
// Narrow down file associations to the best fit
for (const file in fileAssociations) {
// Skip binary files
if (!opts.keepBinary) {
if (!opts.fileContent && !opts.keepBinary) {
const isCustomText = customText.some(path => RegExp(path).test(file));
const isCustomBinary = customBinary.some(path => RegExp(path).test(file));
const isBinaryExt = binaryData.some(ext => file.endsWith('.' + ext));
Expand Down Expand Up @@ -227,7 +243,7 @@ async function analyse(input?: string | string[], opts: T.Options = {}): Promise
}

// Convert paths to relative
if (opts.relativePaths) {
if (!opts.fileContent && opts.relativePaths) {
const newMap: Record<T.FilePath, T.LanguageResult> = {};
for (const [file, lang] of Object.entries(results.files.results)) {
let relPath = paths.relative(process.cwd(), file).replace(/\\/g, '/');
Expand All @@ -240,7 +256,7 @@ async function analyse(input?: string | string[], opts: T.Options = {}): Promise
// Load language bytes size
for (const [file, lang] of Object.entries(results.files.results)) {
if (lang && !langData[lang]) continue;
const fileSize = fs.statSync(file).size;
const fileSize = opts.fileContent?.[files.indexOf(file)]?.length ?? fs.statSync(file).size;
results.files.bytes += fileSize;
// If no language found, add extension in other section
if (!lang) {
Expand Down
1 change: 1 addition & 0 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export type Bytes = Integer
export type Integer = number

export interface Options {
fileContent?: string | string[]
ignoredFiles?: string[]
ignoredLanguages?: Language[]
categories?: Category[]
Expand Down
18 changes: 15 additions & 3 deletions test/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@ const fs = require('fs');
const linguist = require('..');
const { updatedDiff } = require('deep-object-diff');

async function test() {
async function testFolder() {
const samplesFolder = __dirname.replace(/\\/g, '/') + '/samples';
const expectedJson = fs.readFileSync(__dirname + '/expected.json', { encoding: 'utf8' });
const expected = JSON.parse(expectedJson.replace(/\*/g, samplesFolder));

const actual = await linguist(samplesFolder);
const diff = updatedDiff(expected, actual);
console.log(JSON.stringify(actual, null, 2));
console.dir(actual, { depth: null });
if (JSON.stringify(diff) === '{}') {
console.info('Results match expected');
}
Expand All @@ -18,4 +18,16 @@ async function test() {
throw new Error('Results differ from expected!');
}
}
test();
testFolder();

/**
 * Exercises the `fileContent` option: analyses a single named file whose
 * content is passed in directly rather than read from disk.
 *
 * Passes when the file is classified as JavaScript (via its `node` shebang)
 * and the reported language byte total is 19, the length of the supplied content.
 *
 * @throws {Error} When either the classification or the byte count differs.
 */
async function testRaw() {
	const fileName = 'test.example';
	const shebang = '#!/usr/bin/env node';
	const actual = await linguist(fileName, { fileContent: [shebang] });
	// Dump the complete result object, without truncating nested levels
	console.dir(actual, { depth: null });
	// Short-circuits: the byte total is only inspected once the language matches
	const isExpected = actual.files.results[fileName] === 'JavaScript' && actual.languages.bytes === 19;
	if (!isExpected) {
		throw new Error('Results differ from expected!');
	}
	console.info('Results match expected');
}
// Kick off the raw-content check (the returned promise is not awaited)
testRaw();

0 comments on commit 205c4ef

Please sign in to comment.