Skip to content

Commit

Permalink
Merge branch 'master' of github.com:teknologi-umum/language-detector …
Browse files Browse the repository at this point in the history
…into core/refactor
  • Loading branch information
Reinaldy Rafli committed Aug 20, 2021
2 parents a8a3485 + db35606 commit 391e033
Show file tree
Hide file tree
Showing 17 changed files with 674 additions and 41 deletions.
21 changes: 11 additions & 10 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,16 @@ jobs:
- name: Test & coverage
run: npm run test

# - name: Initialize CodeQL
# uses: github/codeql-action/init@v1
# with:
# languages: javascript
- name: Build with Rollup
run: npm run build

# - name: Perform CodeQL Analysis
# uses: github/codeql-action/analyze@v1

- name: Upload to Codecov
uses: codecov/codecov-action@v1
- name: Initialize CodeQL
uses: github/codeql-action/init@v1
with:
token: ${{ secrets.CODECOV_TOKEN }}
languages: javascript

- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v1

- name: Send coverage report to Codecov
uses: codecov/codecov-action@v2
Empty file modified .husky/pre-commit
100644 → 100755
Empty file.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
"format": "prettier --write --ignore-path .gitignore .",
"prepare": "husky install",
"test:unit": "uvu -r esbuild-register tests \".(test|spec).ts\"",
"test:coverage": "c8 npm run test:unit",
"test:coverage": "c8 --reporter=text --reporter=lcov npm run test:unit",
"test:tdd": "npm run test:unit; watchlist src tests -- npm run test:unit",
"test": "npm run test:coverage",
"build": "rollup -c",
Expand Down
23 changes: 16 additions & 7 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { C } from './languages/c';
import { CPP } from './languages/cpp';
import { CS } from './languages/cs';
import { CSS } from './languages/css';
import { Go } from './languages/go';
import { HTML } from './languages/html';
Expand All @@ -13,6 +14,7 @@ import { Rust } from './languages/rust';
import { SQL } from './languages/sql';
import { nearTop, getPoints } from './points';
import type { LanguagePattern, Options, StatisticOutput } from './types';
import { convert } from './shiki';

/**
* A checker is an object with the following form:
Expand All @@ -37,6 +39,7 @@ import type { LanguagePattern, Options, StatisticOutput } from './types';
const languages: Record<string, LanguagePattern[]> = {
C,
'C++': CPP,
'C#': CS,
CSS,
Go,
HTML,
Expand Down Expand Up @@ -65,8 +68,8 @@ const languages: Record<string, LanguagePattern[]> = {
*/
function detectLang(
snippet: string,
options: Options = { heuristic: true, statistics: false },
): StatisticOutput | string {
options: Options = { heuristic: true, statistics: false, shiki: false },
): StatisticOutput & string {
let linesOfCode = snippet
.replace(/\r\n?/g, '\n')
.replace(/\n{2,}/g, '\n')
Expand Down Expand Up @@ -107,16 +110,22 @@ function detectLang(

const bestResult = results.reduce((a, b) => (a.points >= b.points ? a : b), { points: 0, language: '' });
if (options.statistics) {
const statistics = [];
const statistics: Record<string, number> = {};
for (const result in results) {
statistics.push([results[result].language, results[result].points]);
statistics[results[result].language] = results[result].points;
}

statistics.sort((a, b) => Number(b[1]) - Number(a[1]));
return { detected: bestResult.language, statistics };
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
return {
detected: options.shiki ? convert(bestResult.language) : bestResult.language,
statistics,
};
}

return bestResult.language;
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
return options.shiki ? convert(bestResult.language) : bestResult.language;
}

export type { Options, StatisticOutput };
Expand Down
11 changes: 10 additions & 1 deletion src/languages/c.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,14 @@ export const C: LanguagePattern[] = [
// Avoiding Ruby confusion
{ pattern: /def\s+\w+\s*(\(.+\))?\s*\n/, points: -50 },
{ pattern: /puts\s+("|').+("|')/, points: -1 },
{ pattern: /(public\s*)?class\s*(.*)+(\s)?/, points: -50 },
// Avoiding C# confusion
{ pattern: /Console\.(WriteLine|Write)(\s*)?\(/, points: -50 },
{ pattern: /(using\s)?System(\..*)?(;)?/, points: -50 },
{ pattern: /(public\s)?((partial|static|delegate)\s)?(class\s)/, points: -50 },
{ pattern: /(public|private|protected|internal)/, points: -1 },
{
pattern:
/(new|this\s)?(List|IEnumerable)<(sbyte|byte|short|ushort|int|uint|long|ulong|float|double|decimal|bool|char|string)>/,
points: -50,
},
];
6 changes: 5 additions & 1 deletion src/languages/cpp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ export const CPP: LanguagePattern[] = [
// Primitive variable declaration.
{ pattern: /(char|long|int|float|double)\s+\w+\s*=?/, points: 2 },
// #include <whatever.h>
{ pattern: /#include\s*(<|")\w+(\.h)?(>|")/, points: 2, nearTop: true },
{ pattern: /#include\s*(<|")\w+(\.h)?(>|")/, points: 2 },
// using namespace something
{ pattern: /using\s+namespace\s+.+\s*;/, points: 2 },
// template declaration
Expand Down Expand Up @@ -40,4 +40,8 @@ export const CPP: LanguagePattern[] = [
// Avoiding Ruby confusion
{ pattern: /def\s+\w+\s*(\(.+\))?\s*\n/, points: -50 },
{ pattern: /puts\s+("|').+("|')/, points: -1 },
// Avoiding C# confusion
{ pattern: /Console\.(WriteLine|Write)(\s*)?\(/, points: -50 },
{ pattern: /(using\s)?System(\..*)?(;)?/, points: -50 },
{ pattern: /(public|private|protected|internal)\s/, points: -50 },
];
33 changes: 33 additions & 0 deletions src/languages/cs.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import type { LanguagePattern } from '../types';

/**
 * Pattern table used to score a snippet as C#.
 *
 * Each entry pairs a regular expression with a `points` value: positive values
 * count in favour of C#, negative values count against it (used below to steer
 * Java-looking code away from C#).
 * NOTE(review): how the points are accumulated is decided by the scoring code
 * outside this file — confirm against `getPoints` before relying on exact totals.
 */
export const CS: LanguagePattern[] = [
// `using System`, optionally followed by a dotted sub-namespace and a semicolon
{ pattern: /using\sSystem(\..*)?(;)?/, points: 2 },
// Console.WriteLine( / Console.Write( calls
{ pattern: /Console\.(WriteLine|Write)(\s*)?\(/, points: 2 },
// Class declaration, optionally prefixed with `public` and partial/static/delegate
{ pattern: /(public\s)?((partial|static|delegate)\s)?class\s/, points: 2 },
// Modifiers (matched anywhere in the text; the pattern has no word boundaries)
{ pattern: /(extern|override|sealed|readonly|virtual|volatile)/, points: 2 },
// Namespace declaration, optionally dotted and optionally followed by ` {`
{ pattern: /namespace\s(.*)(\.(.*))?(\s{)?/, points: 2 },
// Regions: `#region [name]`, or `#endregion` immediately followed by a newline
{ pattern: /(#region(\s.*)?|#endregion\n)/, points: 2 },
// Access modifiers followed by whitespace (weaker signal, hence only 1 point)
{ pattern: /(public|private|protected|internal)\s/, points: 1 },
// Variable declaration with a built-in type: `[const ]type[[]] name = value;`
{
pattern:
/(const\s)?(sbyte|byte|short|ushort|int|uint|long|ulong|float|double|decimal|bool|char|string)(\[\])?\s(.*)\s=\s(.*);/,
points: 1,
},
// Generic List<T> / IEnumerable<T> over a built-in type, optionally after `new` or `this `
{
pattern:
/(new|this\s)?(List|IEnumerable)<(sbyte|byte|short|ushort|int|uint|long|ulong|float|double|decimal|bool|char|string)>/,
points: 2,
},
// `#define SYMBOL` preprocessor directive
{ pattern: /#define\s(.*)/, points: 1 },
// Plus point if you're doing PascalCase: a whitespace-preceded PascalCase identifier followed by ` =`
{ pattern: /\s([A-Z]([A-Z0-9]*[a-z][a-z0-9]*[A-Z]|[a-z0-9]*[A-Z][A-Z0-9]*[a-z])[A-Za-z0-9]*)\s=/, points: 1 },
// Avoiding Java confusion: Java-style keywords are a light penalty...
{ pattern: /(extends|throws|@Attribute)/, points: -1 },
// ...while `System.in.` / `System.out.` member access is a heavy penalty
{ pattern: /System\.(in|out)\.\w+/, points: -50 },
];
4 changes: 4 additions & 0 deletions src/languages/go.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,8 @@ export const Go: LanguagePattern[] = [
{ pattern: /nil/, points: 1 },
// Single quote multicharacter string
{ pattern: /'.{2,}'/, points: -1 },
// Avoiding C# confusion
{ pattern: /Console\.(WriteLine|Write)(\s*)?\(/, points: -50 },
{ pattern: /using\sSystem(\..*)?(;)?/, points: -50 },
{ pattern: /(public|private|protected|internal)\s/, points: -1 },
];
5 changes: 5 additions & 0 deletions src/languages/java.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,9 @@ export const Java: LanguagePattern[] = [
{ pattern: /#include\s*(<|")\w+(\.h)?(>|")/, points: -1, nearTop: true },
// Avoiding Ruby confusion
{ pattern: /def\s+\w+\s*(\(.+\))?\s*\n/, points: -50 },
// Avoiding C# confusion
{ pattern: /\[Attribute\]/, points: -50 },
{ pattern: /Console\.(WriteLine|Write)(\s*)?\(/, points: -50 },
{ pattern: /(#region(\s.*)?|#endregion\n)/, points: -50 },
{ pattern: /using\sSystem(\..*)?(;)?/, points: -50 },
];
3 changes: 3 additions & 0 deletions src/languages/javascript.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,7 @@ export const Javascript: LanguagePattern[] = [
// HTML <script> tag
{ pattern: /<(\/)?script( type=('|")text\/javascript('|"))?>/, points: -50 },
{ pattern: /fn\s[A-Za-z0-9<>,]+\(.*\)\s->\s\w+(\s\{|)/, points: -50 },
// Avoiding C# confusion
{ pattern: /Console\.(WriteLine|Write)(\s*)?\(/, points: -50 },
{ pattern: /(using\s)?System(\..*)?(;)?/, points: -50 },
];
4 changes: 4 additions & 0 deletions src/languages/rust.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,8 @@ export const Rust: LanguagePattern[] = [
{ pattern: /(Ok|Err|Box|ToOwned|Clone)/, points: 1 },
// Panic!!
{ pattern: /panic!\(.*\)/, points: 2 },
// Avoiding clash with C#
{ pattern: /using\sSystem/, points: -50 },
{ pattern: /Console\.WriteLine\s*\(/, points: -50 },
{ pattern: /(public\s)?((partial|static)\s)?class\s/, points: -1 },
];
12 changes: 12 additions & 0 deletions src/shiki.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/**
 * Maps a detected language name onto the identifier Shiki uses for it.
 * Shiki's language list: https://github.com/shikijs/shiki/blob/main/docs/languages.md
 *
 * 'C++' and 'C#' have dedicated identifiers; every other name is simply
 * lower-cased.
 *
 * @param language - Language name taken from the detector's language list
 * @returns The language identifier to hand to Shiki
 */
export function convert(language: string): string {
  switch (language) {
    case 'C++':
      return 'cpp';
    case 'C#':
      return 'csharp';
    default:
      // Remaining names only differ from Shiki's identifiers by letter case.
      return language.toLowerCase();
  }
}
3 changes: 2 additions & 1 deletion src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@ export interface LanguagePattern {
/**
 * Options accepted by `detectLang`. When the caller passes no options object,
 * `detectLang` defaults to `{ heuristic: true, statistics: false, shiki: false }`.
 */
export interface Options {
/** NOTE(review): consumed by the detection logic in `detectLang`; exact effect is not visible from this file — confirm there. */
heuristic?: boolean;
/** When truthy, `detectLang` returns an object with the detected language and per-language points instead of a bare string. */
statistics?: boolean;
/** When truthy, the detected language name is passed through `convert` so it matches Shiki's language identifiers. */
shiki?: boolean;
}

export interface StatisticOutput {
detected: string;
statistics: (string | number)[][];
statistics: Record<string, number>;
}
4 changes: 2 additions & 2 deletions tests/c.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ import * as assert from 'uvu/assert';
import detectLang from '../src/index';

test('hello world', () => {
const code = detectLang('printf("Hello world!\\n");');
assert.equal(code, 'C');
const code = detectLang('printf("Hello world!\\n");', { shiki: true });
assert.equal(code, 'c');
});

test('fizz buzz', () => {
Expand Down
26 changes: 24 additions & 2 deletions tests/cpp.test.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,32 @@
import { test } from 'uvu';
import * as assert from 'uvu/assert';
import detectLang from '../src/index';
import type { StatisticOutput } from '../src/types';

test('hello world', () => {
const code = detectLang('cout << "Hello world" << endl;');
assert.equal(code, 'C++');
const code = detectLang('cout << "Hello world" << endl;', {
shiki: true,
statistics: true,
heuristic: true,
}) as StatisticOutput;
assert.equal(code.detected, 'cpp');
assert.equal(code.statistics, {
C: 0,
'C++': 2,
CSS: 0,
Go: 0,
HTML: 0,
Java: 0,
Javascript: 0,
Julia: 1,
PHP: 0,
Python: 0,
Ruby: 0,
Rust: 0,
SQL: 0,
Unknown: 1,
'C#': 0,
});
});

test('fizz buzz', () => {
Expand Down
Loading

0 comments on commit 391e033

Please sign in to comment.