Skip to content

Commit

Permalink
fix: Improve matching of words near punctuation
Browse files Browse the repository at this point in the history
Fixes #28
  • Loading branch information
sapegin committed Jun 22, 2022
1 parent afad69e commit 18cfb94
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 2 deletions.
5 changes: 3 additions & 2 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -133,11 +133,12 @@ function readTermsFile(filepath) {
* @param {string} pattern
*/
function getExactMatchRegExp(pattern) {
	// Characters that may directly follow a matched term: . , ; ' " )
	const punctuation = '[\\.,;\'")]';

	// NOTE: the "g" flag makes the returned RegExp stateful (`lastIndex`
	// advances after each successful `test`/`exec`), so callers should not
	// reuse one instance across unrelated strings without resetting it.
	return new RegExp(
		// 1. Beginning of the string, or any character that isn't "-" or alphanumeric
		// 2. Exact match of the pattern (interpolated as regular expression source, not escaped)
		// 3. Space, punctuation + space, punctuation + punctuation,
		//    punctuation at the end of the string, or end of the string
		`(?<=^|[^-\\w])\\b${pattern}\\b(?= |${punctuation} |${punctuation}${punctuation}|${punctuation}$|$)`,
		'ig'
	);
}
Expand Down
26 changes: 26 additions & 0 deletions test.js
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,19 @@ describe('getMultipleWordRegExp', () => {
expect(result[0]).toBe('javascript');
});

// Each sentence places the term directly next to (or wrapped in) punctuation.
it.each([
	['Bad Javascript. Is it bad?'],
	['Bad Javascript, is it bad?'],
	['Bad Javascript; is it bad?'],
	['Bad (Javascript) is it bad?'],
	['Bad "Javascript" is it bad?'],
	["Bad 'Javascript' is it bad?"],
	['Bad "Javascript", is it bad?'],
])('should match a pattern regardless of punctuation: %s', sentence => {
	// A fresh RegExp is built for every case, so no state carries over between sentences
	const matcher = getMultipleWordRegExp(variants);
	expect(matcher.exec(sentence)).toBeTruthy();
});

it('should not match a pattern in as a part of a file name', () => {
const result = getMultipleWordRegExp(variants).exec('javascript.md');
expect(result).toBeFalsy();
Expand Down Expand Up @@ -148,6 +161,19 @@ describe('getExactMatchRegExp', () => {
expect(regexp.test('Webpack')).toBeTruthy();
});

// Each input is the bare term followed by, or wrapped in, punctuation.
it.each([
	['Javascript.'],
	['Javascript,'],
	['Javascript;'],
	['(Javascript)'],
	['"Javascript"'],
	["'Javascript'"],
	['"Javascript",'],
])('should match a pattern regardless of punctuation: %s', input => {
	// A fresh RegExp is built for every case, so no state carries over between inputs
	const isMatch = getExactMatchRegExp('javascript').test(input);
	expect(isMatch).toBeTruthy();
});

it('returned RegExp should not match in the middle of the word', () => {
const regexp = getExactMatchRegExp('webpack');
expect(regexp.test(`FooWebpack`)).toBeFalsy();
Expand Down

0 comments on commit 18cfb94

Please sign in to comment.