Skip to content

Commit

Permalink
fix: Fix the word matching behavior by using a simple `string.indexOf…
Browse files Browse the repository at this point in the history
…` again

Because the regex only checked that the number of matches was at least the number of query words, not that every individual word actually matched, it reported false positives
  • Loading branch information
marcbachmann committed Jun 4, 2020
1 parent 05c77c5 commit 45313f4
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 40 deletions.
76 changes: 36 additions & 40 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,61 +34,57 @@ function prepareSimpleTextSearch (collection, property) {

return function simpleTextSearch (q) {
if (!collection || !q) return collection
const { regex, length } = toRegex(q)
const tokens = toQueryTokens(q)
const result = []
for (const { pruned, elem } of cachedPrunedElements || prunedElements()) {
const match = pruned.match(regex)
if (match && match.length >= length) result.push(elem)

// eslint-disable-next-line no-labels
entries: for (const { pruned, elem } of cachedPrunedElements || prunedElements()) {
let i = tokens.length
// eslint-disable-next-line no-labels
while (i--) if (pruned.indexOf(tokens[i]) === -1) continue entries
result.push(elem)
}
return result
}
}

function toRegex (str) {
function toQueryTokens (str) {
const content = []
for (const token of clean(str).split(/\b/)) {
if (!/\b/.test(token)) continue
content.push(token.replace(/[|\\{}()[\]^$+*?.]/g, '\\$&').replace(/-/g, '\\x2d'))
}
return {
regex: new RegExp(`(${content.join('|')})`, 'ig'),
length: content.length
content.push(token.trim())
}
return content
}

var replaceChar = charReplacer()
function clean (str) {
return replaceChar(String(str).toLowerCase())
const specialCharMap = {
äàáâäæãåā: 'a',
çćč: 'c',
đð: 'd',
èéêëēėę: 'e',
îïíīįì: 'i',
ł: 'l',
ñńň: 'n',
ôöòóœøōõ: 'o',
ř: 'r',
śš: 's',
ß: 'ss',
ť: 't',
ûüùúūů: 'u',
ÿý: 'y',
žżŻź: 'z'
}

function charReplacer () {
var charMap = {
äàáâäæãåā: 'a',
çćč: 'c',
đð: 'd',
èéêëēėę: 'e',
îïíīįì: 'i',
ł: 'l',
ñńň: 'n',
ôöòóœøōõ: 'o',
ř: 'r',
śš: 's',
ß: 'ss',
ť: 't',
ûüùúūů: 'u',
ÿý: 'y',
žżŻź: 'z'
const charMap = { '\\W+': ' ' }
for (const keys of Object.keys(specialCharMap)) {
for (const char of keys) {
charMap[char] = specialCharMap[keys]
}
}

Object.keys(charMap).forEach(function (keys) {
keys.split('').forEach(function (char) {
charMap[char] = charMap[keys]
})
})
const toReplace = new RegExp('(' + Object.keys(charMap).join('|') + ')', 'g')
function replacer (char) { return charMap[char] || char }

var toReplace = new RegExp('[' + Object.keys(charMap).join('') + ']', 'g')
function replacer (char) { return charMap[char] || char }
return function replaceChars (str) {
return str.replace(toReplace, replacer)
}
function clean (str) {
return String(str).toLowerCase().replace(toReplace, replacer)
}
10 changes: 10 additions & 0 deletions test.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,13 @@ assert.strictEqual(res4c.length, 1)

const res4d = get4('zu reyk')
assert.strictEqual(res4d.length, 0)

// Regression case for false positives of the count-based regex matching.
// 'Sidra iSidra' contains the token "sidra" twice, so a check that only
// compares the number of matches against the number of query words would
// accept it for the two-word query 'Sidra Trabanco' even though "trabanco"
// never occurs in it.
// NOTE(review): `search` is defined outside this hunk — presumably the
// module's exported search factory; confirm against the top of test.js.
const arr5 = ['Sidra Trabanco', 'Sidra Trabanco Selección', 'Sidra iSidra', 'Sidra Acebal']
const get5 = search(arr5)
// Every query word must be present: only the first two entries contain both
// "sidra" and "trabanco", and they are expected in their original order.
const res5a = get5('Sidra Trabanco')
assert.strictEqual(res5a.length, 2)
assert.strictEqual(res5a[0], arr5[0])
assert.strictEqual(res5a[1], arr5[1])

// A single-word query that occurs in every entry must return the whole list.
const res5b = get5('Sidra')
assert.strictEqual(res5b.length, arr5.length)

0 comments on commit 45313f4

Please sign in to comment.