Skip to content

Commit

Permalink
Fix the JavaScript search term splitting for compound words
Browse files Browse the repository at this point in the history
Previously, the JavaScript search code split the search query only on
spaces, which meant that a compound word in the search query, such as
"mkdocs-material", would be treated as the single term
"mkdocs-material".  In contrast, when building the search index, the
`/\w+/` regexp is used to split compound words, such that the index
would only contain "mkdoc" and "material" as separate terms.
Consequently, "mkdocs-material" as a single term would never be found.

With this change, the JavaScript search query splitting matches what
is done when building the index.

Fixes #262.
  • Loading branch information
jbms committed Dec 12, 2023
1 parent f7bbc8f commit 143b469
Showing 1 changed file with 36 additions and 23 deletions.
59 changes: 36 additions & 23 deletions src/assets/javascripts/sphinx_search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -709,36 +709,49 @@ export async function getResults(query: string): Promise<SearchResultStream> {
// Object search terms.
const objectterms = []

for (const origTerm of splitQuery(query)) {
const lowerTerm = origTerm.toLowerCase()
for (let origTerm of splitQuery(query)) {
let negative = false;
if (origTerm[0] === '-') {
negative = true;
origTerm = origTerm.substr(1);
}
let lowerTerm = origTerm.toLowerCase()
if (lowerTerm.length === 0) {
continue
}
objectterms.push(lowerTerm)

if (stopwords.indexOf(lowerTerm) !== -1) {
// skip this "word"
continue
}
// stem the word
let word = stemmer.stemWord(lowerTerm)
// prevent stemmer from cutting word smaller than two chars
if (word.length < 3 && lowerTerm.length >= 3) {
word = lowerTerm
let atLeastOneWord = false
// The search term may be made up of multiple "words" separated
// by special characters like [-._]. Split them up and treat each
// as a separate search term.
for (const wordMatch of lowerTerm.matchAll(/\w+/g)) {
const subTerm = wordMatch[0];
if (stopwords.indexOf(subTerm) !== -1) {
// skip this "word"
continue
}
// stem the word
let word = stemmer.stemWord(subTerm)
// prevent stemmer from cutting word smaller than two chars
if (word.length < 3 && subTerm.length >= 3) {
word = subTerm
}
let toAppend: string[]
// select the correct list
if (negative) {
toAppend = excluded
} else {
toAppend = searchterms
atLeastOneWord = true
}
// only add if not already in the list
if (toAppend.indexOf(word) === -1) {
toAppend.push(word)
}
}
let toAppend: string[]
// select the correct list
if (word[0] === "-") {
toAppend = excluded
word = word.substr(1)
} else {
toAppend = searchterms
if (!negative && atLeastOneWord) {
hlterms.push(lowerTerm)
}
// only add if not already in the list
if (toAppend.indexOf(word) === -1) {
toAppend.push(word)
}
}

// console.debug('SEARCH: searching for:');
Expand Down

0 comments on commit 143b469

Please sign in to comment.