Skip to content

Commit

Permalink
more robust umlauts handling
Browse files (browse the repository at this point in the history)
  • Loading branch information
derhuerst committed Mar 28, 2016
1 parent 8990f31 commit 6098606
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 16 deletions.
17 changes: 4 additions & 13 deletions index.js
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
'use strict'

const normalize = require('normalize-for-search')



const g = { // global patterns
ß: /ß/g
, ä: /ä/g
, ö: /ö/g
, ü: /ü/g
, pl: /\(pl\)/g
pl: /\(pl\)/g
, berlin: /\((berlin|bln)\)/g
, S: /^s\s/
, U: /^u\s/
Expand All @@ -27,14 +25,7 @@ const delimiter = /[\s\/\(\)\-,\.]+/
// Matches every character that is neither a word character nor whitespace,
// plus the underscore (which `\w` alone would let through).
const specialChars = /[^\w\s]|_/g

// Predicate: true when `x` reports a `length` greater than zero.
const isNotEmpty = (x) => {
	return x.length > 0
}

const tokenize = (station) => station
.toLowerCase()

// German umlauts
.replace(g.ß, 'ss')
.replace(g.ä, 'ae')
.replace(g.ö, 'oe')
.replace(g.ü, 'ue')
const tokenize = (station) => normalize(station)

// expand abbreviations
.replace(g.pl, 'polen')
Expand Down
9 changes: 6 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,13 @@
"repository": "git://github.com/derhuerst/vbb-tokenize-station.git",
"license": "ISC",
"engines": {"node": ">=4"},
"dependencies": {
"normalize-for-search": "^1.2.0"
},
"devDependencies": {
"coffee-script": "^1.10",
"nodeunit": "^0.9.1",
"watchy": "^0.6.6"
"coffee-script": "^1.10",
"nodeunit": "^0.9.1",
"watchy": "^0.6.6"
},
"scripts": {
"test": "nodeunit --reporter minimal test.coffee",
Expand Down

0 comments on commit 6098606

Please sign in to comment.