test(lyra): improves test coverage (#23)
1 parent dd65b9e · commit 7a82bcb
Showing 2 changed files with 257 additions and 0 deletions.
packages/lyra/tests/__snapshots__/tokenizer.test.ts.snap (157 additions & 0 deletions)
```
@@ -0,0 +1,157 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP

exports[`Should tokenize and stem correctly in english 1`] = `
Set {
  "the",
  "quick",
  "brown",
  "fox",
  "jump",
  "over",
  "lazi",
  "dog",
}
`;

exports[`Should tokenize and stem correctly in english 2`] = `
Set {
  "I",
  "bake",
  "some",
  "cake",
}
`;

exports[`Should tokenize and stem correctly in french 1`] = `
Set {
  "voyon",
  "quel",
  "temp",
  "il",
  "fait",
  "dehor",
}
`;

exports[`Should tokenize and stem correctly in french 2`] = `
Set {
  "j",
  "ai",
  "fait",
  "de",
  "g",
  "teau",
}
`;

exports[`Should tokenize and stem correctly in italian 1`] = `
Set {
  "ho",
  "cucin",
  "dell",
  "tort",
}
`;

exports[`Should tokenize and stem correctly in italian 2`] = `
Set {
  "dorm",
  "una",
  "cos",
  "difficil",
  "quand",
  "i",
  "test",
  "non",
  "pass",
}
`;

exports[`Should tokenize and stem correctly in norwegian 1`] = `
Set {
  "jeg",
  "kokt",
  "noen",
  "kak",
}
`;

exports[`Should tokenize and stem correctly in norwegian 2`] = `
Set {
  "sov",
  "er",
  "en",
  "vansk",
  "ting",
  "n",
  "r",
  "test",
  "mislykk",
}
`;

exports[`Should tokenize and stem correctly in portugese 1`] = `
Set {
  "eu",
  "cozinh",
  "alguns",
  "bol",
}
`;

exports[`Should tokenize and stem correctly in portugese 2`] = `
Set {
  "dorm",
  "uma",
  "cois",
  "dif",
  "cil",
  "quand",
  "os",
  "test",
  "falh",
}
`;

exports[`Should tokenize and stem correctly in russian 1`] = `
Set {
  "я",
  "приготов",
  "пирожн",
}
`;

exports[`Should tokenize and stem correctly in russian 2`] = `
Set {
  "спат",
  "трудн",
  "когд",
  "тест",
  "не",
  "срабатыва",
}
`;

exports[`Should tokenize and stem correctly in swedish 1`] = `
Set {
  "jag",
  "lag",
  "n",
  "gra",
  "kak",
}
`;

exports[`Should tokenize and stem correctly in swedish 2`] = `
Set {
  "att",
  "sov",
  "r",
  "en",
  "sv",
  "sak",
  "n",
  "test",
  "misslyck",
}
`;
```
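Read together, these snapshots pin down the observable contract of `tokenize`: the input is split on anything outside a basic word-character range (which is why accented words like "gâteaux" and "når" shatter into fragments such as "g"/"teau" and "n"/"r"), each fragment is run through a language-specific stemmer, and the results land in a `Set`, so repeats like the two "the"s in the English sentence are deduplicated. A minimal sketch of that contract follows; the `Language` union, the boundary regex, and the `stem` stub are assumptions for illustration, not lyra's actual internals:

```ts
// Hypothetical sketch of the contract the snapshots above exercise.
// NOT lyra's source: the Language union, the boundary regex, and the
// stem() stub are assumptions for illustration only.
type Language =
  | "english"
  | "french"
  | "italian"
  | "norwegian"
  | "portugese" // spelled as in the tests
  | "russian"
  | "swedish";

// Stand-in stemmer: a real one would be language-specific (Snowball-style,
// e.g. "lazy" -> "lazi", "пирожные" -> "пирожн"); this stub just echoes.
function stem(word: string, language: Language): string {
  void language; // a real stemmer branches on the language
  return word;
}

export function tokenize(input: string, language: Language): Set<string> {
  return new Set(
    input
      // Treat anything outside Latin/Cyrillic letters, digits, and "_" as a
      // boundary; this reproduces the "g" / "teaux" split seen above.
      // (Case normalization and stemming order are elided in this sketch.)
      .split(/[^a-zA-Zа-яА-ЯёЁ0-9_]+/)
      .filter((token) => token.length > 0)
      .map((token) => stem(token, language)),
  );
}
```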
packages/lyra/tests/tokenizer.test.ts (100 additions & 0 deletions)

```ts
@@ -0,0 +1,100 @@
import { tokenize } from "../src/tokenizer";

it("Should tokenize and stem correctly in english", () => {
  const I1 = "the quick brown fox jumps over the lazy dog";
  const I2 = "I baked some cakes";

  const O1 = tokenize(I1, "english");
  const O2 = tokenize(I2, "english");

  expect(O1).toMatchSnapshot();
  expect(O2).toMatchSnapshot();
});

it("Should tokenize and stem correctly in french", () => {
  const I1 = "voyons quel temps il fait dehors";
  const I2 = "j'ai fait des gâteaux";

  const O1 = tokenize(I1, "french");
  const O2 = tokenize(I2, "french");

  expect(O1).toMatchSnapshot();
  expect(O2).toMatchSnapshot();
});

it("Should tokenize and stem correctly in italian", () => {
  const I1 = "ho cucinato delle torte";
  const I2 = "dormire è una cosa difficile quando i test non passano";

  const O1 = tokenize(I1, "italian");
  const O2 = tokenize(I2, "italian");

  expect(O1).toMatchSnapshot();
  expect(O2).toMatchSnapshot();
});

it("Should tokenize and stem correctly in norwegian", () => {
  const I1 = "Jeg kokte noen kaker";
  const I2 = "å sove er en vanskelig ting når testene mislykkes";

  const O1 = tokenize(I1, "norwegian");
  const O2 = tokenize(I2, "norwegian");

  expect(O1).toMatchSnapshot();
  expect(O2).toMatchSnapshot();
});

it("Should tokenize and stem correctly in portugese", () => {
  const I1 = "Eu cozinhei alguns bolos";
  const I2 = "dormir é uma coisa difícil quando os testes falham";

  const O1 = tokenize(I1, "portugese");
  const O2 = tokenize(I2, "portugese");

  expect(O1).toMatchSnapshot();
  expect(O2).toMatchSnapshot();
});

it("Should tokenize and stem correctly in russian", () => {
  const I1 = "я приготовила пирожные";
  const I2 = "спать трудно, когда тесты не срабатывают";

  const O1 = tokenize(I1, "russian");
  const O2 = tokenize(I2, "russian");

  expect(O1).toMatchSnapshot();
  expect(O2).toMatchSnapshot();
});

it("Should tokenize and stem correctly in swedish", () => {
  const I1 = "Jag lagade några kakor";
  const I2 = "att sova är en svår sak när testerna misslyckas";

  const O1 = tokenize(I1, "swedish");
  const O2 = tokenize(I2, "swedish");

  expect(O1).toMatchSnapshot();
  expect(O2).toMatchSnapshot();
});

it.skip("Should tokenize and stem correctly in spanish", () => {
  const I1 = "cociné unos pasteles";
  const I2 = "dormir es algo dificil cuando las pruebas fallan";

  const O1 = tokenize(I1, "spanish");
  const O2 = tokenize(I2, "spanish");

  expect(O1).toMatchSnapshot();
  expect(O2).toMatchSnapshot();
});

it.skip("Should tokenize and stem correctly in dutch", () => {
  const I1 = "de kleine koeien";
  const I2 = "Ik heb wat taarten gemaakt";

  const O1 = tokenize(I1, "dutch");
  const O2 = tokenize(I2, "dutch");

  expect(O1).toMatchSnapshot();
  expect(O2).toMatchSnapshot();
});
```
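One workflow note: the Spanish and Dutch cases are wired up but disabled with `it.skip`, presumably pending stemmer support for those languages; enabling them is just a matter of dropping `.skip`, after which Jest writes their snapshots on the first run. If a later stemmer change legitimately shifts the tokens, the existing snapshots above can be refreshed with Jest's `--updateSnapshot` (`-u`) flag.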