test(lyra): improves test coverage (#23)
1 parent dd65b9e · commit 7a82bcb
Showing 2 changed files with 257 additions and 0 deletions.
packages/lyra/tests/__snapshots__/tokenizer.test.ts.snap (157 additions & 0 deletions)
```
@@ -0,0 +1,157 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP

exports[`Should tokenize and stem correctly in english 1`] = `
Set {
  "the",
  "quick",
  "brown",
  "fox",
  "jump",
  "over",
  "lazi",
  "dog",
}
`;

exports[`Should tokenize and stem correctly in english 2`] = `
Set {
  "I",
  "bake",
  "some",
  "cake",
}
`;

exports[`Should tokenize and stem correctly in french 1`] = `
Set {
  "voyon",
  "quel",
  "temp",
  "il",
  "fait",
  "dehor",
}
`;

exports[`Should tokenize and stem correctly in french 2`] = `
Set {
  "j",
  "ai",
  "fait",
  "de",
  "g",
  "teau",
}
`;

exports[`Should tokenize and stem correctly in italian 1`] = `
Set {
  "ho",
  "cucin",
  "dell",
  "tort",
}
`;

exports[`Should tokenize and stem correctly in italian 2`] = `
Set {
  "dorm",
  "una",
  "cos",
  "difficil",
  "quand",
  "i",
  "test",
  "non",
  "pass",
}
`;

exports[`Should tokenize and stem correctly in norwegian 1`] = `
Set {
  "jeg",
  "kokt",
  "noen",
  "kak",
}
`;

exports[`Should tokenize and stem correctly in norwegian 2`] = `
Set {
  "sov",
  "er",
  "en",
  "vansk",
  "ting",
  "n",
  "r",
  "test",
  "mislykk",
}
`;

exports[`Should tokenize and stem correctly in portugese 1`] = `
Set {
  "eu",
  "cozinh",
  "alguns",
  "bol",
}
`;

exports[`Should tokenize and stem correctly in portugese 2`] = `
Set {
  "dorm",
  "uma",
  "cois",
  "dif",
  "cil",
  "quand",
  "os",
  "test",
  "falh",
}
`;

exports[`Should tokenize and stem correctly in russian 1`] = `
Set {
  "я",
  "приготов",
  "пирожн",
}
`;

exports[`Should tokenize and stem correctly in russian 2`] = `
Set {
  "спат",
  "трудн",
  "когд",
  "тест",
  "не",
  "срабатыва",
}
`;

exports[`Should tokenize and stem correctly in swedish 1`] = `
Set {
  "jag",
  "lag",
  "n",
  "gra",
  "kak",
}
`;

exports[`Should tokenize and stem correctly in swedish 2`] = `
Set {
  "att",
  "sov",
  "r",
  "en",
  "sv",
  "sak",
  "n",
  "test",
  "misslyck",
}
`;
```
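Read together, these snapshots pin down the observable contract of `tokenize`: the input is split on anything outside a basic word-character range (which is why accented words like "gâteaux" and "når" shatter into fragments such as "g"/"teau" and "n"/"r"), each fragment is run through a language-specific stemmer, and the results land in a `Set`, so repeats like the two "the"s in the English sentence are deduplicated. A minimal sketch of that contract follows; the `Language` union, the boundary regex, and the `stem` stub are assumptions for illustration, not lyra's actual internals:

```ts
// Hypothetical sketch of the contract the snapshots above exercise.
// NOT lyra's source: the Language union, the boundary regex, and the
// stem() stub are assumptions for illustration only.
type Language =
  | "english"
  | "french"
  | "italian"
  | "norwegian"
  | "portugese" // spelled as in the tests
  | "russian"
  | "swedish";

// Stand-in stemmer: a real one would be language-specific (Snowball-style,
// e.g. "lazy" -> "lazi", "пирожные" -> "пирожн"); this stub just echoes.
function stem(word: string, language: Language): string {
  void language; // a real stemmer branches on the language
  return word;
}

export function tokenize(input: string, language: Language): Set<string> {
  return new Set(
    input
      // Treat anything outside Latin/Cyrillic letters, digits, and "_" as a
      // boundary; this reproduces the "g" / "teaux" split seen above.
      // (Case normalization and stemming order are elided in this sketch.)
      .split(/[^a-zA-Zа-яА-ЯёЁ0-9_]+/)
      .filter((token) => token.length > 0)
      .map((token) => stem(token, language)),
  );
}
```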
packages/lyra/tests/tokenizer.test.ts (100 additions & 0 deletions)

```ts
@@ -0,0 +1,100 @@
import { tokenize } from "../src/tokenizer";

it("Should tokenize and stem correctly in english", () => {
  const I1 = "the quick brown fox jumps over the lazy dog";
  const I2 = "I baked some cakes";

  const O1 = tokenize(I1, "english");
  const O2 = tokenize(I2, "english");

  expect(O1).toMatchSnapshot();
  expect(O2).toMatchSnapshot();
});

it("Should tokenize and stem correctly in french", () => {
  const I1 = "voyons quel temps il fait dehors";
  const I2 = "j'ai fait des gâteaux";

  const O1 = tokenize(I1, "french");
  const O2 = tokenize(I2, "french");

  expect(O1).toMatchSnapshot();
  expect(O2).toMatchSnapshot();
});

it("Should tokenize and stem correctly in italian", () => {
  const I1 = "ho cucinato delle torte";
  const I2 = "dormire è una cosa difficile quando i test non passano";

  const O1 = tokenize(I1, "italian");
  const O2 = tokenize(I2, "italian");

  expect(O1).toMatchSnapshot();
  expect(O2).toMatchSnapshot();
});

it("Should tokenize and stem correctly in norwegian", () => {
  const I1 = "Jeg kokte noen kaker";
  const I2 = "å sove er en vanskelig ting når testene mislykkes";

  const O1 = tokenize(I1, "norwegian");
  const O2 = tokenize(I2, "norwegian");

  expect(O1).toMatchSnapshot();
  expect(O2).toMatchSnapshot();
});

it("Should tokenize and stem correctly in portugese", () => {
  const I1 = "Eu cozinhei alguns bolos";
  const I2 = "dormir é uma coisa difícil quando os testes falham";

  const O1 = tokenize(I1, "portugese");
  const O2 = tokenize(I2, "portugese");

  expect(O1).toMatchSnapshot();
  expect(O2).toMatchSnapshot();
});

it("Should tokenize and stem correctly in russian", () => {
  const I1 = "я приготовила пирожные";
  const I2 = "спать трудно, когда тесты не срабатывают";

  const O1 = tokenize(I1, "russian");
  const O2 = tokenize(I2, "russian");

  expect(O1).toMatchSnapshot();
  expect(O2).toMatchSnapshot();
});

it("Should tokenize and stem correctly in swedish", () => {
  const I1 = "Jag lagade några kakor";
  const I2 = "att sova är en svår sak när testerna misslyckas";

  const O1 = tokenize(I1, "swedish");
  const O2 = tokenize(I2, "swedish");

  expect(O1).toMatchSnapshot();
  expect(O2).toMatchSnapshot();
});

it.skip("Should tokenize and stem correctly in spanish", () => {
  const I1 = "cociné unos pasteles";
  const I2 = "dormir es algo dificil cuando las pruebas fallan";

  const O1 = tokenize(I1, "spanish");
  const O2 = tokenize(I2, "spanish");

  expect(O1).toMatchSnapshot();
  expect(O2).toMatchSnapshot();
});

it.skip("Should tokenize and stem correctly in dutch", () => {
  const I1 = "de kleine koeien";
  const I2 = "Ik heb wat taarten gemaakt";

  const O1 = tokenize(I1, "dutch");
  const O2 = tokenize(I2, "dutch");

  expect(O1).toMatchSnapshot();
  expect(O2).toMatchSnapshot();
});
```
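One workflow note: the Spanish and Dutch cases are wired up but disabled with `it.skip`, presumably pending stemmer support for those languages; enabling them is just a matter of dropping `.skip`, after which Jest writes their snapshots on the first run. If a later stemmer change legitimately shifts the tokens, the existing snapshots above can be refreshed with Jest's `--updateSnapshot` (`-u`) flag.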