Skip to content

Commit

Permalink
test(lyra): improves test coverage (#23)
Browse files Browse the repository at this point in the history
  • Loading branch information
micheleriva authored Jul 18, 2022
1 parent dd65b9e commit 7a82bcb
Show file tree
Hide file tree
Showing 2 changed files with 257 additions and 0 deletions.
157 changes: 157 additions & 0 deletions packages/lyra/tests/__snapshots__/tokenizer.test.ts.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP

// NOTE(review): this file is auto-generated by Jest from
// packages/lyra/tests/tokenizer.test.ts — do not edit by hand; regenerate
// with `jest -u`. Each key is the owning test's title plus a 1-based counter
// for successive toMatchSnapshot() calls within that test.
// The spelling "portugese" mirrors the key used in the test titles and must
// stay in sync with them.
// Entries such as "g"/"teau" (from "gâteaux") or "n"/"r" (from "når") suggest
// the tokenizer splits on non-ASCII letters — TODO confirm this is intended.

exports[`Should tokenize and stem correctly in english 1`] = `
Set {
"the",
"quick",
"brown",
"fox",
"jump",
"over",
"lazi",
"dog",
}
`;

exports[`Should tokenize and stem correctly in english 2`] = `
Set {
"I",
"bake",
"some",
"cake",
}
`;

exports[`Should tokenize and stem correctly in french 1`] = `
Set {
"voyon",
"quel",
"temp",
"il",
"fait",
"dehor",
}
`;

exports[`Should tokenize and stem correctly in french 2`] = `
Set {
"j",
"ai",
"fait",
"de",
"g",
"teau",
}
`;

exports[`Should tokenize and stem correctly in italian 1`] = `
Set {
"ho",
"cucin",
"dell",
"tort",
}
`;

exports[`Should tokenize and stem correctly in italian 2`] = `
Set {
"dorm",
"una",
"cos",
"difficil",
"quand",
"i",
"test",
"non",
"pass",
}
`;

exports[`Should tokenize and stem correctly in norwegian 1`] = `
Set {
"jeg",
"kokt",
"noen",
"kak",
}
`;

exports[`Should tokenize and stem correctly in norwegian 2`] = `
Set {
"sov",
"er",
"en",
"vansk",
"ting",
"n",
"r",
"test",
"mislykk",
}
`;

exports[`Should tokenize and stem correctly in portugese 1`] = `
Set {
"eu",
"cozinh",
"alguns",
"bol",
}
`;

exports[`Should tokenize and stem correctly in portugese 2`] = `
Set {
"dorm",
"uma",
"cois",
"dif",
"cil",
"quand",
"os",
"test",
"falh",
}
`;

exports[`Should tokenize and stem correctly in russian 1`] = `
Set {
"я",
"приготов",
"пирожн",
}
`;

exports[`Should tokenize and stem correctly in russian 2`] = `
Set {
"спат",
"трудн",
"когд",
"тест",
"не",
"срабатыва",
}
`;

exports[`Should tokenize and stem correctly in swedish 1`] = `
Set {
"jag",
"lag",
"n",
"gra",
"kak",
}
`;

exports[`Should tokenize and stem correctly in swedish 2`] = `
Set {
"att",
"sov",
"r",
"en",
"sv",
"sak",
"n",
"test",
"misslyck",
}
`;
100 changes: 100 additions & 0 deletions packages/lyra/tests/tokenizer.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import { tokenize } from "../src/tokenizer";

it("Should tokenize and stem correctly in english", () => {
  // Two snapshots per language; jest numbers them 1 and 2 in call order.
  const sentences = [
    "the quick brown fox jumps over the lazy dog",
    "I baked some cakes",
  ];

  for (const sentence of sentences) {
    expect(tokenize(sentence, "english")).toMatchSnapshot();
  }
});

it("Should tokenize and stem correctly in french", () => {
  // Inline the inputs; snapshot order (1, 2) follows call order.
  const first = tokenize("voyons quel temps il fait dehors", "french");
  const second = tokenize("j'ai fait des gâteaux", "french");

  expect(first).toMatchSnapshot();
  expect(second).toMatchSnapshot();
});

it("Should tokenize and stem correctly in italian", () => {
  const sentences = [
    "ho cucinato delle torte",
    "dormire è una cosa difficile quando i test non passano",
  ];

  for (const sentence of sentences) {
    expect(tokenize(sentence, "italian")).toMatchSnapshot();
  }
});

it("Should tokenize and stem correctly in norwegian", () => {
  const first = tokenize("Jeg kokte noen kaker", "norwegian");
  const second = tokenize(
    "å sove er en vanskelig ting når testene mislykkes",
    "norwegian",
  );

  expect(first).toMatchSnapshot();
  expect(second).toMatchSnapshot();
});

// NOTE(review): "portugese" (sic) is the language key the tokenizer accepts and
// the key baked into the existing snapshots — keep the spelling as-is.
it("Should tokenize and stem correctly in portugese", () => {
  const sentences = [
    "Eu cozinhei alguns bolos",
    "dormir é uma coisa difícil quando os testes falham",
  ];

  for (const sentence of sentences) {
    expect(tokenize(sentence, "portugese")).toMatchSnapshot();
  }
});

it("Should tokenize and stem correctly in russian", () => {
  const sentences = [
    "я приготовила пирожные",
    "спать трудно, когда тесты не срабатывают",
  ];

  for (const sentence of sentences) {
    expect(tokenize(sentence, "russian")).toMatchSnapshot();
  }
});

it("Should tokenize and stem correctly in swedish", () => {
  const first = tokenize("Jag lagade några kakor", "swedish");
  const second = tokenize(
    "att sova är en svår sak när testerna misslyckas",
    "swedish",
  );

  expect(first).toMatchSnapshot();
  expect(second).toMatchSnapshot();
});

// Skipped: spanish support is not exercised yet — presumably pending stemmer
// support; confirm before enabling.
it.skip("Should tokenize and stem correctly in spanish", () => {
  const sentences = [
    "cociné unos pasteles",
    "dormir es algo dificil cuando las pruebas fallan",
  ];

  for (const sentence of sentences) {
    expect(tokenize(sentence, "spanish")).toMatchSnapshot();
  }
});

// Skipped: dutch support is not exercised yet — presumably pending stemmer
// support; confirm before enabling.
it.skip("Should tokenize and stem correctly in dutch", () => {
  const sentences = ["de kleine koeien", "Ik heb wat taarten gemaakt"];

  for (const sentence of sentences) {
    expect(tokenize(sentence, "dutch")).toMatchSnapshot();
  }
});

0 comments on commit 7a82bcb

Please sign in to comment.