Skip to content

Commit

Permalink
test: update for new component and file format
Browse files (browse the repository at this point in the history)
  • Loading branch information
dhdaines committed Feb 8, 2023
1 parent 163de15 commit 1d2ca59
Show file tree
Hide file tree
Showing 12 changed files with 113 additions and 126 deletions.
56 changes: 22 additions & 34 deletions test/test_align_cli.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -55,8 +55,7 @@ def test_invoke_align(self):
# print(results.output)
self.assertEqual(results.exit_code, 0)
expected_output_files = [
"output.smil",
"output.xml",
"output.ras",
"output.m4a",
"index.html",
"output.TextGrid",
Expand All @@ -72,15 +71,15 @@ def test_invoke_align(self):
)
with open(join(output, "index.html"), encoding="utf8") as f:
self.assertIn(
'<read-along readalong="output.xml" audio="output.m4a"',
'<read-along href="output.ras" audio="output.m4a"',
f.read(),
)
self.assertTrue(
exists(join(output, "tempfiles", "output.tokenized.xml")),
"alignment with -s should have created tempfiles/output.tokenized.xml",
exists(join(output, "tempfiles", "output.tokenized.ras")),
"alignment with -s should have created tempfiles/output.tokenized.ras",
)
with open(
join(output, "tempfiles", "output.tokenized.xml"), "r", encoding="utf-8"
join(output, "tempfiles", "output.tokenized.ras"), "r", encoding="utf-8"
) as f:
self.assertNotIn("\ufeff", f.read())
self.assertTrue(
Expand Down Expand Up @@ -111,16 +110,16 @@ def test_invoke_align(self):
"-s",
"--config",
join(self.data_dir, "sample-config.json"),
self.add_bom(join(self.data_dir, "ej-fra-dna.xml")),
self.add_bom(join(self.data_dir, "ej-fra-dna.ras")),
join(self.data_dir, "ej-fra.m4a"),
output,
],
)
self.assertEqual(results_dna.exit_code, 0)
# print(results_dna.stdout)
self.assertTrue(
exists(join(output, "output.smil")),
"successful alignment with DNA should have created output.smil",
exists(join(output, "output.ras")),
"successful alignment with DNA should have created output.ras",
)
self.assertTrue(
exists(join(output, "output.xhtml")),
Expand All @@ -135,22 +134,11 @@ def test_invoke_align(self):
"Align mode moderate succeeded for sequence 0.", results_dna.stdout
)

# Functionally the same as self.assertTrue(filecmp.cmp(f1, f2)), but show where
# the differences are if the files are not identical
# Since f2 was created using -o xhtml, we need to substitute .xhtml back to .xml during
# the comparison of the contents of the .smil files.
with open(join(output1, "output.smil"), encoding="utf8") as f1, open(
join(output, "output.smil"), encoding="utf8"
) as f2:
self.assertListEqual(
list(f1), [line.replace(".xhtml", ".xml") for line in f2]
)

# We test error situations in the same test case, since we reuse the same outputs
results_output_exists = self.runner.invoke(
align,
[
join(self.data_dir, "ej-fra-dna.xml"),
join(self.data_dir, "ej-fra-dna.ras"),
join(self.data_dir, "ej-fra.m4a"),
output,
],
Expand All @@ -164,9 +152,9 @@ def test_invoke_align(self):
results_output_is_regular_file = self.runner.invoke(
align,
[
join(self.data_dir, "ej-fra-dna.xml"),
join(self.data_dir, "ej-fra-dna.ras"),
join(self.data_dir, "ej-fra.m4a"),
join(output, "output.smil"),
join(output, "output.ras"),
],
)
self.assertNotEqual(results_output_is_regular_file, 0)
Expand All @@ -183,7 +171,7 @@ def test_align_with_package(self):
results_html = self.runner.invoke(
align,
[
join(self.data_dir, "ej-fra-package.xml"),
join(self.data_dir, "ej-fra-package.ras"),
join(self.data_dir, "ej-fra.m4a"),
output,
"-o",
Expand All @@ -204,7 +192,7 @@ def test_align_with_package(self):
htmldoc = fromstring(path_bytes)
b64_pattern = r"data:[\w\/\+]*;base64,\w*"
self.assertRegex(
htmldoc.body.xpath("//read-along")[0].attrib["readalong"], b64_pattern
htmldoc.body.xpath("//read-along")[0].attrib["href"], b64_pattern
)
self.assertRegex(
htmldoc.body.xpath("//read-along")[0].attrib["audio"], b64_pattern
Expand All @@ -227,7 +215,7 @@ def not_test_permission_denied(self):
align,
[
"-f",
join(self.data_dir, "ej-fra-dna.xml"),
join(self.data_dir, "ej-fra-dna.ras"),
join(self.data_dir, "ej-fra.m4a"),
dirname,
],
Expand Down Expand Up @@ -270,7 +258,7 @@ def test_align_english(self):
g2p_ref = '<s id="t0b0d0p0s0"><w id="t0b0d0p0s0w0" ARPABET="DH IH S">This</w> <w id="t0b0d0p0s0w1" ARPABET="IH Z">is</w> <w id="t0b0d0p0s0w2" ARPABET="S AH M">some</w> <w id="t0b0d0p0s0w3" ARPABET="T EH K S T">text</w> <w id="t0b0d0p0s0w4" ARPABET="DH AE T">that</w> <w id="t0b0d0p0s0w5" ARPABET="W IY">we</w> <w id="t0b0d0p0s0w6" ARPABET="W IH L">will</w> <w id="t0b0d0p0s0w7" ARPABET="R AH N">run</w> <w id="t0b0d0p0s0w8" ARPABET="TH R UW">through</w> <w id="t0b0d0p0s0w9" ARPABET="DH AH">the</w> <w id="t0b0d0p0s0w10" ARPABET="IH NG G L IH SH">English</w> <w id="t0b0d0p0s0w11" ARPABET="L EH K S IH K AA N">lexicon</w> <w id="t0b0d0p0s0w12" ARPABET="G R AE F IY M">grapheme</w> <w id="t0b0d0p0s0w13" ARPABET="T UW">to</w> <w id="t0b0d0p0s0w14" ARPABET="M AO R F IY M">morpheme</w> <w id="t0b0d0p0s0w15" ARPABET="AH P R OW CH">approach</w>.</s>'

tokenized_file = join(
self.tempdir, "eng-output", "tempfiles", "eng-output.g2p.xml"
self.tempdir, "eng-output", "tempfiles", "eng-output.g2p.ras"
)
with open(tokenized_file, "r", encoding="utf8") as f:
tok_output = f.read()
Expand Down Expand Up @@ -317,7 +305,7 @@ def test_bad_anchors(self):
<anchor /><s>Bonjour.</s><anchor time="invalid"/>
</p></body></text></read-along>
"""
xml_file = join(self.tempdir, "bad-anchor.xml")
xml_file = join(self.tempdir, "bad-anchor.ras")
with open(xml_file, "w", encoding="utf8") as f:
print(xml_text, file=f)
bad_anchors_result = self.runner.invoke(
Expand Down Expand Up @@ -353,7 +341,7 @@ def test_misc_align_errors(self):
results = self.runner.invoke(
align,
[
join(self.data_dir, "fra-prepared.xml"),
join(self.data_dir, "fra-prepared.ras"),
join(self.data_dir, "noise.mp3"),
join(self.tempdir, "noise-only"),
],
Expand All @@ -367,7 +355,7 @@ def test_misc_align_errors(self):
results = self.runner.invoke(
align,
[
join(self.data_dir, "ej-fra.xml"),
join(self.data_dir, "ej-fra.ras"),
join(self.data_dir, "ej-fra.m4a"),
join(self.tempdir, "two-words"),
],
Expand Down Expand Up @@ -449,7 +437,7 @@ def test_infer_plain_text_or_xml(self):
self.assertIn("Error parsing XML", results.output)

# XML by file extension
infile5 = write_file(join(self.tempdir, "infile5.xml"), "Not XML!")
infile5 = write_file(join(self.tempdir, "infile5.ras"), "Not XML!")
with SoundSwallowerStub("word:0:1"):
results = self.runner.invoke(
align,
Expand Down Expand Up @@ -514,7 +502,7 @@ def test_oo_option(self):
[
"-oo",
"eng-arpabet",
join(self.data_dir, "ej-fra.xml"),
join(self.data_dir, "ej-fra.ras"),
join(self.data_dir, "noise.mp3"),
join(self.tempdir, "outdir9"),
],
Expand All @@ -527,7 +515,7 @@ def test_oo_option(self):
[
"-oo",
"not-an-alphabet",
join(self.data_dir, "ej-fra.xml"),
join(self.data_dir, "ej-fra.ras"),
join(self.data_dir, "noise.mp3"),
join(self.tempdir, "outdir10"),
],
Expand All @@ -542,7 +530,7 @@ def test_oo_option(self):
[
"-oo",
"dan-ipa",
join(self.data_dir, "ej-fra.xml"),
join(self.data_dir, "ej-fra.ras"),
join(self.data_dir, "noise.mp3"),
join(self.tempdir, "outdir11"),
],
Expand Down
8 changes: 4 additions & 4 deletions test/test_anchors.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -19,7 +19,7 @@ def test_anchors_inner_only(self):

# ej-fra-anchors has anchors between words/sentences only
results = align_audio(
os.path.join(self.data_dir, "ej-fra-anchors.xml"),
os.path.join(self.data_dir, "ej-fra-anchors.ras"),
os.path.join(self.data_dir, "ej-fra.m4a"),
)
words = results["words"]
Expand All @@ -40,7 +40,7 @@ def test_anchors_outer_too(self):
# ej-fra-anchors2 also has anchors before the first word and after the last word
save_temps_prefix = os.path.join(self.tempdir, "anchors2-temps")
results = align_audio(
os.path.join(self.data_dir, "ej-fra-anchors2.xml"),
os.path.join(self.data_dir, "ej-fra-anchors2.ras"),
os.path.join(self.data_dir, "ej-fra.m4a"),
save_temps=save_temps_prefix,
)
Expand All @@ -49,7 +49,7 @@ def test_anchors_outer_too(self):
self.assertEqual(len(words), 99)

# Make sure the aligned segments stay on the right side of their anchors,
# including the initial and final ones inserted into anchors2.xml
# including the initial and final ones inserted into anchors2.ras
self.assertGreaterEqual(words[0]["start"], 0.5)
self.assertLessEqual(words[0]["end"], 1.2)
self.assertGreaterEqual(words[1]["start"], 1.2)
Expand Down Expand Up @@ -79,7 +79,7 @@ def test_anchors_align_modes(self):
<anchor time="5.62s"/>
</body></doc>
"""
xml_file = os.path.join(self.tempdir, "text-with-anchors.xml")
xml_file = os.path.join(self.tempdir, "text-with-anchors.ras")
with open(xml_file, "wt", encoding="utf8") as f:
print(xml_with_anchors, file=f)
with self.assertLogs(LOGGER, level="INFO") as cm:
Expand Down
11 changes: 5 additions & 6 deletions test/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ def test_call_align(self):
self.assertTrue(exception is None)
self.assertIn("Words (<w>) not present; tokenizing", log)
expected_output_files = (
"output.smil",
"output.xml",
"output.ras",
"output.m4a",
"output.TextGrid",
"output_sentences.srt",
Expand All @@ -60,25 +59,25 @@ def test_call_align(self):

def test_call_make_xml(self):
(status, exception, log) = api.make_xml(
self.data_dir / "ej-fra.txt", self.tempdir / "prepared.xml", ("fra", "eng")
self.data_dir / "ej-fra.txt", self.tempdir / "prepared.ras", ("fra", "eng")
)
self.assertEqual(status, 0)
self.assertTrue(exception is None)
self.assertIn("Wrote ", log)
with open(self.tempdir / "prepared.xml") as f:
with open(self.tempdir / "prepared.ras") as f:
xml_text = f.read()
self.assertIn('xml:lang="fra" fallback-langs="eng,und"', xml_text)

(status, exception, log) = api.make_xml(
self.data_dir / "ej-fra.txt",
self.tempdir / "bad.xml",
self.tempdir / "bad.ras",
("fra", "not-a-lang"),
)
self.assertNotEqual(status, 0)
self.assertTrue(isinstance(exception, click.BadParameter))

(status, exception, log) = api.make_xml(
self.data_dir / "file-not-found.txt", self.tempdir / "none.xml", ("fra",)
self.data_dir / "file-not-found.txt", self.tempdir / "none.ras", ("fra",)
)
self.assertNotEqual(status, 0)
self.assertTrue(isinstance(exception, click.UsageError))
Expand Down
24 changes: 12 additions & 12 deletions test/test_audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,11 @@ def test_align_sample(self):
if process.returncode != 0:
LOGGER.error("Subprocess readalongs align failed: %s", process.stderr)
# Check Result
smilpath = Path(output_path)
smil_files = smilpath.glob("*.smil")
raspath = Path(output_path)
ras_files = raspath.glob("*.ras")
self.assertTrue(
next(smil_files, False),
"No *.smil files found; "
next(ras_files, False),
"No *.ras files found; "
"pip install --force-reinstall --upgrade might be required "
"if dependencies changed.",
)
Expand All @@ -108,11 +108,11 @@ def test_align_removed(self):
if process.returncode != 0:
LOGGER.error("Subprocess readalongs align failed: %s", process.stderr)
# Check Result
smilpath = Path(output_path)
smil_files = smilpath.glob("*.smil")
raspath = Path(output_path)
ras_files = raspath.glob("*.ras")
self.assertTrue(
next(smil_files, False),
"No *.smil files found; "
next(ras_files, False),
"No *.ras files found; "
"pip install --force-reinstall --upgrade might be required "
"if dependencies changed.",
)
Expand All @@ -135,11 +135,11 @@ def test_align_muted(self):
if process.returncode != 0:
LOGGER.error("Subprocess readalongs align failed: %s", process.stderr)
# Check Result
smilpath = Path(output_path)
smil_files = smilpath.glob("*.smil")
raspath = Path(output_path)
ras_files = raspath.glob("*.ras")
self.assertTrue(
next(smil_files, False),
"No *.smil files found; "
next(ras_files, False),
"No *.ras files found; "
"pip install --force-reinstall --upgrade might be required "
"if dependencies changed.",
)
Expand Down
20 changes: 10 additions & 10 deletions test/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,29 +19,29 @@ class TestConfig(TestCase):
@classmethod
def setUpClass(cls):
data_dir = os.path.join(os.path.dirname(__file__), "data")
cls.xml = load_xml(os.path.join(data_dir, "ej-fra.xml"))
cls.ras = load_xml(os.path.join(data_dir, "ej-fra.ras"))

def test_image(self):
"""Test images are added correctly"""
with self.assertRaises(KeyError):
new_xml = add_images(self.xml, {})
new_xml = add_images(self.xml, {"images": {"0": "test.jpg"}})
new_xml = add_images(self.ras, {})
new_xml = add_images(self.ras, {"images": {"0": "test.jpg"}})
self.assertTrue(len(new_xml.xpath("//graphic")) == 1)
with self.assertRaises(TypeError):
new_xml = add_images(self.xml, {"images": [{"0": "test.jpg"}]})
new_xml = add_images(self.ras, {"images": [{"0": "test.jpg"}]})
with self.assertRaises(ValueError):
new_xml = add_images(self.xml, {"images": {"a": "test.jpg"}})
new_xml = add_images(self.ras, {"images": {"a": "test.jpg"}})
with self.assertRaises(IndexError):
new_xml = add_images(
self.xml, {"images": {"0": "test.jpg", "999": "out_of_range.jpg"}}
self.ras, {"images": {"0": "test.jpg", "999": "out_of_range.jpg"}}
)

def test_arbitrary_xml(self):
"""Test arbitrary xml is added correctly"""
with self.assertRaises(KeyError):
new_xml = add_supplementary_xml(self.xml, {})
new_xml = add_supplementary_xml(self.ras, {})
new_xml = add_supplementary_xml(
self.xml,
self.ras,
{
"xml": [
{
Expand All @@ -56,14 +56,14 @@ def test_arbitrary_xml(self):
# bad xml raises lxml.etree.XMLSyntaxError
with self.assertRaises(etree.XMLSyntaxError):
new_xml = add_supplementary_xml(
self.xml, {"xml": [{"xpath": "//div[1]", "value": "bloop"}]}
self.ras, {"xml": [{"xpath": "//div[1]", "value": "bloop"}]}
)

# if xpath isn't valid, log warning
log_output = io.StringIO()
with redirect_stderr(log_output):
new_xml = add_supplementary_xml(
self.xml,
self.ras,
{
"xml": [
{
Expand Down
Loading

0 comments on commit 1d2ca59

Please sign in to comment.