From b7285f5de5ef7b1a379c37af92b6ec922e120e85 Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Wed, 8 Feb 2023 10:52:08 -0500 Subject: [PATCH] feat: refine the DTD somewhat - add a version number in comment and filename - set the root element to `` and add attributes from web component - allow things in a lot more places - add `` --- readalongs/align.py | 4 +- .../{readalong.dtd => read-along-0.1.dtd} | 41 +++++++++++++++---- test/data/ej-fra-anchors.xml | 4 +- test/data/ej-fra-anchors2.xml | 4 +- test/data/ej-fra-converted.xml | 4 +- test/data/ej-fra-dna.xml | 4 +- test/data/ej-fra-package.xml | 4 +- test/data/ej-fra-silence-bad.xml | 4 +- test/data/ej-fra-silence.xml | 4 +- test/data/ej-fra.xml | 4 +- test/data/fra-prepared.xml | 4 +- test/data/fra-tokenized.xml | 4 +- test/data/mixed-langs.g2p.xml | 4 +- test/data/mixed-langs.tokenized.xml | 4 +- test/data/mixed-langs.xml | 4 +- test/data/patrickxtlan.xml | 4 +- test/test_align_cli.py | 4 +- test/test_misc.py | 4 +- 18 files changed, 67 insertions(+), 42 deletions(-) rename readalongs/static/{readalong.dtd => read-along-0.1.dtd} (53%) diff --git a/readalongs/align.py b/readalongs/align.py index c7fa03e7..876e3098 100644 --- a/readalongs/align.py +++ b/readalongs/align.py @@ -1141,7 +1141,7 @@ def convert_to_xhtml(tokenized_xml, title="Book"): RAS_TEMPLATE = """ - + {{#pages}} @@ -1157,7 +1157,7 @@ def convert_to_xhtml(tokenized_xml, title="Book"): {{/pages}} - + """ diff --git a/readalongs/static/readalong.dtd b/readalongs/static/read-along-0.1.dtd similarity index 53% rename from readalongs/static/readalong.dtd rename to readalongs/static/read-along-0.1.dtd index 924c7b37..692c518b 100644 --- a/readalongs/static/readalong.dtd +++ b/readalongs/static/read-along-0.1.dtd @@ -1,14 +1,24 @@ - - + + + + + - + @@ -22,44 +32,59 @@ url CDATA #REQUIRED id CDATA #IMPLIED> - + + + + - + - + - + - + - +
@@ -23,4 +23,4 @@
-
+ diff --git a/test/data/ej-fra-anchors2.xml b/test/data/ej-fra-anchors2.xml index 015bc81d..c10763dd 100644 --- a/test/data/ej-fra-anchors2.xml +++ b/test/data/ej-fra-anchors2.xml @@ -1,5 +1,5 @@ - + @@ -25,4 +25,4 @@ - + diff --git a/test/data/ej-fra-converted.xml b/test/data/ej-fra-converted.xml index dec81f78..ab9b06eb 100644 --- a/test/data/ej-fra-converted.xml +++ b/test/data/ej-fra-converted.xml @@ -1,5 +1,5 @@ - +
@@ -21,4 +21,4 @@
-
+ diff --git a/test/data/ej-fra-dna.xml b/test/data/ej-fra-dna.xml index 9840fb7c..2510f901 100644 --- a/test/data/ej-fra-dna.xml +++ b/test/data/ej-fra-dna.xml @@ -1,5 +1,5 @@ - +
@@ -28,4 +28,4 @@
-
+ diff --git a/test/data/ej-fra-package.xml b/test/data/ej-fra-package.xml index 8c929416..e4411469 100644 --- a/test/data/ej-fra-package.xml +++ b/test/data/ej-fra-package.xml @@ -1,5 +1,5 @@ - +
@@ -23,4 +23,4 @@
-
+ diff --git a/test/data/ej-fra-silence-bad.xml b/test/data/ej-fra-silence-bad.xml index 6d283bdb..5f67c5b7 100644 --- a/test/data/ej-fra-silence-bad.xml +++ b/test/data/ej-fra-silence-bad.xml @@ -1,5 +1,5 @@ - +
@@ -21,4 +21,4 @@
-
+ diff --git a/test/data/ej-fra-silence.xml b/test/data/ej-fra-silence.xml index e8fed71f..d407583e 100644 --- a/test/data/ej-fra-silence.xml +++ b/test/data/ej-fra-silence.xml @@ -1,5 +1,5 @@ - +
@@ -21,4 +21,4 @@
-
+ diff --git a/test/data/ej-fra.xml b/test/data/ej-fra.xml index d901063a..1c0d1a43 100644 --- a/test/data/ej-fra.xml +++ b/test/data/ej-fra.xml @@ -1,5 +1,5 @@ - +
@@ -21,4 +21,4 @@
-
+ diff --git a/test/data/fra-prepared.xml b/test/data/fra-prepared.xml index 5b22c9d2..729bbdf3 100644 --- a/test/data/fra-prepared.xml +++ b/test/data/fra-prepared.xml @@ -1,5 +1,5 @@ - +
@@ -21,4 +21,4 @@
-
+ diff --git a/test/data/fra-tokenized.xml b/test/data/fra-tokenized.xml index c1d15083..7ade0ffb 100644 --- a/test/data/fra-tokenized.xml +++ b/test/data/fra-tokenized.xml @@ -1,5 +1,5 @@ - +
@@ -21,4 +21,4 @@
-
+ diff --git a/test/data/mixed-langs.g2p.xml b/test/data/mixed-langs.g2p.xml index 4fcbaa09..bda9c8f4 100644 --- a/test/data/mixed-langs.g2p.xml +++ b/test/data/mixed-langs.g2p.xml @@ -1,5 +1,5 @@ - +
@@ -12,4 +12,4 @@
-
+ diff --git a/test/data/mixed-langs.tokenized.xml b/test/data/mixed-langs.tokenized.xml index 4963412e..83392adf 100644 --- a/test/data/mixed-langs.tokenized.xml +++ b/test/data/mixed-langs.tokenized.xml @@ -1,5 +1,5 @@ - +
@@ -12,4 +12,4 @@
-
+ diff --git a/test/data/mixed-langs.xml b/test/data/mixed-langs.xml index 07ec97d2..7f9293d6 100644 --- a/test/data/mixed-langs.xml +++ b/test/data/mixed-langs.xml @@ -1,5 +1,5 @@ - +
@@ -12,4 +12,4 @@
-
+ diff --git a/test/data/patrickxtlan.xml b/test/data/patrickxtlan.xml index 728509be..eea0cfbc 100644 --- a/test/data/patrickxtlan.xml +++ b/test/data/patrickxtlan.xml @@ -1,5 +1,5 @@ - +

@@ -9,4 +9,4 @@

-
+ diff --git a/test/test_align_cli.py b/test/test_align_cli.py index c80372f4..5795afb6 100755 --- a/test/test_align_cli.py +++ b/test/test_align_cli.py @@ -313,9 +313,9 @@ def test_bad_anchors(self): """Make sure invalid anchors yield appropriate errors""" xml_text = """ -

+

Bonjour. -

+

""" xml_file = join(self.tempdir, "bad-anchor.xml") with open(xml_file, "w", encoding="utf8") as f: diff --git a/test/test_misc.py b/test/test_misc.py index 2d7879c4..6be078dc 100755 --- a/test/test_misc.py +++ b/test/test_misc.py @@ -86,7 +86,7 @@ def test_split_silences(self): self.assertEqual(words, ref) def test_get_attrib_recursive(self): - raw_xml = """ + raw_xml = """

stuffnonsense

stuffnonsense

@@ -97,7 +97,7 @@ def test_get_attrib_recursive(self):

stuffnonsense!

-
+ """ xml = etree.fromstring(raw_xml) for i, s, lang in zip(