Skip to content

Commit

Permalink
feat: refine the DTD somewhat
Browse files Browse the repository at this point in the history
- add a version number in comment and filename
- set the root element to `<read-along>` and add attributes from web component
- allow things in a lot more places
- add `<span>`
  • Loading branch information
dhdaines committed Feb 8, 2023
1 parent 899dbf3 commit b7285f5
Show file tree
Hide file tree
Showing 18 changed files with 67 additions and 42 deletions.
4 changes: 2 additions & 2 deletions readalongs/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -1141,7 +1141,7 @@ def convert_to_xhtml(tokenized_xml, title="Book"):


RAS_TEMPLATE = """<?xml version='1.0' encoding='utf-8'?>
<readalong>
<read-along>
<text xml:lang="{{main_lang}}" fallback-langs="{{fallback_langs}}">
<body>
{{#pages}}
Expand All @@ -1157,7 +1157,7 @@ def convert_to_xhtml(tokenized_xml, title="Book"):
{{/pages}}
</body>
</text>
</readalong>
</read-along>
"""


Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,24 @@
<!ELEMENT readalong (text)>
<!ELEMENT text (body|anchor)*>
<!-- VERSION: 0.1 -->
<!ELEMENT read-along (text|body|div|span|anchor|silence|graphic|p|s|w)*>
<!ATTLIST read-along
use-assets-folder CDATA #IMPLIED
href CDATA #IMPLIED
audio CDATA #IMPLIED
xml:lang CDATA #IMPLIED
language CDATA #IMPLIED
lang CDATA #IMPLIED>

<!ELEMENT text (body|div|span|anchor|silence|graphic|p|s|w)*>
<!ATTLIST text
xml:lang CDATA #IMPLIED
lang CDATA #IMPLIED
fallback-langs CDATA #IMPLIED
id CDATA #IMPLIED>

<!ELEMENT body (div|anchor|silence|graphic|p|s|w)*>
<!ELEMENT body (div|span|anchor|silence|graphic|p|s|w)*>
<!ATTLIST body
xml:lang CDATA #IMPLIED
lang CDATA #IMPLIED
id CDATA #IMPLIED>

<!ELEMENT anchor EMPTY>
Expand All @@ -22,44 +32,59 @@
url CDATA #REQUIRED
id CDATA #IMPLIED>

<!ELEMENT div (#PCDATA|anchor|silence|graphic|p|s|w)*>
<!ELEMENT div (#PCDATA|div|span|anchor|silence|graphic|p|s|w)*>
<!ATTLIST div
xml:lang CDATA #IMPLIED
lang CDATA #IMPLIED
id CDATA #IMPLIED
type CDATA #IMPLIED
do-not-align CDATA #IMPLIED
time CDATA #IMPLIED
dur CDATA #IMPLIED>

<!ELEMENT span (#PCDATA|div|span|anchor|silence|graphic|p|s|w)*>
<!ATTLIST span
xml:lang CDATA #IMPLIED
lang CDATA #IMPLIED
id CDATA #IMPLIED
type CDATA #IMPLIED
do-not-align CDATA #IMPLIED
time CDATA #IMPLIED
dur CDATA #IMPLIED>

<!ELEMENT p (#PCDATA|anchor|silence|s|w)*>
<!ELEMENT p (#PCDATA|span|anchor|silence|s|w)*>
<!ATTLIST p
xml:lang CDATA #IMPLIED
lang CDATA #IMPLIED
id CDATA #IMPLIED
do-not-align CDATA #IMPLIED
time CDATA #IMPLIED
dur CDATA #IMPLIED>

<!ELEMENT s (#PCDATA|anchor|silence|w)*>
<!ELEMENT s (#PCDATA|span|anchor|silence|w)*>
<!ATTLIST s
xml:lang CDATA #IMPLIED
lang CDATA #IMPLIED
id CDATA #IMPLIED
class CDATA #IMPLIED
do-not-align CDATA #IMPLIED
time CDATA #IMPLIED
dur CDATA #IMPLIED>

<!ELEMENT w (#PCDATA|syl)*>
<!ELEMENT w (#PCDATA|span|syl)*>
<!ATTLIST w
xml:lang CDATA #IMPLIED
lang CDATA #IMPLIED
id CDATA #IMPLIED
do-not-align CDATA #IMPLIED
ARPABET CDATA #IMPLIED
time CDATA #IMPLIED
dur CDATA #IMPLIED>

<!ELEMENT syl (#PCDATA)>
<!ELEMENT syl (#PCDATA|span)*>
<!ATTLIST syl
xml:lang CDATA #IMPLIED
lang CDATA #IMPLIED
id CDATA #IMPLIED
do-not-align CDATA #IMPLIED
ARPABET CDATA #IMPLIED
Expand Down
4 changes: 2 additions & 2 deletions test/data/ej-fra-anchors.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<readalong>
<read-along>
<text xml:lang="fra">
<body>
<div type="page">
Expand All @@ -23,4 +23,4 @@
</div>
</body>
</text>
</readalong>
</read-along>
4 changes: 2 additions & 2 deletions test/data/ej-fra-anchors2.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<readalong>
<read-along>
<text xml:lang="fra">
<anchor time=".5s"/>
<body>
Expand All @@ -25,4 +25,4 @@
</div>
</body>
</text>
</readalong>
</read-along>
4 changes: 2 additions & 2 deletions test/data/ej-fra-converted.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<readalong>
<read-along>
<text xml:lang="fra" id="t0">
<body id="t0b0">
<div type="page" id="t0b0d0">
Expand All @@ -21,4 +21,4 @@
</div>
</body>
</text>
</readalong>
</read-along>
4 changes: 2 additions & 2 deletions test/data/ej-fra-dna.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<readalong>
<read-along>
<text xml:lang="fra">
<body>
<div type="page">
Expand Down Expand Up @@ -28,4 +28,4 @@
</div>
</body>
</text>
</readalong>
</read-along>
4 changes: 2 additions & 2 deletions test/data/ej-fra-package.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<readalong>
<read-along>
<text xml:lang="fra">
<body>
<div type="page">
Expand All @@ -23,4 +23,4 @@
</div>
</body>
</text>
</readalong>
</read-along>
4 changes: 2 additions & 2 deletions test/data/ej-fra-silence-bad.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<readalong>
<read-along>
<text xml:lang="fra">
<body>
<div type="page">
Expand All @@ -21,4 +21,4 @@
</div>
</body>
</text>
</readalong>
</read-along>
4 changes: 2 additions & 2 deletions test/data/ej-fra-silence.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<readalong>
<read-along>
<text xml:lang="fra">
<body>
<div type="page">
Expand All @@ -21,4 +21,4 @@
</div>
</body>
</text>
</readalong>
</read-along>
4 changes: 2 additions & 2 deletions test/data/ej-fra.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<readalong>
<read-along>
<text xml:lang="fra">
<body>
<div type="page">
Expand All @@ -21,4 +21,4 @@
</div>
</body>
</text>
</readalong>
</read-along>
4 changes: 2 additions & 2 deletions test/data/fra-prepared.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<readalong>
<read-along>
<text xml:lang="fra" fallback-langs="und">
<body>
<div type="page">
Expand All @@ -21,4 +21,4 @@
</div>
</body>
</text>
</readalong>
</read-along>
4 changes: 2 additions & 2 deletions test/data/fra-tokenized.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<readalong>
<read-along>
<text xml:lang="fra">
<body>
<div type="page">
Expand All @@ -21,4 +21,4 @@
</div>
</body>
</text>
</readalong>
</read-along>
4 changes: 2 additions & 2 deletions test/data/mixed-langs.g2p.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<readalong>
<read-along>
<text id="t0">
<body id="t0b0">
<div type="page" id="t0b0d0">
Expand All @@ -12,4 +12,4 @@
</div>
</body>
</text>
</readalong>
</read-along>
4 changes: 2 additions & 2 deletions test/data/mixed-langs.tokenized.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<readalong>
<read-along>
<text>
<body>
<div type="page">
Expand All @@ -12,4 +12,4 @@
</div>
</body>
</text>
</readalong>
</read-along>
4 changes: 2 additions & 2 deletions test/data/mixed-langs.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<readalong>
<read-along>
<text>
<body>
<div type="page">
Expand All @@ -12,4 +12,4 @@
</div>
</body>
</text>
</readalong>
</read-along>
4 changes: 2 additions & 2 deletions test/data/patrickxtlan.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<readalong>
<read-along>
<text>
<body>
<p>
Expand All @@ -9,4 +9,4 @@
</p>
</body>
</text>
</readalong>
</read-along>
4 changes: 2 additions & 2 deletions test/test_align_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,9 +313,9 @@ def test_bad_anchors(self):
"""Make sure invalid anchors yield appropriate errors"""

xml_text = """<?xml version='1.0' encoding='utf-8'?>
<readalong><text xml:lang="fra"><body><p>
<read-along><text xml:lang="fra"><body><p>
<anchor /><s>Bonjour.</s><anchor time="invalid"/>
</p></body></text></readalong>
</p></body></text></read-along>
"""
xml_file = join(self.tempdir, "bad-anchor.xml")
with open(xml_file, "w", encoding="utf8") as f:
Expand Down
4 changes: 2 additions & 2 deletions test/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def test_split_silences(self):
self.assertEqual(words, ref)

def test_get_attrib_recursive(self):
raw_xml = """<readalong>
raw_xml = """<read-along>
<text lang="text">
<p lang="p1"><s>stuff</s><s lang="p1s2">nonsense</s></p>
<p><s lang="p2s1">stuff</s><s>nonsense</s></p>
Expand All @@ -97,7 +97,7 @@ def test_get_attrib_recursive(self):
<text>
<p><s xml:lang="p4s1" lang="not:xml:lang">stuff</s><s>nonsense<s xml:lang="p4p2c">!</s></s></p>
</text>
</readalong>
</read-along>
"""
xml = etree.fromstring(raw_xml)
for i, s, lang in zip(
Expand Down

0 comments on commit b7285f5

Please sign in to comment.