Skip to content

Commit

Permalink
feat: set our .readalong format to version 1.0 for publication
Browse files Browse the repository at this point in the history
  • Loading branch information
joanise committed Feb 24, 2023
1 parent 2e28d86 commit 2f0da60
Show file tree
Hide file tree
Showing 25 changed files with 37 additions and 34 deletions.
6 changes: 3 additions & 3 deletions docs/cli-guide.rst
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ The format of the generated XML is based on [TEI
Lite](https://tei-c.org/guidelines/customization/lite/) but is
considerably simplified. The DTD (document type definition) can be
found in the ReadAlong Studio source code under
`readalongs/static/read-along-0.2.dtd`.
`readalongs/static/read-along-1.0.dtd`.

.. _dna:

Expand Down Expand Up @@ -395,7 +395,7 @@ Example:
.. code-block:: xml
<?xml version='1.0' encoding='utf-8'?>
<read-along> <text xml:lang="eng"> <body>
<read-along version="1.0"> <text xml:lang="eng"> <body>
<anchor time="143ms"/>
<div type="page">
<p>
Expand Down Expand Up @@ -479,7 +479,7 @@ Example:
.. code-block:: xml
<?xml version='1.0' encoding='utf-8'?>
<read-along> <text xml:lang="eng"> <body>
<read-along version="1.0"> <text xml:lang="eng"> <body>
<silence dur="1s"/>
<div type="page">
<p>
Expand Down
13 changes: 8 additions & 5 deletions readalongs/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def parse_and_make_xml(
"""Parse XML input and run tokenization and G2P.
Args:
xml_path (str): Path to input in ReadAlong XML format (see static/read-along-0.2.dtd)
xml_path (str): Path to input in ReadAlong XML format (see static/read-along-1.0.dtd)
config (dict): Optional; ReadAlong-Studio configuration to use
save_temps (str): Optional; Save temporary files, by default None
verbose_g2p_warnings (boolean): Optional; display all g2p errors and warnings
Expand Down Expand Up @@ -555,7 +555,7 @@ def align_audio(
"""Align an XML input file to an audio file.
Args:
xml_path (str): Path to input file in ReadAlong XML format (see static/read-along-0.2.dtd)
xml_path (str): Path to input file in ReadAlong XML format (see static/read-along-1.0.dtd)
audio_path (str): Path to audio input. Must be in a format supported by ffmpeg
unit (str): Optional; Element to create alignments for, by default 'w'
bare (boolean): Optional;
Expand Down Expand Up @@ -1105,6 +1105,9 @@ def convert_to_xhtml(tokenized_xml, title="Book"):
tokenized_xml (etree): xml etree with tokens, converted in place
title (str, optional): title for xhtml, by default 'Book'
"""
# The read-along "version" attribute would otherwise be carried over onto the <html> root element, where it is meaningless, so remove it before renaming the tag
if "version" in tokenized_xml.attrib:
del tokenized_xml.attrib["version"]
tokenized_xml.tag = "html"
tokenized_xml.attrib["xmlns"] = "http://www.w3.org/1999/xhtml"
for elem in tokenized_xml.iter():
Expand All @@ -1131,7 +1134,7 @@ def convert_to_xhtml(tokenized_xml, title="Book"):


RAS_TEMPLATE = """<?xml version='1.0' encoding='utf-8'?>
<read-along version=0.2>
<read-along version="1.0">
<text xml:lang="{{main_lang}}" fallback-langs="{{fallback_langs}}">
<body>
{{#pages}}
Expand All @@ -1152,7 +1155,7 @@ def convert_to_xhtml(tokenized_xml, title="Book"):


def create_ras_from_text(lines: Iterable[str], text_languages=Sequence[str]) -> str:
"""Create input xml in ReadAlong XML format (see static/read-along-0.2.dtd)
"""Create input xml in ReadAlong XML format (see static/read-along-1.0.dtd)
Uses the line sequence to infer paragraph and sentence structure from plain text:
Assumes a double blank line marks a page break, and a single blank line
marks a paragraph break.
Expand Down Expand Up @@ -1198,7 +1201,7 @@ def create_ras_from_text(lines: Iterable[str], text_languages=Sequence[str]) ->


def create_input_ras(**kwargs):
"""Create input xml in ReadAlong XML format (see static/read-along-0.2.dtd)
"""Create input xml in ReadAlong XML format (see static/read-along-1.0.dtd)
Uses readlines to infer paragraph and sentence structure from plain text.
Assumes a double blank line marks a page break, and a single blank line
marks a paragraph break.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<!-- VERSION: 0.3 -->
<!-- VERSION: 1.0 -->
<!ELEMENT read-along (text|body|div|span|anchor|silence|graphic|p|s|w)*>
<!ATTLIST read-along
use-assets-folder CDATA #IMPLIED
Expand Down
6 changes: 3 additions & 3 deletions readalongs/web_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@
# Call get_langs() when the server loads to load the languages into memory
LANGS = get_langs()
# Get the DTD
DTDPATH = os.path.join(os.path.dirname(__file__), "static", "read-along-0.2.dtd")
DTDPATH = os.path.join(os.path.dirname(__file__), "static", "read-along-1.0.dtd")
with open(DTDPATH) as dtdfh:
DTD = etree.DTD(dtdfh)

Expand Down Expand Up @@ -161,7 +161,7 @@ async def assemble(
"xml": {
"summary": "A basic example with xml input",
"value": {
"input": "<?xml version='1.0' encoding='utf-8'?><read-along><text><p><s>hej verden</s></p></text></read-along>",
"input": "<?xml version='1.0' encoding='utf-8'?><read-along version=\"1.0\"><text><p><s>hej verden</s></p></text></read-along>",
"type": "application/readalong+xml",
"text_languages": ["dan", "und"],
"debug": False,
Expand Down Expand Up @@ -321,7 +321,7 @@ class ConvertRequest(BaseModel):
example=dedent(
"""\
<?xml version='1.0' encoding='utf-8'?>
<read-along>
<read-along version="1.0">
<text xml:lang="dan" fallback-langs="und" id="t0">
<body id="t0b0">
<div type="page" id="t0b0d0">
Expand Down
2 changes: 1 addition & 1 deletion test/data/ej-fra-anchors.readalong
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<read-along>
<read-along version="1.0">
<text xml:lang="fra">
<body>
<div type="page">
Expand Down
2 changes: 1 addition & 1 deletion test/data/ej-fra-anchors2.readalong
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<read-along>
<read-along version="1.0">
<text xml:lang="fra">
<anchor time=".5s"/>
<body>
Expand Down
2 changes: 1 addition & 1 deletion test/data/ej-fra-converted.readalong
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<read-along>
<read-along version="1.0">
<text xml:lang="fra" id="t0">
<body id="t0b0">
<div type="page" id="t0b0d0">
Expand Down
2 changes: 1 addition & 1 deletion test/data/ej-fra-dna.readalong
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<read-along>
<read-along version="1.0">
<text xml:lang="fra">
<body>
<div type="page">
Expand Down
2 changes: 1 addition & 1 deletion test/data/ej-fra-invalid.readalong
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<read-along>
<read-along version="1.0">
<!-- Note the typo: this should fail validation -->
<txet xml:lang="fra">
<body>
Expand Down
2 changes: 1 addition & 1 deletion test/data/ej-fra-package.readalong
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<read-along>
<read-along version="1.0">
<text xml:lang="fra">
<body>
<div type="page">
Expand Down
2 changes: 1 addition & 1 deletion test/data/ej-fra-silence-bad.readalong
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<read-along>
<read-along version="1.0">
<text xml:lang="fra">
<body>
<div type="page">
Expand Down
2 changes: 1 addition & 1 deletion test/data/ej-fra-silence.readalong
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<read-along>
<read-along version="1.0">
<text xml:lang="fra">
<body>
<div type="page">
Expand Down
2 changes: 1 addition & 1 deletion test/data/ej-fra-subword.readalong
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<read-along>
<read-along version="1.0">
<!-- To exclude any element from alignment, add the do-not-align="true" attribute to
it, e.g., <p do-not-align="true">...</p>, or
<s>Some text <foo do-not-align="true">do not align this</foo> more text</s> -->
Expand Down
2 changes: 1 addition & 1 deletion test/data/ej-fra-translated.readalong
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<read-along>
<read-along version="1.0">
<!-- To exclude any element from alignment, add the do-not-align="true" attribute to
it, e.g., <p do-not-align="true">...</p>, or
<s>Some text <foo do-not-align="true">do not align this</foo> more text</s> -->
Expand Down
2 changes: 1 addition & 1 deletion test/data/ej-fra.readalong
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<read-along>
<read-along version="1.0">
<text xml:lang="fra">
<body>
<div type="page">
Expand Down
2 changes: 1 addition & 1 deletion test/data/fra-prepared.readalong
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<read-along>
<read-along version="1.0">
<text xml:lang="fra" fallback-langs="und">
<body>
<div type="page">
Expand Down
2 changes: 1 addition & 1 deletion test/data/fra-tokenized.readalong
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<read-along>
<read-along version="1.0">
<text xml:lang="fra">
<body>
<div type="page">
Expand Down
2 changes: 1 addition & 1 deletion test/data/mixed-langs.g2p.readalong
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<read-along>
<read-along version="1.0">
<text id="t0">
<body id="t0b0">
<div type="page" id="t0b0d0">
Expand Down
2 changes: 1 addition & 1 deletion test/data/mixed-langs.readalong
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<read-along>
<read-along version="1.0">
<text>
<body>
<div type="page">
Expand Down
2 changes: 1 addition & 1 deletion test/data/mixed-langs.tokenized.readalong
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<read-along>
<read-along version="1.0">
<text>
<body>
<div type="page">
Expand Down
2 changes: 1 addition & 1 deletion test/data/patrickxtlan.readalong
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='utf-8'?>
<read-along>
<read-along version="1.0">
<text>
<body>
<p>
Expand Down
2 changes: 1 addition & 1 deletion test/test_align_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ def test_bad_anchors(self):
"""Make sure invalid anchors yield appropriate errors"""

xml_text = """<?xml version='1.0' encoding='utf-8'?>
<read-along><text xml:lang="fra"><body><p>
<read-along version="1.0"><text xml:lang="fra"><body><p>
<anchor /><s>Bonjour.</s><anchor time="invalid"/>
</p></body></text></read-along>
"""
Expand Down
2 changes: 1 addition & 1 deletion test/test_dtd.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from lxml import etree

DTDPATH = os.path.join(
dirname(__file__), "..", "readalongs", "static", "read-along-0.2.dtd"
dirname(__file__), "..", "readalongs", "static", "read-along-1.0.dtd"
)

VALID_RAS = """
Expand Down
2 changes: 1 addition & 1 deletion test/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def test_split_silences(self):
self.assertEqual(words, ref)

def test_get_attrib_recursive(self):
raw_xml = """<read-along>
raw_xml = """<read-along version="1.0">
<text lang="text">
<p lang="p1"><s>stuff</s><s lang="p1s2">nonsense</s></p>
<p><s lang="p2s1">stuff</s><s>nonsense</s></p>
Expand Down
4 changes: 2 additions & 2 deletions test/test_web_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def test_debug(self):
hej_verden_xml = dedent(
"""\
<?xml version='1.0' encoding='utf-8'?>
<read-along>
<read-along version="1.0">
<text xml:lang="dan" fallback-langs="und" id="t0">
<body id="t0b0">
<div type="page" id="t0b0d0">
Expand Down Expand Up @@ -390,7 +390,7 @@ def test_cleanup_even_if_error(self):
overlap_xml = dedent(
"""\
<?xml version='1.0' encoding='utf-8'?>
<read-along>
<read-along version="1.0">
<text xml:lang="dan" fallback-langs="und" id="t0">
<body id="t0b0">
<div type="page" id="t0b0d0">
Expand Down

0 comments on commit 2f0da60

Please sign in to comment.