Skip to content

Commit

Permalink
gettext po: fix broken syntax due to missing quotations, unescape |, add test
Browse files (browse the repository at this point in the history)

also fix duplicate msgids
  • Loading branch information
ilius committed Dec 17, 2024
1 parent c848a7c commit 5d7cc16
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 17 deletions.
59 changes: 43 additions & 16 deletions pyglossary/plugins/gettext_po.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
BoolOption,
Option,
)
from pyglossary.text_utils import splitByBar

if TYPE_CHECKING:
import io
Expand Down Expand Up @@ -60,6 +61,7 @@ class Reader:

def __init__(self, glos: GlossaryType) -> None:
    """Keep the glossary and its ``alts`` attribute, then call ``clear()``."""
    # ``glos.alts`` is read once here; ``makeEntry`` consults the stored copy.
    self._glos, self._alts = glos, glos.alts
    self.clear()

def clear(self) -> None:
Expand Down Expand Up @@ -95,6 +97,11 @@ def __len__(self) -> int:
)
return self._wordCount

def makeEntry(self, word: str, defi: str) -> EntryType:
    """Create a glossary entry from one msgid/msgstr pair.

    If ``self._alts`` is truthy, ``word`` is first passed through
    ``splitByBar`` and the result is handed to ``newEntry``; otherwise
    ``word`` is handed over unchanged.
    """
    if not self._alts:
        return self._glos.newEntry(word, defi)
    terms = splitByBar(word)
    return self._glos.newEntry(terms, defi)

def __iter__(self) -> Iterator[EntryType]: # noqa: PLR0912
try:
from polib import unescape as po_unescape
Expand All @@ -108,15 +115,15 @@ def __iter__(self) -> Iterator[EntryType]: # noqa: PLR0912
defi = ""
msgstr = False
wordCount = 0
for line in file:
line = line.strip() # noqa: PLW2901
for line_ in file:
line = line_.strip() # noqa: PLW2901
if not line:
continue
if line.startswith("#"):
continue
if line.startswith("msgid "):
if word:
yield self._glos.newEntry(word, defi)
yield self.makeEntry(word, defi)
wordCount += 1
word = ""
defi = ""
Expand All @@ -125,18 +132,35 @@ def __iter__(self) -> Iterator[EntryType]: # noqa: PLR0912
# TODO: parse defi and set glos info?
# but this should be done in self.open
word = po_unescape(line[6:])
# A msgid value that opens with a double quote must also close with one
# on the same line; once confirmed, drop the surrounding quotes.
if word.startswith('"'):
    if len(word) < 2 or word[-1] != '"':
        # Interpolate the offending line so the bad entry can be located
        # (the message previously contained the literal text "line").
        raise ValueError(f"invalid po line: {line!r}")
    word = word[1:-1]
msgstr = False
elif line.startswith("msgstr "):
continue
if line.startswith("msgstr "):
if msgstr:
log.error("msgid omitted!")
defi = po_unescape(line[7:])
# A msgstr value that opens with a double quote must also close with one
# on the same line; once confirmed, drop the surrounding quotes.
if defi.startswith('"'):
    if len(defi) < 2 or defi[-1] != '"':
        # Interpolate the offending line so the bad entry can be located
        # (the message previously contained the literal text "line").
        raise ValueError(f"invalid po line: {line!r}")
    defi = defi[1:-1]
msgstr = True
elif msgstr:
defi += po_unescape(line)
continue

line = po_unescape(line)
# A continuation line that opens with a double quote must also close with
# one; once confirmed, drop the surrounding quotes before appending it.
if line.startswith('"'):
    if len(line) < 2 or line[-1] != '"':
        # Interpolate the offending line so the bad entry can be located
        # (the message previously contained the literal text "line").
        raise ValueError(f"invalid po line: {line!r}")
    line = line[1:-1]

if msgstr:
defi += line
else:
word += po_unescape(line)
word += line
if word:
yield self._glos.newEntry(word, defi)
yield self.makeEntry(word, defi)
wordCount += 1
self._wordCount = wordCount

Expand All @@ -152,25 +176,28 @@ def __init__(self, glos: GlossaryType) -> None:
self._glos = glos
self._filename = ""
self._file: io.TextIOBase = nullTextIO
glos.preventDuplicateWords()

def open(self, filename: str) -> None:
try:
from polib import escape as po_escape
except ModuleNotFoundError as e:
exc_note(e, f"Run `{pip} install polib` to install")
raise

self._filename = filename
self._file = file = open(filename, mode="w", encoding="utf-8")
file.write('#\nmsgid ""\nmsgstr ""\n')
for key, value in self._glos.iterInfo():
file.write(f'"{key}: {value}\\n"\n')
file.write(f'"{po_escape(key)}: {po_escape(value)}\\n"\n')

def finish(self) -> None:
    """Forget the output filename, close the output file and reset the handle."""
    self._filename = ""
    openFile = self._file
    openFile.close()
    # Point back at the placeholder so the closed handle is never reused.
    self._file = nullTextIO

def write(self) -> Generator[None, EntryType, None]:
try:
from polib import escape as po_escape
except ModuleNotFoundError as e:
exc_note(e, f"Run `{pip} install polib` to install")
raise
from polib import escape as po_escape

file = self._file

Expand All @@ -185,6 +212,6 @@ def write(self) -> Generator[None, EntryType, None]:
entry.save(filename + "_res")
continue
file.write(
f"msgid {po_escape(entry.s_word)}\n"
f"msgstr {po_escape(entry.defi)}\n\n",
f'msgid "{po_escape(entry.s_word)}"\n'
f'msgstr "{po_escape(entry.defi)}"\n\n',
)
3 changes: 2 additions & 1 deletion scripts/test-deps.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ python -m pip install \
python-idzip \
lxml==5.3 \
marisa-trie \
mistune
mistune \
polib
51 changes: 51 additions & 0 deletions tests/g_gettext_po_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import sys
import unittest
from os.path import abspath, dirname

rootDir = dirname(dirname(abspath(__file__)))
sys.path.insert(0, rootDir)

from glossary_v2_test import TestGlossaryBase


class TestGlossaryGetttestPo(TestGlossaryBase):
    """Conversion tests between ``.txt`` glossaries and gettext ``.po`` files."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Entries for the data files used by the tests below, keyed by file
        # name (presumably expected CRC32 values -- confirm in
        # TestGlossaryBase).
        poDataFiles = {
            "100-en-fa.po": "694de186",
            "100-en-fa.po.txt": "f0c3ea53",
        }
        self.dataFileCRC32.update(poDataFiles)

    def convert_txt_po(self, fname, fname2, **convertArgs):
        """Convert ``{fname}.txt`` to ``{fname}-2.po``.

        ``{fname2}.po`` is passed to ``self.convert`` as ``compareText``;
        extra keyword arguments are forwarded unchanged.
        """
        inputName = f"{fname}.txt"
        outputName = f"{fname}-2.po"
        expectedName = f"{fname2}.po"
        self.convert(
            inputName,
            outputName,
            compareText=expectedName,
            **convertArgs,
        )

    def convert_po_txt(self, fname, fname2, **convertArgs):
        """Convert ``{fname}.po`` to ``{fname}-2.txt``.

        ``{fname2}.txt`` is passed to ``self.convert`` as ``compareText``;
        extra keyword arguments are forwarded unchanged.
        """
        inputName = f"{fname}.po"
        outputName = f"{fname}-2.txt"
        expectedName = f"{fname2}.txt"
        self.convert(
            inputName,
            outputName,
            compareText=expectedName,
            **convertArgs,
        )

    def test_convert_txt_po_1(self):
        baseName = "100-en-fa"
        self.convert_txt_po(baseName, baseName)

    # TODO
    def test_convert_po_txt_1(self):
        # The comparison file name resolves to "100-en-fa.po.txt".
        overrides = {"input_file_size": None}
        self.convert_po_txt(
            "100-en-fa",
            "100-en-fa.po",
            infoOverride=overrides,
        )


if __name__ == "__main__":
unittest.main()

0 comments on commit 5d7cc16

Please sign in to comment.