From aa6765b9284c62ddc85174908d66c2ff0b95c3ed Mon Sep 17 00:00:00 2001
From: Saeed Rasooli <saeed.gnu@gmail.com>
Date: Mon, 2 Sep 2024 11:34:39 +0330
Subject: [PATCH] add new plugin xdxf_css (XdxfCss) based on PR #570 by
 @soshial

- use CSS for all XDXF elements (decreases size of output dict file)
- created a list (ol / ul) for nested `<def>`s
- support loading of `<abbreviations>` data
- show js tooltips for abbreviations
- fixed showing hidden `<gr>`
- fixed losing spaces before XDXF tags
- add braces to `<co>`
- show CSS instead of inlined HTML tags for: `<ex>, <pos>, <abbr>, <k>, <gr>, <mrkd>`
---
 doc/p/__index__.md                      |   1 +
 doc/p/xdxf_css.md                       |  28 ++
 plugins-meta/index.json                 |  20 +
 pyglossary/plugins/xdxf/__init__.py     |  13 +-
 pyglossary/plugins/xdxf_css/__init__.py | 339 +++++++++++++++++
 pyglossary/xdxf/css_js_transform.py     | 473 ++++++++++++++++++++++++
 pyglossary/xdxf/xdxf.css                |  70 ++++
 pyglossary/xdxf/xdxf.js                 |  56 +++
 8 files changed, 990 insertions(+), 10 deletions(-)
 create mode 100644 doc/p/xdxf_css.md
 create mode 100644 pyglossary/plugins/xdxf_css/__init__.py
 create mode 100644 pyglossary/xdxf/css_js_transform.py
 create mode 100644 pyglossary/xdxf/xdxf.css
 create mode 100644 pyglossary/xdxf/xdxf.js
diff --git a/doc/p/__index__.md b/doc/p/__index__.md
index cb2985087..438d4f596 100644
--- a/doc/p/__index__.md
+++ b/doc/p/__index__.md
@@ -44,6 +44,7 @@
 | WordNet                        | Wordnet         | [wordnet.md](./wordnet.md)                     |
 | Wordset.org JSON directory     | Wordset         | [wordset.md](./wordset.md)                     |
 | XDXF (.xdxf)                   | Xdxf            | [xdxf.md](./xdxf.md)                           |
+| XDXF with CSS and JS           | XdxfCss         | [xdxf_css.md](./xdxf_css.md)                   |
 | XDXF Lax (.xdxf)               | XdxfLax         | [xdxf_lax.md](./xdxf_lax.md)                   |
 | Yomichan (.zip)                | Yomichan        | [yomichan.md](./yomichan.md)                   |
 | Zim (.zim, for Kiwix)          | Zim             | [zim.md](./zim.md)                             |
diff --git a/doc/p/xdxf_css.md b/doc/p/xdxf_css.md
new file mode 100644
index 000000000..384afded8
--- /dev/null
+++ b/doc/p/xdxf_css.md
@@ -0,0 +1,28 @@
+## XDXF with CSS and JS
+
+### General Information
+
+| Attribute       | Value                                                                                                          |
+| --------------- | -------------------------------------------------------------------------------------------------------------- |
+| Name            | XdxfCss                                                                                                        |
+| snake_case_name | xdxf_css                                                                                                       |
+| Description     | XDXF with CSS and JS                                                                                           |
+| Extensions      |                                                                                                                |
+| Read support    | Yes                                                                                                            |
+| Write support   | No                                                                                                             |
+| Single-file     | Yes                                                                                                            |
+| Kind            | 📝 text                                                                                                         |
+| Sort-on-write   | default_no                                                                                                     |
+| Sort key        | (`headword_lower`)                                                                                             |
+| Wiki            | [XDXF](https://en.wikipedia.org/wiki/XDXF)                                                                     |
+| Website         | [XDXF standard - @soshial/xdxf_makedict](https://github.com/soshial/xdxf_makedict/tree/master/format_standard) |
+
+### Dependencies for reading
+
+PyPI Links: [lxml](https://pypi.org/project/lxml)
+
+To install, run:
+
+```sh
+pip3 install lxml
+```
diff --git a/plugins-meta/index.json b/plugins-meta/index.json
index 68c13cffd..5e1d7d0cd 100644
--- a/plugins-meta/index.json
+++ b/plugins-meta/index.json
@@ -1823,6 +1823,26 @@
 			"lzma"
 		]
 	},
+	{
+		"module": "xdxf_css",
+		"lname": "xdxf_css",
+		"name": "XdxfCss",
+		"description": "XDXF with CSS and JS",
+		"extensions": [],
+		"singleFile": true,
+		"optionsProp": {},
+		"canRead": true,
+		"canWrite": false,
+		"readOptions": {},
+		"readDepends": {
+			"lxml": "lxml"
+		},
+		"readCompressions": [
+			"gz",
+			"bz2",
+			"lzma"
+		]
+	},
 	{
 		"module": "xdxf_lax",
 		"lname": "xdxf_lax",
diff --git a/pyglossary/plugins/xdxf/__init__.py b/pyglossary/plugins/xdxf/__init__.py
index d1712a17e..3a5193a9d 100644
--- a/pyglossary/plugins/xdxf/__init__.py
+++ b/pyglossary/plugins/xdxf/__init__.py
@@ -31,6 +31,8 @@
 
 	from pyglossary.lxml_types import Element
 
+from lxml import etree as ET
+
 from pyglossary.compression import (
 	compressionOpen,
 	stdCompressions,
@@ -79,7 +81,6 @@
 	),
 }
 
-
 """
 new format
 <xdxf ...>
@@ -110,9 +111,7 @@
 </xdxf>
 """
 
-
 if TYPE_CHECKING:
-
 	class TransformerType(typing.Protocol):
 		def transform(self, article: "Element") -> str: ...
 
@@ -154,7 +153,6 @@ def makeTransformer(self) -> None:
 
 	def open(self, filename: str) -> None:  # noqa: PLR0912
 		# <!DOCTYPE xdxf SYSTEM "http://xdxf.sourceforge.net/xdxf_lousy.dtd">
-		from lxml import etree as ET
 
 		self._filename = filename
 		if self._html:
@@ -220,9 +218,6 @@ def __len__(self) -> int:
 		return 0
 
 	def __iter__(self) -> "Iterator[EntryType]":
-		from lxml import etree as ET
-		from lxml.etree import tostring
-
 		context = ET.iterparse(  # type: ignore
 			self._file,
 			events=("end",),
@@ -238,7 +233,7 @@ def __iter__(self) -> "Iterator[EntryType]":
 				if len(words) == 1:
 					defi = self._re_span_k.sub("", defi)
 			else:
-				b_defi = cast(bytes, tostring(article, encoding=self._encoding))
+				b_defi = cast(bytes, ET.tostring(article, encoding=self._encoding))
 				defi = b_defi[4:-5].decode(self._encoding).strip()
 				defiFormat = "x"
 
@@ -265,8 +260,6 @@ def close(self) -> None:
 	def tostring(
 		elem: "Element",
 	) -> str:
-		from lxml import etree as ET
-
 		return (
 			ET.tostring(
 				elem,
diff --git a/pyglossary/plugins/xdxf_css/__init__.py b/pyglossary/plugins/xdxf_css/__init__.py
new file mode 100644
index 000000000..bc6418818
--- /dev/null
+++ b/pyglossary/plugins/xdxf_css/__init__.py
@@ -0,0 +1,339 @@
+# -*- coding: utf-8 -*-
+# xdxf_css/__init__.py
+"""xdxf file format reader and utils to convert xdxf to html."""
+#
+# Copyright © 2023 Saeed Rasooli
+# Copyright © 2016 ivan tkachenko me@ratijas.tk
+#
+# some parts of this file include code from:
+# Aard Dictionary Tools <http://aarddict.org>.
+# Copyright © 2008-2009  Igor Tkach
+#
+# This program is a free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# You can get a copy of GNU General Public License along this program
+# But you can always get it from http://www.gnu.org/licenses/gpl.txt
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+import re
+import typing
+from collections.abc import Iterator, Sequence
+from os.path import join
+from typing import TYPE_CHECKING, cast
+
+if TYPE_CHECKING:
+	import io
+
+	from pyglossary.lxml_types import Element
+	from pyglossary.option import Option
+
+
+from lxml import etree as ET
+
+from pyglossary.compression import (
+	compressionOpen,
+	stdCompressions,
+)
+from pyglossary.core import log, rootDir
+from pyglossary.glossary_types import EntryType, GlossaryType
+from pyglossary.io_utils import nullBinaryIO
+from pyglossary.text_utils import toStr
+
+__all__ = [
+	"Reader",
+	"description",
+	"enable",
+	"extensionCreate",
+	"extensions",
+	"format",
+	"kind",
+	"lname",
+	"optionsProp",
+	"singleFile",
+	"website",
+	"wiki",
+]
+
+enable = True
+lname = "xdxf_css"
+format = "XdxfCss"
+description = "XDXF with CSS and JS"
+extensions = ()
+extensionCreate = ".xdxf"
+singleFile = True
+kind = "text"
+wiki = "https://en.wikipedia.org/wiki/XDXF"
+website = (
+	"https://github.com/soshial/xdxf_makedict/tree/master/format_standard",
+	"XDXF standard - @soshial/xdxf_makedict",
+)
+optionsProp: "dict[str, Option]" = {}
+
+"""
+new format
+<xdxf ...>
+	<meta_info>
+		<!--All meta information about the dictionary: its title, author etc.!-->
+		<basename>...</basename>
+		<full_title>...</full_title>
+		<description>...</description>
+	</meta_info>
+	<lexicon>
+		<ar>article 1</ar>
+		<ar>article 2</ar>
+		<ar>article 3</ar>
+		<ar>article 4</ar>
+		...
+	</lexicon>
+</xdxf>
+
+old format
+<xdxf ...>
+	<full_name>...</full_name>
+	<description>...</description>
+	<ar>article 1</ar>
+	<ar>article 2</ar>
+	<ar>article 3</ar>
+	<ar>article 4</ar>
+	...
+</xdxf>
+"""
+
+if TYPE_CHECKING:
+	class TransformerType(typing.Protocol):
+		def transform(self, article: "Element") -> str: ...
+
+
+class Reader:
+	compressions = stdCompressions
+	depends = {
+		"lxml": "lxml",
+	}
+
+	_html: bool = True
+
+	infoKeyMap = {
+		"full_name": "name",
+		"full_title": "name",
+	}
+
+	def __init__(self, glos: GlossaryType) -> None:
+		"""Initialize reader state for the given glossary."""
+		self._glos = glos
+		self._filename = ""
+		self._file: "io.IOBase" = nullBinaryIO
+		self._fileSize = 0  # only updated by open() when the file is seekable
+		self._encoding = "utf-8"
+		self._htmlTr: "TransformerType | None" = None
+		self._re_span_k = re.compile('<span class="k">[^<>]*</span>(<br/>)?')
+		self._has_added_css: bool = False
+		self._has_added_js: bool = False
+		self._abbr_defs_js: bytes = b""  # built by open() from <abbr_def> tags
+
+	def makeTransformer(self) -> None:
+		from pyglossary.xdxf.css_js_transform import XdxfTransformer
+
+		self._htmlTr = XdxfTransformer(encoding=self._encoding)
+
+	def open(self, filename: str) -> None:  # noqa: PLR0912
+		"""Open the file and load its header info and abbreviations."""
+		# <!DOCTYPE xdxf SYSTEM "http://xdxf.sourceforge.net/xdxf_lousy.dtd">
+		self._filename = filename
+		self.makeTransformer()
+		self._glos.setDefaultDefiFormat("h")
+
+		cfile = self._file = cast(
+			"io.IOBase",
+			compressionOpen(self._filename, mode="rb"),
+		)
+
+		context = ET.iterparse(  # type: ignore
+			cfile,
+			events=("end",),
+		)
+		abbr_defs = []
+		for _, _elem in context:
+			elem = cast("Element", _elem)
+			if elem.tag in {"meta_info", "ar", "k", "abr", "dtrn"}:
+				break
+			# every other tag before </meta_info> or </ar> is considered info
+			if elem.tag == "abbr_def":
+				abbr_defs.append(elem)
+				continue
+			# pieces of the abbreviation list must not end up as glossary info
+			if elem.tag in {"abbreviations", "abbr_k", "abbr_v"}:
+				continue
+			# in case of multiple <from> or multiple <to> tags, the last one
+			# will be stored.
+			# Very few formats support more than one language pair in their
+			# metadata, so it's not very useful to have multiple
+			if elem.tag == "from":
+				for key, value in elem.attrib.items():
+					if key.endswith("}lang"):
+						self._glos.sourceLangName = value.split("-")[0]
+						break
+				continue
+			if elem.tag == "to":
+				for key, value in elem.attrib.items():
+					if key.endswith("}lang"):
+						self._glos.targetLangName = value.split("-")[0]
+						break
+				continue
+			if not elem.text:
+				log.warning(f"empty tag <{elem.tag}>")
+				continue
+			key = self.infoKeyMap.get(elem.tag, elem.tag)
+			self._glos.setInfo(key, elem.text)
+		self._abbr_defs_js = self.generate_abbr_js(abbr_defs)
+		del context
+
+		if cfile.seekable():
+			cfile.seek(0, 2)
+			self._fileSize = cfile.tell()
+			cfile.seek(0)
+			self._glos.setInfo("input_file_size", f"{self._fileSize}")
+		else:
+			log.warning("XDXF Reader: file is not seekable")
+			self._file.close()
+			self._file = compressionOpen(self._filename, mode="rb")
+
+	def __len__(self) -> int:
+		return 0
+
+	def __iter__(self) -> "Iterator[EntryType]":
+		context = ET.iterparse(  # type: ignore
+			self._file,
+			events=("end",),
+			tag="ar",
+		)
+
+		if self._has_added_css is False:
+			self._has_added_css = True
+			with open(join(rootDir, "pyglossary", "xdxf", "xdxf.css"), "rb") as css_file:
+				yield self._glos.newDataEntry("css/xdxf.css", css_file.read())
+
+		if self._abbr_defs_js is not None and not self._has_added_js:
+			self._has_added_js = True
+			yield self._glos.newDataEntry("js/xdxf.js", self._abbr_defs_js)
+
+		for _, _article in context:
+			article = cast("Element", _article)
+			article.tail = None
+			words = [toStr(w) for w in self.titles(article)]
+
+			defi = self._htmlTr.transform(article)
+			defiFormat = "h"
+			if len(words) == 1:
+				defi = self._re_span_k.sub("", defi)
+
+			defi = f"""<!DOCTYPE html>
+<html>
+	<head>
+		<link rel="stylesheet" href="css/xdxf.css"/>
+	</head>
+	<body>
+		{defi}
+		<script type="text/javascript" src="js/xdxf.js"></script>
+	</body>
+</html>"""
+			# log.info(f"{defi=}, {words=}")
+			yield self._glos.newEntry(
+				words,
+				defi,
+				defiFormat=defiFormat,
+				byteProgress=(self._file.tell(), self._fileSize),
+			)
+			# clean up preceding siblings to save memory
+			# this can reduce memory usage from 1 GB to ~25 MB
+			parent = article.getparent()
+			if parent is None:
+				continue
+			while article.getprevious() is not None:
+				del parent[0]
+
+	def close(self) -> None:
+		self._file.close()
+		self._file = nullBinaryIO
+
+	def generate_abbr_js(self, abbr_defs: list["Element"]) -> bytes:
+		"""Return JS that fills `abbr_map` from <abbr_def> tags, followed by xdxf.js."""
+		from json import dumps  # emits valid, fully escaped JS string literals
+
+		to_str = self._htmlTr.stringify_children
+		lines = ["const abbr_map = new Map();\n"]
+		for abbr_def in abbr_defs:
+			# iterate elements only (text nodes have no .tag); strip() drops the tail
+			keys = [to_str(c).strip() for c in abbr_def if c.tag == "abbr_k"]
+			values = [to_str(c).strip() for c in abbr_def if c.tag == "abbr_v"]
+			# as before, the last <abbr_v> wins and empty keys/values are skipped
+			value = dumps(values[-1]) if values and values[-1] else ""
+			if value:
+				lines += [f"abbr_map.set({dumps(k)}, {value});\n" for k in keys if k]
+		with open(join(rootDir, "pyglossary", "xdxf", "xdxf.js"), "rb") as js_file:
+			return "".join(lines).encode(encoding="utf-8") + js_file.read()
+
+	@staticmethod
+	def tostring(
+		elem: "Element",
+	) -> str:
+		return (
+			ET.tostring(
+				elem,
+				method="html",
+				pretty_print=True,
+			)
+			.decode("utf-8")
+			.strip()
+		)
+
+	def titles(self, article: "Element") -> "list[str]":
+		"""
+		:param article: <ar> tag
+		:return: list of titles: one per <k>, or one per subset of its <opt> children
+		"""
+		from itertools import combinations
+
+		titles: "list[str]" = []
+		for title_element in article.findall("k"):
+			if title_element.text is None:
+				# TODO: look for <opt> tag?
+				log.warning(f"empty title element: {self.tostring(title_element)}")
+				continue
+			n_opts = len([c for c in title_element if c.tag == "opt"])
+			if n_opts:
+				for j in range(n_opts + 1):
+					for comb in combinations(list(range(n_opts)), j):
+						titles.append(self._mktitle(title_element, comb))
+			else:
+				titles.append(self._mktitle(title_element))
+
+		return titles
+
+	def _mktitle(  # noqa: PLR6301
+		self,
+		title_element: "Element",
+		include_opts: "Sequence | None" = None,
+	) -> str:
+		if include_opts is None:
+			include_opts = ()
+		title = title_element.text or ""
+		opt_i = -1
+		for c in title_element:
+			if c.tag == "nu" and c.tail:
+				if title:
+					title += c.tail
+				else:
+					title = c.tail
+			if c.tag == "opt" and c.text is not None:
+				opt_i += 1
+				if opt_i in include_opts:
+					title += c.text
+				if c.tail:
+					title += c.tail
+		return title.strip()
diff --git a/pyglossary/xdxf/css_js_transform.py b/pyglossary/xdxf/css_js_transform.py
new file mode 100644
index 000000000..e1d5f0fae
--- /dev/null
+++ b/pyglossary/xdxf/css_js_transform.py
@@ -0,0 +1,473 @@
+import logging
+import sys
+from io import BytesIO
+from typing import TYPE_CHECKING, cast
+
+if TYPE_CHECKING:
+	from pyglossary.lxml_types import Element, T_htmlfile
+
+log = logging.getLogger("pyglossary")
+
+__all__ = [
+	"XdxfTransformer",
+]
+
+
+class XdxfTransformer:
+	def __init__(self, encoding: str = "utf-8") -> None:
+		self._encoding = encoding
+		self.logging_enabled = False
+		self._childTagWriteMapping = {
+			"br": self._write_br,
+			"u": self._write_basic_format,
+			"i": self._write_basic_format,
+			"b": self._write_basic_format,
+			"sub": self._write_basic_format,
+			"sup": self._write_basic_format,
+			"tt": self._write_basic_format,
+			"big": self._write_basic_format,
+			"small": self._write_basic_format,
+			"blockquote": self._write_blockquote,
+			"tr": self._write_tr,
+			"k": self._write_k,
+			"sr": self._write_sr,
+			"ex": self._write_example,
+			"mrkd": self._write_mrkd,
+			"kref": self._write_kref,
+			"iref": self._write_iref,
+			"pos": self._write_pos,
+			"abr": self._write_abr,
+			"abbr": self._write_abbr,
+			"dtrn": self._write_dtrn,
+			"co": self._write_co,
+			"c": self._write_c,
+			"rref": self._write_rref,
+			"def": self._write_def,
+			"deftext": self._write_deftext,
+			"span": self._write_span,
+			"gr": self._write_gr,
+			"ex_orig": self._write_ex_orig,
+			"categ": self._write_categ,
+			"opt": self._write_opt,
+			"img": self._write_img,
+			"etm": self._write_etm,
+		}
+
+	@staticmethod
+	def tostring(elem: "Element") -> str:
+		from lxml import etree as ET
+
+		return (
+			ET.tostring(
+				elem,
+				method="html",
+				pretty_print=True,
+			)
+			.decode("utf-8")
+			.strip()
+		)
+
+	@staticmethod
+	def hasPrevText(prev: "None | str | Element") -> bool:
+		if isinstance(prev, str):
+			return True
+		if prev is None:
+			return False
+		if prev.tag == "k":
+			return False
+		if prev.tag in {
+			"dtrn",
+			"def",
+			"span",
+			"co",
+			"i",
+			"b",
+			"sub",
+			"sup",
+			"tt",
+			"big",
+			"small",
+		}:
+			return True
+		if prev.text:  # noqa: SIM103
+			return True
+		# print(prev)
+		return False
+
+	def writeString(  # noqa: PLR0913
+		self,
+		hf: "T_htmlfile",
+		child: str,
+		parent: "Element",
+		prev: "None | str | Element",
+		stringSep: "str | None" = None,
+	) -> None:
+		from lxml import etree as ET
+
+		def addSep() -> None:
+			if stringSep is None:
+				hf.write(ET.Element("br"))
+			else:
+				hf.write(stringSep)
+
+		hasPrev = self.hasPrevText(prev)
+		trail = False
+		if parent.tag in {"ar", "font"}:
+			if child.startswith("\n"):
+				child = child.lstrip("\n")
+				if hasPrev:
+					hf.write(ET.Element("br"))
+			elif child.endswith("\n"):
+				child = child.rstrip("\n")
+				trail = True
+			if not hasPrev:
+				child = child.lstrip()
+		elif child.startswith("\n"):
+			# child = child.lstrip()
+			if hasPrev:
+				addSep()
+
+		lines = [line for line in child.split("\n") if line]
+		for index, line in enumerate(lines):
+			if index > 0:
+				# and line[0] not in ".,;)"
+				addSep()
+			hf.write(line)
+		if trail:
+			addSep()
+
+	def _write_example(self, hf: "T_htmlfile", elem: "Element") -> None:
+		prev = None
+		stringSep = " "
+		with hf.element(  # noqa: PLR1702
+			"div",
+			attrib={"class": elem.tag},
+		):
+			for child in elem.xpath("child::node()"):
+				if isinstance(child, str):
+					# if not child.strip():
+					# 	continue
+					self.writeString(hf, child, elem, prev, stringSep=stringSep)
+					continue
+				if child.tag == "iref":
+					with hf.element("div"):
+						self._write_iref(hf, child)  # NESTED 5
+					continue
+
+				if child.tag == "ex_orig":
+					with hf.element("span", attrib={"class": child.tag}):
+						self.writeChildrenOf(hf, child, stringSep=stringSep)
+					continue
+				if child.tag == "ex_tran":
+					ex_trans = elem.xpath("./ex_tran")
+					if ex_trans.index(child) == 0:
+						# when several translations, make HTML unordered list of them
+						if len(ex_trans) > 1:
+							with hf.element("ul", attrib={}):
+								for ex_tran in ex_trans:
+									with hf.element("li", attrib={}):
+										self._write_ex_transl(hf, ex_tran)
+						else:
+							self._write_ex_transl(hf, child)
+					continue
+				# log.warning(f"unknown tag {child.tag} inside <ex>")
+				self.writeChild(hf, child, elem, prev, stringSep=stringSep)
+				prev = child
+
+	def _write_ex_orig(self, hf: "T_htmlfile", child: "Element") -> None:
+		"""Render an <ex_orig> found outside <ex> the way _write_example does."""
+		# reachable through _childTagWriteMapping, so it must not exit the process
+		with hf.element("span", attrib={"class": child.tag}):
+			self.writeChildrenOf(hf, child)
+
+	def _write_ex_transl(self, hf: "T_htmlfile", child: "Element") -> None:
+		with hf.element("span", attrib={"class": child.tag}):
+			self.writeChildrenOf(hf, child)
+
+	def _write_iref(self, hf: "T_htmlfile", child: "Element") -> None:
+		iref_url = child.attrib.get("href", "")
+		if iref_url.endswith((".mp3", ".wav", ".aac", ".ogg")):
+			#  with hf.element("audio", src=iref_url):
+			with hf.element(
+				"a",
+				attrib={
+					"class": "iref",
+					"href": iref_url,
+				},
+			):
+				hf.write("🔊")
+			return
+
+		with hf.element(
+			"a",
+			attrib={
+				"class": "iref",
+				"href": child.attrib.get("href", child.text or ""),
+			},
+		):
+			self.writeChildrenOf(hf, child, stringSep=" ")
+
+	def _write_blockquote(self, hf: "T_htmlfile", child: "Element") -> None:
+		with hf.element("div", attrib={"class": "m"}):
+			self.writeChildrenOf(hf, child)
+
+	def _write_tr(self, hf: "T_htmlfile", child: "Element") -> None:
+		from lxml import etree as ET
+
+		hf.write("[")
+		self.writeChildrenOf(hf, child)
+		hf.write("]")
+		hf.write(ET.Element("br"))
+
+	def _write_k(self, hf: "T_htmlfile", child: "Element") -> None:
+		self.logging_enabled = child.text == "iść"
+
+		index = child.getparent().index(child)
+		if index == 0:
+			with (hf.element("div", attrib={"class": child.tag})):
+				# with hf.element(glos.titleTag(child.text)):
+				# ^ no glos object here!
+				self.writeChildrenOf(hf, child)
+		# TODO Lenny: show other forms in a collapsible list
+		# else:
+		# 	with (hf.element("span", attrib={"class": child.tag})):
+		# 		hf.write(str(index))
+		# 		self.writeChildrenOf(hf, child)
+
+	def _write_mrkd(self, hf: "T_htmlfile", child: "Element") -> None:  # noqa: PLR6301
+		if not child.text:
+			return
+		with hf.element("span", attrib={"class": child.tag}):
+			hf.write(child.text)
+
+	def _write_kref(self, hf: "T_htmlfile", child: "Element") -> None:
+		if not child.text:
+			log.warning(f"kref with no text: {self.tostring(child)}")
+			return
+		with hf.element(
+			"a",
+			attrib={
+				"class": "kref",
+				"href": f"bword://{child.attrib.get('k', child.text)}",
+			},
+		):
+			hf.write(child.text)
+
+	def _write_sr(self, hf: "T_htmlfile", child: "Element") -> None:
+		with hf.element("div", attrib={"class": child.tag}):
+			self.writeChildrenOf(hf, child)
+
+	def _write_pos(self, hf: "T_htmlfile", child: "Element") -> None:
+		with hf.element("span", attrib={"class": child.tag}):
+			self.writeChildrenOf(hf, child)
+
+	def _write_abr(self, hf: "T_htmlfile", child: "Element") -> None:
+		with hf.element("span", attrib={"class": "abbr"}):
+			self.writeChildrenOf(hf, child)
+
+	def _write_abbr(self, hf: "T_htmlfile", child: "Element") -> None:  # noqa: PLR6301
+		with hf.element("span", attrib={"class": child.tag}):
+			self.writeChildrenOf(hf, child)
+
+	def _write_dtrn(self, hf: "T_htmlfile", child: "Element") -> None:
+		self.writeChildrenOf(hf, child, sep=" ")
+
+	def _write_co(self, hf: "T_htmlfile", child: "Element") -> None:
+		with hf.element("span", attrib={"class": child.tag}):
+			hf.write("(")
+			self.writeChildrenOf(hf, child, sep=" ")
+			hf.write(")")
+
+	def _write_basic_format(self, hf: "T_htmlfile", child: "Element") -> None:
+		with hf.element(child.tag):
+			self.writeChildrenOf(hf, child)
+			# if child.text is not None:
+			# 	hf.write(child.text.strip("\n"))
+
+	def _write_br(self, hf: "T_htmlfile", child: "Element") -> None:
+		from lxml import etree as ET
+
+		hf.write(ET.Element("br"))
+		self.writeChildrenOf(hf, child)
+
+	def _write_c(self, hf: "T_htmlfile", child: "Element") -> None:
+		color = child.attrib.get("c", "green")
+		with hf.element("font", color=color):
+			self.writeChildrenOf(hf, child)
+
+	def _write_rref(self, _hf: "T_htmlfile", child: "Element") -> None:
+		if not child.text:
+			log.warning(f"rref with no text: {self.tostring(child)}")
+			return
+
+	def _write_def(self, hf: "T_htmlfile", elem: "Element") -> None:
+		has_nested_def = False
+		has_deftext = False
+		for child in elem.iterchildren():
+			if child.tag == "def":
+				has_nested_def = True
+			if child.tag == "deftext":
+				has_deftext = True
+
+		if elem.getparent().tag == "ar":  # this is a root <def>
+			if has_nested_def:
+				with hf.element("ol"):
+					self.writeChildrenOf(hf, elem)
+			else:
+				with hf.element("div"):
+					self.writeChildrenOf(hf, elem)
+		elif has_deftext:
+			with hf.element("li"):
+				self.writeChildrenOf(hf, elem)
+		elif has_nested_def:
+			with hf.element("li"):
+				with hf.element("ol"):
+					self.writeChildrenOf(hf, elem)
+		else:
+			with hf.element("li"):
+				self.writeChildrenOf(hf, elem)
+
+	def _write_deftext(self, hf: "T_htmlfile", child: "Element") -> None:
+		with hf.element("span", attrib={"class": child.tag}):
+			self.writeChildrenOf(hf, child, stringSep=" ", sep=" ")
+
+	def _write_span(self, hf: "T_htmlfile", child: "Element") -> None:
+		with hf.element("span"):
+			self.writeChildrenOf(hf, child)
+
+	def _write_gr(self, hf: "T_htmlfile", child: "Element") -> None:
+		with hf.element("div", attrib={"class": child.tag}):
+			self.writeChildrenOf(hf, child)
+
+	def _write_categ(self, hf: "T_htmlfile", child: "Element") -> None:
+		with hf.element("span", style="background-color: green;"):
+			self.writeChildrenOf(hf, child, stringSep=" ")
+
+	def _write_opt(self, hf: "T_htmlfile", child: "Element") -> None:  # noqa: PLR6301
+		if child.text:
+			hf.write(" (")
+			hf.write(child.text)
+			hf.write(")")
+
+	def _write_img(self, hf: "T_htmlfile", child: "Element") -> None:  # noqa: PLR6301
+		with hf.element("img", attrib=dict(child.attrib)):
+			pass
+
+	def _write_etm(self, hf: "T_htmlfile", child: "Element") -> None:  # noqa: PLR6301
+		"""Write the etymology (history and origin) text; TODO: formatting?"""
+		if child.text:  # an empty <etm/> used to be rendered as the string "None"
+			hf.write(child.text)
+
+	def writeChildElem(  # noqa: PLR0913
+		self,
+		hf: "T_htmlfile",
+		child: "Element",
+		parent: "Element",  # noqa: ARG002
+		prev: "None | str | Element",
+		stringSep: "str | None" = None,  # noqa: ARG002
+	) -> None:
+		func = self._childTagWriteMapping.get(child.tag, None)
+		if func is not None:
+			func(hf, child)
+			return
+
+		if child.tag == "ex_transl" and prev is not None:
+			if isinstance(prev, str):
+				pass
+			elif prev.tag == "ex_orig":
+				if child.text != prev.text:
+					with hf.element("i"):
+						self.writeChildrenOf(hf, child)
+				return
+
+		log.warning(f"unknown tag {child.tag}")
+		self.writeChildrenOf(hf, child)
+
+	def writeChild(  # noqa: PLR0913
+		self,
+		hf: "T_htmlfile",
+		child: "str | Element",
+		parent: "Element",
+		prev: "None | str | Element",
+		stringSep: "str | None" = None,
+	) -> None:
+		if isinstance(child, str):
+			self.writeString(hf, child, parent, prev, stringSep=stringSep)
+		else:
+			self.writeChildElem(
+				hf=hf,
+				child=child,
+				parent=parent,
+				prev=prev,
+				stringSep=stringSep,
+			)
+
+	def shouldAddSep(  # noqa: PLR6301
+		self,
+		child: "str | Element",
+		prev: "str | Element",
+	) -> bool:
+		if isinstance(child, str):
+			return not (len(child) > 0 and child[0] in ".,;)")
+
+		if child.tag in {"sub", "sup"}:
+			return False
+
+		if isinstance(prev, str):
+			pass
+		elif prev.tag in {"sub", "sup"}:
+			return False
+
+		return True
+
+	def writeChildrenOf(
+		self,
+		hf: "T_htmlfile",
+		elem: "Element",
+		sep: "str | None" = None,
+		stringSep: "str | None" = None,
+	) -> None:
+		prev = None
+		for child in elem.xpath("child::node()"):
+			if sep and prev is not None and self.shouldAddSep(child, prev):
+				hf.write(sep)
+			self.writeChild(hf, child, elem, prev, stringSep=stringSep)
+			prev = child
+
+	@staticmethod
+	def stringify_children(elem: "Element") -> str:  # text + serialized child markup
+		from itertools import chain
+
+		from lxml.etree import tostring
+		children = [chunk for chunk in chain(
+				(elem.text,),
+				chain(*((tostring(child, with_tail=False), child.tail)
+						for child in elem.getchildren())),
+				(elem.tail,)) if chunk]  # NOTE: elem's own tail is appended as well
+		normalized_children = ""
+		for chunk in children:
+			if isinstance(chunk, str):
+				normalized_children += chunk
+			if isinstance(chunk, bytes):  # the tostring() results from above
+				normalized_children += chunk.decode(encoding="utf-8")
+		return normalized_children
+
+	def transform(self, article: "Element") -> str:
+		from lxml import etree as ET
+
+		# encoding = self._encoding
+		f = BytesIO()
+		with ET.htmlfile(f, encoding="utf-8") as hf:
+			with hf.element("div", attrib={"class": "article"}):
+				self.writeChildrenOf(cast("T_htmlfile", hf), article)
+
+		text = f.getvalue().decode("utf-8")
+		text = text.replace("<br>", "<br/>")  # for compatibility
+		return text  # noqa: RET504
+
+	def transformByInnerString(self, articleInnerStr: str) -> str:
+		from lxml import etree as ET
+
+		return self.transform(
+			ET.fromstring(f"<ar>{articleInnerStr}</ar>"),
+		)
diff --git a/pyglossary/xdxf/xdxf.css b/pyglossary/xdxf/xdxf.css
new file mode 100644
index 000000000..0ee23a41d
--- /dev/null
+++ b/pyglossary/xdxf/xdxf.css
@@ -0,0 +1,70 @@
+div.k {
+    font-weight: 700;
+    font-size: 150%;
+}
+
+span.k {
+    font-size: 100%;
+}
+
+.gr {
+    color: green;
+}
+
+ol {
+    list-style-type: decimal;
+    padding-left: 20px;
+}
+
+ol > li > ol > li > ol {
+    list-style-type: lower-latin;
+}
+
+.ex {
+    margin: 0px 0px 0px 20px;
+    color: #888888;
+}
+
+.ex i {
+    color: red;
+}
+
+.ex_orig {
+    font-weight: 700;
+}
+
+.ex .mrkd {
+    text-decoration: underline;
+}
+
+.co {
+    color: #888888;
+    font-style: italic;
+}
+
+.abbr {
+    color: green;
+    font-style: italic;
+    text-decoration: underline;
+    text-decoration-style: dotted;
+}
+
+.pos {
+    color: red;
+    font-style: italic;
+}
+
+.abbr_popup {
+  background: #feffca;
+  border: 1px solid rgba(0,0,0,.15);
+  border-radius: 2px;
+  box-shadow: 2px 2px 3px rgba(0,0,0,.1),0 2px 0 rgba(255,255,255,.4) inset,0 -2px 0 rgba(242,85,0,1) inset;
+  cursor: pointer;
+  display: none;
+  font-size: 100%;
+  font-style: normal;
+  padding: .05em .6em .2em;
+  position: absolute;
+  z-index: 999;
+  margin-bottom: 100px;
+}
\ No newline at end of file
diff --git a/pyglossary/xdxf/xdxf.js b/pyglossary/xdxf/xdxf.js
new file mode 100644
index 000000000..83257309e
--- /dev/null
+++ b/pyglossary/xdxf/xdxf.js
@@ -0,0 +1,56 @@
+
+prepare_tooltips()
+
+// iterate over all tags that can show tooltip
+function prepare_tooltips() {
+    var pos_elems = document.querySelectorAll(".pos");
+    var abbr_elems = document.querySelectorAll(".abbr");
+    iterate_over_abbr_elems(pos_elems)
+    iterate_over_abbr_elems(abbr_elems)
+}
+
+function iterate_over_abbr_elems(elems) {
+    for (var i = 0; i < elems.length; i++) {
+        var elem = elems[i];
+        if (abbr_map.has(elem.textContent)) {
+            elem.classList.add("abbr");
+            elem.classList.remove("pos");
+            elem.addEventListener("mouseover", show_popup);
+            elem.addEventListener("mouseout", hide_popup);
+        } else {
+            elem.classList.add("pos");
+            elem.classList.remove("abbr");
+        }
+    }
+}
+
+function show_popup(event) {
+    var pos_elem = event.target
+    var pos_text = pos_elem.textContent
+    var s = document.createElement("small");
+    s.classList.add("abbr_popup");
+    s.innerHTML = abbr_map.get(pos_text)
+    pos_elem.parentNode.insertBefore(s, pos_elem.nextSibling);
+
+    if (s.offsetWidth > 200) {
+        if ((pos_elem.offsetLeft + 200) > document.body.offsetWidth) {
+            s.style.left = pos_elem.offsetLeft - ((pos_elem.offsetLeft + 200) - document.body.offsetWidth) + 'px';
+        } else {
+            s.style.left = pos_elem.offsetLeft + 'px';
+        }
+    } else {
+        if ((pos_elem.offsetLeft + s.offsetWidth) > document.body.offsetWidth) {
+            s.style.left = pos_elem.offsetLeft - ((pos_elem.offsetLeft + s.offsetWidth) - document.body.offsetWidth) + 'px';
+        } else {
+            s.style.left = pos_elem.offsetLeft + 'px';
+        }
+    }
+    s.style.display = 'block';
+}
+
+function hide_popup(event) {
+    var popups = document.querySelectorAll('.abbr_popup'); // static list: removal cannot skip items
+    for (var i = 0; i < popups.length; ++i) {
+        popups[i].remove();
+    }
+}