From 6c533ee22d4edced7466e50ae3f3005d73d763d4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Wang?=
Date: Thu, 27 Jan 2022 09:56:42 +0100
Subject: [PATCH] Add a check for https://github.com/w3c/mathml-core/issues/104#issuecomment-1022983991

---
Reviewer notes (commentary only; text placed after the three-dash line and
before the first "diff --git" line is not part of the commit message and is
skipped when the patch is applied):

* Hunk 1 replaces one blank line in the import section with
  "from bisect import bisect_left"; the loop added by hunk 2 is the only
  user of that name within this patch.
* Hunk 2 rewords the section comment and inserts a consistency check just
  before the existing 'del knownTables["infixEntriesWithDefaultValues"]'.
  For every entry in that table's "singleChar" list it looks the entry up
  with bisect_left in the "singleChar" list of every other table in
  knownTables, skipping tables whose name starts with "infix" or is exactly
  "fence". Matching table names are collected, and the assert fails with
  the entry formatted as U+XXXX plus the list of table names if any match
  was found.
* bisect_left only locates an existing element when the searched list is
  sorted. Nothing in this patch shows where the "singleChar" lists are
  sorted -- TODO confirm against the rest of operator-dictionary.py.
* The check is written as an assert statement, so it does not run when the
  script is executed with "python -O".
* The message uses "%04X" on entry, so entries are presumably integer code
  points -- confirm against the code that fills knownTables.
* NOTE(review): the "fence" table is excluded from the check but no other
  non-infix table is; the reason is not visible here (see the linked
  mathml-core issue comment in the subject line).

 tables/operator-dictionary.py | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/tables/operator-dictionary.py b/tables/operator-dictionary.py
index 3b1dbcb..873953c 100755
--- a/tables/operator-dictionary.py
+++ b/tables/operator-dictionary.py
@@ -4,7 +4,7 @@
 from download import downloadUnicodeXML
 from math import ceil
 from inline_axis_operators import stretchAxis, inlineAxisOperators
-
+from bisect import bisect_left
 import operator
 import json
 
@@ -489,7 +489,24 @@ def serializeValue(value, fence, separator):
 print("done.");
 
 ################################################################################
-# Delete infix operators using default values.
+# Delete infix operators using default values. But before doing that, check
+# (for single char entries) whether they actually don't exist in another
+# category. Otherwise, such a category will be used when no explicit form is
+# specified, which will override the default values.
+# https://w3c.github.io/mathml-core/#ref-for-dfn-algorithm-for-determining-the-properties-of-an-embellished-operator-1
+
+for entry in knownTables["infixEntriesWithDefaultValues"]["singleChar"]:
+    otherCategories = []
+    for name in knownTables:
+        if name.startswith("infix") or name == "fence":
+            continue
+        i = bisect_left(knownTables[name]["singleChar"], entry)
+        if (i != len(knownTables[name]["singleChar"]) and
+            knownTables[name]["singleChar"][i] == entry):
+            otherCategories.append(name)
+    assert len(otherCategories) == 0, (
+        "U+%04X is in infixEntriesWithDefaultValues but also in %s" %
+        (entry, str(otherCategories)))
 del knownTables["infixEntriesWithDefaultValues"]
 
 # Convert nonBMP characters to surrogates pair (multiChar)