forked from rictic/sutsis
-
Notifications
You must be signed in to change notification settings - Fork 0
/
parseXML.html
150 lines (135 loc) · 4.13 KB
/
parseXML.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
<html>
<head>
<title></title>
</head>
<body>
<!-- How to use:
1. Download the updated XML database dump to data/lojban-export.xml
2. Run `bower install ./`
3. Start the app engine development server: dev_appserver.py ./
4. Open http://localhost:8080/parseXML.html in your browser
5. Copy the output to data/parsed.js
-->
<script src='/components/jquery/dist/jquery.js'></script>
<script>
// Shared global state filled in by the build script further down this page.
//   literals:      natural-language word -> array of Lojban words
//   documentStore: Lojban word -> entry object (word, type, definition, rafsi, notes)
// Declared with `var` instead of bare assignments so they are explicit globals
// (a bare assignment to an undeclared name throws under strict mode). `var` is
// used rather than `let`/`const` because the generated output of this page
// declares the same two names with `var`.
var literals = {};
var documentStore = {};
</script>
<script src='lujvo.js'></script>
<script>
/**
 * Requests the dictionary export at /data/lojban-export.xml and hands the
 * result to `callback`.
 *
 * The callback is invoked from the request's `loadend` handler with exactly
 * one argument: the request's `responseXML` value at that moment. No error
 * handling is done here; the consumer is expected to cope with a missing
 * document.
 *
 * @param {function(Document)} callback Receives the value of `responseXML`.
 */
function getDictionaryXML(callback) {
  var request = new XMLHttpRequest();

  function deliverDocument() {
    callback(request.responseXML);
  }

  request.open('GET', '/data/lojban-export.xml');
  request.onloadend = deliverDocument;
  request.send();
}
/**
 * Records that the natural-language word `english` translates to the Lojban
 * word `lojban` by appending `lojban` to the list stored in the global
 * `literals` map under `english` (creating the list on first use).
 *
 * @param {string} english Natural-language headword used as the key.
 * @param {string} lojban Lojban word appended to that key's list.
 */
function addLiteral(english, lojban) {
  // Only own properties count as existing entries. A plain `in` test also
  // matches names inherited from Object.prototype, so a headword such as
  // "constructor" or "toString" would skip initialisation and then fail on
  // `.push`.
  if (!Object.prototype.hasOwnProperty.call(literals, english)) {
    // defineProperty always creates an own data property, even for the key
    // "__proto__", where a plain assignment would replace the prototype
    // instead of adding an entry.
    Object.defineProperty(literals, english, {
      value: [],
      writable: true,
      enumerable: true,
      configurable: true
    });
  }
  literals[english].push(lojban);
}
/**
 * Sanity check over the global `documentStore`: every entry must carry a
 * truthy `rafsi` property.
 *
 * @throws {Error} "no rafsi for <key>" for the first entry found whose
 *     `rafsi` is missing or otherwise falsy.
 */
function checkDocumentStore() {
  for (var word in documentStore) {
    if (documentStore[word].rafsi) {
      continue;
    }
    throw new Error("no rafsi for " + word);
  }
}
// Main build step. Once the XML export has been fetched this:
//   1. fills `literals` from the natural-language -> Lojban section,
//   2. fills `documentStore` from the Lojban -> natural-language section,
//   3. replaces the rafsi of every lujvo with its parsed decomposition,
//   4. prints both structures as JavaScript source so the text can be copied
//      into data/parsed.js.
getDictionaryXML(function(doc) {
  if (doc == null) {
    // responseXML is null when the request failed or the response could not
    // be parsed as XML. Say so instead of leaving a silently blank page.
    console.error("could not load or parse /data/lojban-export.xml");
    return;
  }
  var dictionary = doc.childNodes[0];

  // NOTE(review): the fixed child indices (2 here, 1 further down) and the
  // loops starting at index 1 presumably step over whitespace text nodes in
  // the export -- confirm against the actual layout of lojban-export.xml.
  var eToL = dictionary.childNodes[2];
  var nlwordNodes = eToL.childNodes;
  for (var i = 1; i < nlwordNodes.length; i++) {
    var nlword = $(nlwordNodes[i]);
    var english = nlword.attr('word');
    // Nodes without a `word` attribute contribute nothing.
    if (english != null) {
      addLiteral(english, nlword.attr('valsi'));
    }
  }

  var lToE = dictionary.childNodes[1];
  var valsiNodes = lToE.childNodes;
  for (var j = 1; j < valsiNodes.length; j++) {
    var valsi = $(valsiNodes[j]);
    var term = valsi.attr("word");
    if (!term) {
      continue;
    }
    var type = valsi.attr("type");
    // Text of every <rafsi> descendant, as a plain array.
    var rafsi = valsi.find('rafsi').map(function(_, r) {
      return $(r).text();
    }).get();
    if (type === "gismu" || type === "experimental gismu") {
      // A gismu also acts as a rafsi in full and with its last letter dropped.
      rafsi.push(term);
      rafsi.push(term.substring(0, term.length - 1));
    }
    documentStore[term] = {
      word: term,
      type: type,
      definition: valsi.find('definition').text(),
      rafsi: rafsi,
      notes: valsi.find('notes').text()
    };
  }

  // onUpdateDocumentStore is defined outside this file section (presumably in
  // lujvo.js); the store is validated both before and after it runs.
  checkDocumentStore();
  onUpdateDocumentStore();
  checkDocumentStore();

  // lujvo that are expected not to yield exactly one decomposition; they are
  // left as-is without an error being logged.
  var knownUnparsable = {
    "boirlokra": true, // no rafsi called lokra found
    "zdima'aterte'e": true, // no rafsi te'e found, just a cmene
    // Based on jax, a proposed rafsi of jai
    "jaxnalvai": true,
    "jaxpu'a": true,
    "samjavyfonxypliduskemjuglerci'analka'ebi'ojaxyja'e": true,
    "sudjaxri'a": true,
    // Based on vob, a proposed rafsi for vo'a
    "vobmipri": true,
    // Based on rafsi of experimental gismu:
    "ditka'e": true,
    "duvma'i": true,
    "felma'i": true,
    "gamyma'i": true,
    "kibdu'a": true,
    "kibmalmri": true,
    "kibyca'o": true,
    "kibycpa": true,
    "kibykarni": true,
    "kibypapri": true,
    "refkibycpa": true,
    "sarvanxu'u": true,
    // No clue, vlasisku is also confused:
    "ku'urcedru": true,
    "ku'urtaksu": true
  };

  for (var key in documentStore) {
    var entry = documentStore[key];
    if (entry.type !== 'lujvo') {
      continue;
    }
    var decompositions = parseLujvo(entry.word);
    if (decompositions.length === 1) {
      // Unambiguous parse: the decomposition replaces the rafsi from the XML.
      entry.rafsi = decompositions[0];
    } else if (!Object.prototype.hasOwnProperty.call(knownUnparsable, entry.word)) {
      console.error("parsed " + entry.word + " wrong: ", decompositions);
    }
  }
  checkDocumentStore();

  // Show the generated source as literal text. document.write would parse it
  // as HTML, so any '<' or '&' inside a definition or note would be consumed
  // as markup and whitespace runs would collapse, corrupting the copy that
  // ends up in data/parsed.js.
  var output = document.createElement('pre');
  output.style.whiteSpace = 'pre-wrap';
  output.textContent =
      'var documentStore = ' + JSON.stringify(documentStore) + ';' +
      'var literals = ' + JSON.stringify(literals) + ';';
  document.body.appendChild(output);
});
</script>
</body>
</html>