-
Notifications
You must be signed in to change notification settings - Fork 1
/
CorpusLoader.py
365 lines (316 loc) · 10 KB
/
CorpusLoader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
#!/usr/bin/env python3
# -*- coding: utf-8, vim: expandtab:ts=4 -*-
"""
* A CorpusLoader is responsible for loading and managing corpora. A corpus is implemented as a list of NLPInstance
* objects. Each CorpusLoader maintains a list of such corpora, which can be extended by loading corpora from files. The
* loader displays the corpora and allows the user to select one such corpus. The selected corpus will then be used by
* other components (such as the {@link com.googlecode.whatswrong.CorpusNavigator}) to pick and render NLPInstance
* objects from.
* <p/>
* <p>A CorpusLoader sends out messages to {@link com.googlecode.whatswrong.CorpusLoader.Listener} objects whenever a
* new corpus is added, removed or selected.
* <p/>
* <p>The CorpusLoader loads files using {@link com.googlecode.whatswrong.io.CorpusFormat} objects. Each such object
* provides a Swing panel that will be used in the file dialog to configure how the particular format needs to be
* loaded.
*
* @author Sebastian Riedel
"""
class CorpusLoader:
    """Keep track of loaded corpora and notify listeners about changes.

    The loader holds a list of corpora, the file names they came from, a
    mapping from format names to format objects, and a list of change
    listeners. Listeners are informed through ``fireAdded``, ``fireRemoved``
    and ``fireSelected``.

    NOTE: this is a partial port of a Java/Swing class. The GUI parts (file
    chooser, remove button, layout) are not implemented yet, so several
    methods below are stubs that return ``None``.
    """

    class Listener:
        """Listen to events of a CorpusLoader.

        The default callbacks do nothing; override the ones of interest.
        """

        def corpusAdded(self, corpus, src):
            """Called when a new corpus is added.

            :param corpus: the corpus that was added.
            :param src: the loader which added the corpus.
            """

        def corpusRemoved(self, corpus, src):
            """Called when a corpus is removed.

            :param corpus: the corpus which was removed.
            :param src: the loader which removed the corpus.
            """

        def corpusSelected(self, corpus, src):
            """Called when a corpus is selected.

            :param corpus: the selected corpus.
            :param src: the loader which selected the corpus.
            """

    class LoadAccessory:
        """File-dialog accessory for choosing the format and instance range.

        It is meant to hold the widgets that define the first and last
        instance and the format to load, plus a format-specific accessory.
        The widgets are not created yet; every field starts out as ``None``.
        """

        def __init__(self):
            """Create a new LoadAccessory with all widgets unset."""
            self._filetypeComboBox = None
            self._start = None
            self._end = None
            self._accessoryCards = None
            # TODO: build the widgets (QtDesigner).

        @property
        def filetypeComboBox(self):
            """The combo box to pick the format from."""
            return self._filetypeComboBox

        @filetypeComboBox.setter
        def filetypeComboBox(self, value):
            self._filetypeComboBox = value

        @property
        def start(self):
            """The spinner to pick the first instance."""
            return self._start

        @start.setter
        def start(self, value):
            self._start = value

        @property
        def end(self):
            """The spinner to pick the last instance."""
            return self._end

        @end.setter
        def end(self, value):
            self._end = value

        @property
        def accessoryCards(self):
            """The panel whose card layout stores each format's accessory."""
            return self._accessoryCards

        @accessoryCards.setter
        def accessoryCards(self, value):
            self._accessoryCards = value

        def getFormat(self):
            """Get the currently chosen format.

            Not implemented yet: always returns ``None``.
            TODO: return the selected item of ``filetypeComboBox``.
            """
            return None

        # TODO: getStart() is not ported yet; the Java original returned
        # the value of the ``start`` spinner's model.

        def getEnd(self):
            """Get the index of the last instance.

            Not implemented yet: always returns ``None``.
            TODO: return the value of the ``end`` spinner's model.
            """
            return None

    def __init__(self, title: str):
        """Create a new CorpusLoader with the given title.

        The id is derived from the title by replacing spaces with
        underscores and lower-casing the result.

        :param title: the title of this CorpusLoader.
        """
        self._selected = []
        self._corpora = []
        self._fileNames = []
        self._formats = {}
        self._changeListeners = []
        self._fileChooser = None
        self._accessory = None
        self._id = title.replace(" ", "_").lower()
        # XXX private JButton remove; (the button that removes the selected
        # corpus has not been ported).
        # TODO: GUI layout is not ported. The Java original used a
        # GridBagConstraints with gridx=0, gridy=0, gridwidth=2, a titled
        # border and setUpFormats().
        # XXX CORPUS LOADER NOT IMPLEMENTED!

    @property
    def selected(self):
        """The currently selected corpus.

        :return: a tuple copy of the stored selection, or ``None`` if the
            stored selection is ``None``.
        """
        if self._selected is None:
            return None
        return tuple(self._selected)

    @selected.setter
    def selected(self, value):
        self._selected = value

    @property
    def corpora(self):
        """The set of all loaded corpora."""
        return self._corpora

    @corpora.setter
    def corpora(self, value):
        self._corpora = value

    @property
    def fileNames(self):
        """The file names the corpora came from."""
        return self._fileNames

    @fileNames.setter
    def fileNames(self, value):
        self._fileNames = value

    @property
    def formats(self):
        """Mapping from format names to the format objects that load corpora."""
        return self._formats

    @formats.setter
    def formats(self, value):
        self._formats = value

    @property
    def changeListeners(self):
        """The list of listeners of this loader."""
        return self._changeListeners

    @changeListeners.setter
    def changeListeners(self, value):
        self._changeListeners = value

    @property
    def fileChooser(self):
        """The file chooser dialog (``None`` until one is assigned)."""
        return self._fileChooser

    @fileChooser.setter
    def fileChooser(self, value):
        self._fileChooser = value

    @property
    def id(self):
        """The id of this loader, used to qualify property keys."""
        return self._id

    @id.setter
    def id(self, value):
        self._id = value

    @property
    def accessory(self):
        """The file dialog accessory that defines the range of instances."""
        return self._accessory

    @accessory.setter
    def accessory(self, value):
        self._accessory = value

    def addChangeListener(self, changelistener):
        """Add a listener to this loader.

        :param changelistener: the listener to add.
        """
        self._changeListeners.append(changelistener)

    def fireAdded(self, corpus):
        """Notify all listeners that a corpus was added.

        :param corpus: the added corpus.
        """
        for listener in self._changeListeners:
            listener.corpusAdded(corpus, self)

    def fireRemoved(self, corpus):
        """Notify all listeners that a corpus was removed.

        :param corpus: the removed corpus.
        """
        for listener in self._changeListeners:
            listener.corpusRemoved(corpus, self)

    def fireSelected(self, corpus):
        """Notify all listeners that a corpus was selected.

        :param corpus: the selected corpus.
        """
        for listener in self._changeListeners:
            listener.corpusSelected(corpus, self)

    def addFormat(self, format):
        """Add a corpus format, keyed by the name it reports.

        :param format: the format to add; must provide ``getName()``.
        """
        self._formats[format.getName()] = format

    def setDirectory(self, dir):
        """Set the directory to use in the file dialog.

        Not implemented yet: the argument is ignored.

        :param dir: the directory of the file dialog.
        """

    def getDirectory(self):
        """Get the directory to use in the file dialog.

        Not implemented yet: always returns ``None``.
        """
        return None

    def loadProperties(self, properties):
        """Load the properties of this loader from the properties object.

        Reads the qualified ``dir`` key (default ``""``) and then lets every
        registered format load its own properties.

        :param properties: object providing ``getProperty(key, default)``.
        """
        self.setDirectory(properties.getProperty(self.property("dir"), ""))
        # TODO: restore the selected format once the accessory exists
        # (the Java original defaulted to "TAB-separated" and mapped the
        # legacy name "CoNLL" to it).
        for format in self._formats.values():
            format.loadProperties(properties, self._id)

    # NOTE: this method shadows the builtin ``property`` for the remainder of
    # the class body, so it must stay below every ``@property`` use above.
    def property(self, name: str) -> str:
        """Return ``name`` qualified with this loader's id.

        :param name: the name to qualify.
        :return: ``"<id>.<name>"``, for use as a key in properties objects.
        """
        return self._id + "." + name

    def saveProperties(self, properties):
        """Save the properties of this loader to the given properties object.

        Stores the directory under the qualified ``dir`` key and then lets
        every registered format save its own properties.

        :param properties: object providing ``setProperty(key, value)``.
        """
        properties.setProperty(self.property("dir"), self.getDirectory())
        # TODO: persist the selected format once the accessory exists.
        for format in self._formats.values():
            format.saveProperties(properties, self._id)