This repository has been archived by the owner on May 4, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
test-stanford-output.txt
633 lines (591 loc) · 96.6 KB
/
test-stanford-output.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
Test dataset: Domain1Test, Model name: Domain1Train
Results on 470 sentences and 12382 words, of which 1359 were unknown.
Unknown words right: 1073 (78.9551%); wrong: 286 (21.0449%).
Total tags right: 11686 (94.3789%); wrong: 696 (5.6211%).
Total sentences right: 152 (32.3404%); wrong: 318 (67.6596%).
==========================OOV Sampling========================
Sampling: randomly selected 10 sentences that contain OOV words.
Format: [(word, (gold_tag, test_tag)), (word, (gold_tag, test_tag))]
1: [('He', ('PRP', 'PRP')), ('opens', ('VBZ', 'VBZ')), ('his', ('PRP$', 'PRP$')), ('discourse', ('NN', 'NN')), (',', (',', ',')), ('however', ('RB', 'RB')), (',', (',', ',')), ('with', ('IN', 'IN')), ('a', ('DT', 'DT')), ('review', ('NN', 'NN')), ('of', ('IN', 'IN')), ('the', ('DT', 'DT')), ('Eisenhower', ('NNP', 'NNP')), ('inaugural', ('NN', 'NN')), ('festivities', ('NNS', 'NNS')), ('at', ('IN', 'IN')), ('which', ('WDT', 'WDT')), ('a', ('DT', 'DT')), ('sympathetic', ('JJ', 'JJ')), ('press', ('NN', 'VB')), ('had', ('VBD', 'VBD')), ('assembled', ('VBN', 'VBN')), ('its', ('PRP$', 'PRP$')), ('massive', ('JJ', 'JJ')), ('talents', ('NNS', 'NNS')), (',', (',', ',')), ('all', ('DT', 'DT')), ('primed', ('VBN', 'VBD')), ('to', ('TO', 'TO')), ('catch', ('VB', 'VB')), ('some', ('DT', 'DT')), ('revelation', ('NN', 'NN')), ('of', ('IN', 'IN')), ('the', ('DT', 'DT')), ('emerging', ('VBG', 'VBG')), ('new', ('JJ', 'JJ')), ('age', ('NN', 'NN')), ('.', ('.', '.'))]
2: [('Has', ('VBZ', 'VBZ')), ('not', ('RB', 'RB')), ('that', ('DT', 'DT')), ('way', ('NN', 'NN')), ('been', ('VBN', 'VBN')), ('lit', ('VBN', 'JJ')), ('always', ('RB', 'RB')), ('by', ('IN', 'IN')), ('the', ('DT', 'DT')), ('lamp', ('NN', 'NN')), ('of', ('IN', 'IN')), ('liberalism', ('NN', 'NN')), ('up', ('IN', 'IN')), ('until', ('IN', 'IN')), ('the', ('DT', 'DT')), ('turning', ('NN', 'VBG')), ('back', ('NN', 'RB')), ('under', ('IN', 'IN')), ('Eisenhower', ('NNP', 'NNP')), ('?', ('.', '.')), ('?', ('.', '.'))]
3: [('June', ('NNP', 'NN')), (',', (',', ',')), ('1940', ('CD', 'CD')), (',', (',', ',')), ('Sergeant', ('NNP', 'NNP')), ('Helion', ('NNP', 'NNP')), (',', (',', ',')), ('with', ('IN', 'IN')), ('a', ('DT', 'DT')), ('company', ('NN', 'NN')), ('of', ('IN', 'IN')), ('reserve', ('NN', 'NN')), ('troops', ('NNS', 'NNS')), ('waiting', ('VBG', 'VBG')), ('to', ('TO', 'TO')), ('go', ('VB', 'VB')), ('into', ('IN', 'IN')), ('battle', ('NN', 'NN')), (',', (',', ',')), ('was', ('VBD', 'VBD')), ('sketching', ('VBG', 'VBG')), ('the', ('DT', 'DT')), ('hills', ('NNS', 'NNS')), ('south', ('RB', 'NN')), ('of', ('IN', 'IN')), ('the', ('DT', 'DT')), ('Loire', ('NNP', 'NNP')), ('River', ('NNP', 'NNP')), (',', (',', ',')), ('when', ('WRB', 'WRB')), ('the', ('DT', 'DT')), ('war', ('NN', 'NN')), ('suddenly', ('RB', 'RB')), ('rolled', ('VBD', 'VBN')), ('in', ('IN', 'IN')), ('upon', ('IN', 'IN')), ('him', ('PRP', 'PRP')), ('.', ('.', '.'))]
4: [('Anyone', ('NN', 'NN')), ('who', ('WP', 'WP')), ('tried', ('VBD', 'VBD')), ('to', ('TO', 'TO')), ('remedy', ('VB', 'NN')), ('some', ('DT', 'DT')), ('of', ('IN', 'IN')), ('the', ('DT', 'DT')), ('most', ('JJS', 'RBS')), ('glaring', ('JJ', 'VBG')), ('defects', ('NNS', 'NNS')), ('in', ('IN', 'IN')), ('our', ('PRP$', 'PRP$')), ('form', ('NN', 'NN')), ('of', ('IN', 'IN')), ('democracy', ('NN', 'NN')), ('was', ('VBD', 'VBD')), ('denounced', ('VBN', 'VBN')), ('as', ('IN', 'IN')), ('a', ('DT', 'DT')), ('traitorous', ('JJ', 'JJ')), ('red', ('NN', 'JJ')), ('whose', ('WP$', 'WP$')), ('real', ('JJ', 'JJ')), ('purpose', ('NN', 'NN')), ('was', ('VBD', 'VBD')), ('the', ('DT', 'DT')), ('destruction', ('NN', 'NN')), ('of', ('IN', 'IN')), ('our', ('PRP$', 'PRP$')), ('government', ('NN', 'NN')), ('.', ('.', '.'))]
5: [('A', ('DT', 'DT')), ('successful', ('JJ', 'JJ')), ('businessman', ('NN', 'NN')), ('recently', ('RB', 'RB')), ('prefaced', ('VBD', 'VBN')), ('his', ('PRP$', 'PRP$')), ('address', ('NN', 'NN')), ('to', ('TO', 'TO')), ('a', ('DT', 'DT')), ('luncheon', ('NN', 'NN')), ('group', ('NN', 'NN')), ('with', ('IN', 'IN')), ('the', ('DT', 'DT')), ('statement', ('NN', 'NN')), ('that', ('IN', 'IN')), ('all', ('DT', 'DT')), ('economists', ('NNS', 'NNS')), ('should', ('MD', 'MD')), ('be', ('VB', 'VB')), ('sent', ('VBN', 'VBN')), ('to', ('TO', 'TO')), ('the', ('DT', 'DT')), ('hospitals', ('NNS', 'NNS')), ('for', ('IN', 'IN')), ('the', ('DT', 'DT')), ('mentally', ('RB', 'RB')), ('deranged', ('VBN', 'VBN')), ('where', ('WRB', 'WRB')), ('they', ('PRP', 'PRP')), ('and', ('CC', 'CC')), ('their', ('PRP$', 'PRP$')), ('theories', ('NNS', 'NNS')), ('might', ('MD', 'MD')), ('rot', ('VB', 'VB')), ('together', ('RB', 'RB')), ('.', ('.', '.'))]
6: [('The', ('DT', 'DT')), ('discrepancy', ('NN', 'NN')), ('between', ('IN', 'IN')), ('what', ('WP', 'WP')), ('we', ('PRP', 'PRP')), ('commonly', ('RB', 'RB')), ('profess', ('VBP', 'JJ')), ('and', ('CC', 'CC')), ('what', ('WP', 'WP')), ('we', ('PRP', 'PRP')), ('practice', ('VBP', 'NN')), ('or', ('CC', 'CC')), ('tolerate', ('VBP', 'NN')), ('is', ('VBZ', 'VBZ')), ('great', ('JJ', 'JJ')), (',', (',', ',')), ('and', ('CC', 'CC')), ('it', ('PRP', 'PRP')), ('does', ('VBZ', 'VBZ')), ('not', ('RB', 'RB')), ('escape', ('VB', 'VB')), ('the', ('DT', 'DT')), ('notice', ('NN', 'NN')), ('of', ('IN', 'IN')), ('others', ('NNS', 'NNS')), ('.', ('.', '.'))]
7: [('the', ('DT', 'DT')), ('Stalag', ('NNP', 'NNP')), (',', (',', ',')), ('Helion', ('NNP', 'NNP')), ('came', ('VBD', 'VBD')), ('to', ('TO', 'TO')), ('know', ('VB', 'VB')), ('and', ('CC', 'CC')), ('love', ('VB', 'VB')), ('his', ('PRP$', 'PRP$')), ('comrades', ('NNS', 'NNS')), (',', (',', ',')), ('most', ('JJS', 'RBS')), ('of', ('IN', 'IN')), ('them', ('PRP', 'PRP')), ('plain', ('JJ', 'VBP')), ('folk', ('NN', 'NN')), (',', (',', ',')), ('who', ('WP', 'WP')), (',', (',', ',')), ('in', ('IN', 'IN')), ('their', ('PRP$', 'PRP$')), ('extremity', ('NN', 'NN')), (',', (',', ',')), ('showed', ('VBD', 'VBN')), ('true', ('JJ', 'JJ')), ('courage', ('NN', 'NN')), ('and', ('CC', 'CC')), ('ran', ('VBD', 'VBD')), ('great', ('JJ', 'JJ')), ('risks', ('NNS', 'NNS')), ('to', ('TO', 'TO')), ('help', ('VB', 'VB')), ('each', ('DT', 'DT')), ('other', ('JJ', 'JJ')), ('.', ('.', '.'))]
8: [('Even', ('RB', 'RB')), ('so', ('RB', 'RB')), ('apparently', ('RB', 'RB')), ('impartial', ('JJ', 'JJ')), ('a', ('DT', 'DT')), ('critic', ('NN', 'NN')), ('as', ('IN', 'IN')), ('W.H.', ('NNP', 'NNP')), ('Frohock', ('NNP', 'NNP')), ('has', ('VBZ', 'VBZ')), ('taken', ('VBN', 'VBN')), ('for', ('IN', 'IN')), ('granted', ('VBN', 'VBN')), ('that', ('IN', 'IN')), ('the', ('DT', 'DT')), ('book', ('NN', 'NN')), ('was', ('VBD', 'VBD')), ('originally', ('RB', 'RB')), ('intended', ('VBN', 'VBN')), ('as', ('IN', 'IN')), ('a', ('DT', 'DT')), ('piece', ('NN', 'NN')), ('of', ('IN', 'IN')), ('Loyalist', ('JJ', 'NNP')), ('propaganda', ('NN', 'NN')), (';', (':', ':')), (';', (':', ':')), ('and', ('CC', 'CC')), ('has', ('VBZ', 'VBZ')), ('then', ('RB', 'RB')), ('gone', ('VBN', 'VBN')), ('on', ('RB', 'IN')), ('to', ('TO', 'TO')), ('argue', ('VB', 'VB')), (',', (',', ',')), ('with', ('IN', 'IN')), ('unimpeachable', ('JJ', 'JJ')), ('consistency', ('NN', 'NN')), (',', (',', ',')), ('that', ('IN', 'IN')), ('all', ('DT', 'PDT')), ('the', ('DT', 'DT')), ('obviously', ('RB', 'RB')), ('non-propagandistic', ('JJ', 'JJ')), ('aspects', ('NNS', 'NNS')), ('of', ('IN', 'IN')), ('the', ('DT', 'DT')), ('book', ('NN', 'NN')), ('are', ('VBP', 'VBP')), ('simply', ('RB', 'RB')), ('inadvertent', ('JJ', 'JJ')), ('``', ('``', '``')), ('contradictions', ('NNS', 'NNS')), ("''", ("''", "''")), ('.', ('.', '.'))]
9: [('The', ('DT', 'DT')), ('darkening', ('VBG', 'NN')), ('world', ('NN', 'NN')), ('scene', ('NN', 'NN')), (',', (',', ',')), ('at', ('IN', 'IN')), ('the', ('DT', 'DT')), ('time', ('NN', 'NN')), ('of', ('IN', 'IN')), ('the', ('DT', 'DT')), ('Munich', ('NNP', 'NNP')), ('Pact', ('NNP', 'NNP')), (',', (',', ',')), ('continued', ('VBD', 'VBD')), ('to', ('TO', 'TO')), ('trouble', ('VB', 'NN')), ('his', ('PRP$', 'PRP$')), ('mind', ('NN', 'NN')), ('even', ('RB', 'RB')), ('in', ('IN', 'IN')), ('his', ('PRP$', 'PRP$')), ('remote', ('JJ', 'JJ')), ('Virginia', ('NNP', 'NNP')), ('studio', ('NN', 'NN')), ('.', ('.', '.'))]
10: [('Demagogues', ('NNS', 'NNS')), ('of', ('IN', 'IN')), ('this', ('DT', 'DT')), ('sort', ('NN', 'NN')), ('found', ('VBD', 'VBD')), ('communist', ('NN', 'JJ')), ('bogeys', ('NNS', 'NNS')), ('lurking', ('VBG', 'VBG')), ('behind', ('IN', 'IN')), ('any', ('DT', 'DT')), ('new', ('JJ', 'JJ')), ('idea', ('NN', 'NN')), ('that', ('WDT', 'WDT')), ('would', ('MD', 'MD')), ('run', ('VB', 'VB')), ('counter', ('RB', 'NN')), ('to', ('TO', 'TO')), ('stereotyped', ('JJ', 'JJ')), ('notions', ('NNS', 'NNS')), ('.', ('.', '.'))]
========================End of OOV Sampling======================
==========================Confusion Matrix========================
| N N |
| N I D J N N R |
| N N T J , S P B . |
----+----------------------------------------------------------------+
NN | <12.3%> . . 0.5% . 0.0% 0.1% 0.0% . |
IN | 0.0% <12.0%> 0.0% 0.0% . . 0.0% 0.2% . |
DT | . 0.0% <10.1%> 0.0% . . . . . |
JJ | 0.3% 0.0% . <6.8%> . . 0.1% 0.1% . |
, | . . . . <5.2%> . . . . |
NNS | 0.1% 0.0% . . . <4.7%> 0.0% . . |
NNP | 0.2% 0.0% 0.0% 0.4% . 0.0% <3.9%> 0.0% . |
RB | 0.1% 0.1% 0.0% 0.1% . . . <3.9%> . |
. | . . . . . . . . <4.2%>|
----+----------------------------------------------------------------+
(row = reference; col = test)
========================End of Confusion Matrix======================
+-------+--------------------+--------------------+---------------------+
| label | precision | recall | f-score |
+-------+--------------------+--------------------+---------------------+
| '' | 1.0 | 0.9473684210526315 | 0.972972972972973 |
| ( | 1.0 | 1.0 | 1.0 |
| ) | 1.0 | 1.0 | 1.0 |
| , | 1.0 | 1.0 | 1.0 |
| . | 1.0 | 1.0 | 1.0 |
| : | 1.0 | 0.9858156028368794 | 0.9928571428571429 |
| CC | 0.9901477832512315 | 0.995049504950495 | 0.9925925925925926 |
| CD | 0.8961038961038961 | 0.7752808988764045 | 0.8313253012048193 |
| DT | 0.9841897233201581 | 0.992822966507177 | 0.9884874950377135 |
| EX | 0.9523809523809523 | 1.0 | 0.975609756097561 |
| FW | 0.8571428571428571 | 0.8571428571428571 | 0.8571428571428571 |
| IN | 0.9770190413657256 | 0.9795918367346939 | 0.9783037475345169 |
| JJ | 0.8423153692614771 | 0.8978723404255319 | 0.8692070030895984 |
| JJR | 0.8666666666666667 | 0.52 | 0.65 |
| JJS | 0.9090909090909091 | 0.7407407407407407 | 0.8163265306122449 |
| MD | 0.9833333333333333 | 0.9725274725274725 | 0.9779005524861878 |
| NN | 0.9078478002378121 | 0.9356617647058824 | 0.9215449607724804 |
| NNP | 0.8970588235294118 | 0.8229342327150084 | 0.8583992963940194 |
| NNPS | 0.6 | 0.1111111111111111 | 0.18750000000000003 |
| NNS | 0.96843853820598 | 0.9557377049180328 | 0.962046204620462 |
| PDT | 0.75 | 0.3 | 0.4285714285714285 |
| POS | 0.8846153846153846 | 1.0 | 0.9387755102040816 |
| PRP | 0.9858585858585859 | 0.9959183673469387 | 0.9908629441624365 |
| PRP$ | 0.9957446808510638 | 1.0 | 0.9978678038379531 |
| RB | 0.9044943820224719 | 0.9235181644359465 | 0.9139072847682119 |
| RBR | 0.7037037037037037 | 0.95 | 0.8085106382978724 |
| RBS | 0.6666666666666666 | 1.0 | 0.8 |
| SYM | None | None | 0 |
| TO | 0.9936102236421726 | 1.0 | 0.9967948717948718 |
| VB | 0.9422110552763819 | 0.8802816901408451 | 0.9101941747572816 |
| VBD | 0.9217391304347826 | 0.9111747851002865 | 0.9164265129682997 |
| VBG | 0.9025974025974026 | 0.896774193548387 | 0.8996763754045307 |
| VBN | 0.8544303797468354 | 0.8517350157728707 | 0.8530805687203792 |
| VBP | 0.8641975308641975 | 0.8333333333333334 | 0.8484848484848484 |
| VBZ | 0.9637681159420289 | 0.9568345323741008 | 0.9602888086642598 |
| WDT | 0.9830508474576272 | 0.8787878787878788 | 0.9279999999999999 |
| WP | 1.0 | 1.0 | 1.0 |
| WP$ | 1.0 | 1.0 | 1.0 |
| WRB | 1.0 | 0.9459459459459459 | 0.9722222222222222 |
| `` | 1.0 | 1.0 | 1.0 |
+-------+--------------------+--------------------+---------------------+
Occurences in Predicted (tagged result)= [('NN', 1682), ('IN', 1523), ('DT', 1265), ('JJ', 1002), (',', 643), ('NNS', 602), ('NNP', 544), ('RB', 534), ('.', 514), ('PRP', 495), ('CC', 406), ('VB', 398), ('VBD', 345), ('VBN', 316), ('TO', 313), ('VBZ', 276), ('PRP$', 235), ('MD', 180), ('VBP', 162), ('VBG', 154), (':', 139), ('``', 90), ("''", 90), ('POS', 78), ('CD', 77), ('WP', 66), ('WDT', 59), ('WRB', 35), ('RBR', 27), ('JJS', 22), ('EX', 21), ('(', 18), ('RBS', 18), (')', 17), ('JJR', 15), ('FW', 7), ('NNPS', 5), ('PDT', 4), ('WP$', 4), ('SYM', 1)]
Occurences in Test (Gold standard) = [('NN', 1632), ('IN', 1519), ('DT', 1254), ('JJ', 940), (',', 643), ('NNS', 610), ('NNP', 593), ('RB', 523), ('.', 514), ('PRP', 490), ('VB', 426), ('CC', 404), ('VBD', 349), ('VBN', 317), ('TO', 311), ('VBZ', 278), ('PRP$', 234), ('MD', 182), ('VBP', 168), ('VBG', 155), (':', 141), ("''", 95), ('``', 90), ('CD', 89), ('POS', 69), ('WDT', 66), ('WP', 66), ('WRB', 37), ('NNPS', 27), ('JJS', 27), ('JJR', 25), ('RBR', 20), ('EX', 20), ('(', 18), (')', 17), ('RBS', 12), ('PDT', 10), ('FW', 7), ('WP$', 4)]
Dictionary of mislabelled tags = [(('JJ', 'NN'), 56), (('JJ', 'NNP'), 54), (('NN', 'JJ'), 39), (('NN', 'VB'), 30), (('VBN', 'VBD'), 27), (('NN', 'NNP'), 24), (('VBD', 'VBN'), 24), (('RB', 'IN'), 22), (('NNP', 'NNPS'), 21), (('JJ', 'VBN'), 20), (('VBN', 'JJ'), 18), (('RB', 'JJ'), 15), (('VBP', 'VB'), 15), (('NNP', 'NN'), 14), (('NN', 'VBG'), 13), (('NN', 'CD'), 12), (('NN', 'RB'), 12), (('JJ', 'RB'), 10), (('IN', 'RB'), 10), (('VB', 'VBP'), 10), (('VBG', 'JJ'), 10), (('NN', 'VBP'), 10), (('NNS', 'VBZ'), 9), (('VBZ', 'NNS'), 9), (('RBR', 'JJR'), 8), (('NNP', 'JJ'), 8), (('IN', 'WDT'), 8), (('NN', 'NNS'), 8), (('VB', 'NN'), 7), (('DT', 'PDT'), 6), (('NNP', 'NNS'), 6), (('RBS', 'JJS'), 6), (('IN', 'NNP'), 6), (('CD', 'NN'), 6), (('IN', 'DT'), 5), (('JJ', 'VBP'), 5), (('RB', 'NN'), 5), (('DT', 'RB'), 5), (('VBG', 'NN'), 5), (('JJ', 'VB'), 5), (('DT', 'NNP'), 5), (('POS', "''"), 5), (('DT', 'IN'), 4), (('NNS', 'NN'), 4), (('VBP', 'NN'), 4), (('NNS', 'NNPS'), 3), (('CC', 'NNP'), 3), (('POS', 'VBZ'), 3), (('PRP', 'CD'), 3), (('RB', 'JJR'), 3), (('RB', 'NNP'), 2), (('MD', 'NNP'), 2), (('NN', 'VBD'), 2), (('IN', 'VBP'), 2), (('TO', 'NNP'), 2), (('NNS', 'NNP'), 2), (('IN', 'NNS'), 2), (('NN', 'IN'), 2), (('NNP', 'CD'), 2), (('PRP', 'NN'), 2), (('NNPS', 'NNS'), 2), (('NNP', 'MD'), 2), (('JJ', 'VBG'), 2), (('RB', 'WRB'), 2), (('JJ', 'DT'), 2), (('CC', 'IN'), 1), (('NNS', 'CD'), 1), (('NN', 'VBN'), 1), (('VB', 'RB'), 1), (('POS', 'PRP'), 1), (('IN', 'MD'), 1), (('VBN', 'VB'), 1), (('JJS', 'NNP'), 1), (('VBD', 'JJ'), 1), (('NN', 'JJS'), 1), (('VB', 'JJ'), 1), (('RB', 'CC'), 1), (('NNP', ':'), 1), (('CD', ':'), 1), (('JJ', 'CD'), 1), (('JJ', 'IN'), 1), (('JJR', 'NN'), 1), (('VB', 'VBD'), 1), (('JJR', 'RBR'), 1), (('NNP', 'VBG'), 1), (('NNP', 'IN'), 1), (('VB', 'MD'), 1), (('PRP$', 'PRP'), 1), (('JJ', 'PDT'), 1), (('WDT', 'DT'), 1), (('VB', 'JJR'), 1), (('VBZ', 'RB'), 1), (('JJ', 'VBD'), 1), (('EX', 'RB'), 1), (('VBP', 'MD'), 1), (('VBP', 'JJ'), 1), (('RB', 'CD'), 1), (('IN', 'JJ'), 1), (('MD', 'NN'), 1), (('PRP', 'NNP'), 1), (('SYM', 'CC'), 1), (('FW', 'NNP'), 1), (('VB', 'VBN'), 1), (('NN', 'FW'), 1), (('VBD', 'VBP'), 1), (('JJS', 'JJ'), 1), (('VBP', 'VBN'), 1), (('PRP', 'JJ'), 1), (('VBD', 'NNP'), 1), (('CD', 'NNP'), 1), (('PDT', 'DT'), 1)]
Test dataset: Domain1Test, Model name: Domain2Train
Results on 470 sentences and 12382 words, of which 1832 were unknown.
Unknown words right: 1300 (70.9607%); wrong: 532 (29.0393%).
Total tags right: 11422 (92.2468%); wrong: 960 (7.7532%).
Total sentences right: 97 (20.6383%); wrong: 373 (79.3617%).
==========================OOV Sampling========================
Sampling: randomly selected 10 sentences that contain OOV words.
Format: [(word, (gold_tag, test_tag)), (word, (gold_tag, test_tag))]
1: [('How', ('WRB', 'WRB')), ('would', ('MD', 'MD')), ('Thomas', ('NNP', 'NNP')), ('Jefferson', ('NNP', 'NNP')), ('feel', ('VB', 'NN')), ('after', ('IN', 'IN')), ('reading', ('VBG', 'VBG')), ('Factories', ('NNPS', 'NNS')), ('In', ('NNP', 'IN')), ('The', ('NNP', 'DT')), ('Field', ('NNP', 'NNP')), ('?', ('.', '.')), ('?', ('.', '.'))]
2: [('Attorney', ('NNP', 'NNP')), ('General', ('NNP', 'NNP')), ('Palmer', ('NNP', 'NNP')), ('made', ('VBD', 'VBD')), ('a', ('DT', 'DT')), ('series', ('NN', 'NNS')), ('of', ('IN', 'IN')), ('raids', ('NNS', 'NNS')), ('that', ('WDT', 'WP')), ('sent', ('VBD', 'VBD')), ('more', ('JJR', 'JJR')), ('than', ('IN', 'IN')), ('4,000', ('CD', 'CD')), ('so-called', ('JJ', 'JJ')), ('radicals', ('NNS', 'NNS')), ('to', ('TO', 'TO')), ('the', ('DT', 'DT')), ('jails', ('NNS', 'NNS')), (',', (',', ',')), ('in', ('IN', 'IN')), ('direct', ('JJ', 'JJ')), ('violation', ('NN', 'NN')), ('of', ('IN', 'IN')), ('their', ('PRP$', 'PRP$')), ('constitutional', ('JJ', 'JJ')), ('rights', ('NNS', 'NNS')), ('.', ('.', '.'))]
3: [('Corporations', ('NNS', 'NNS')), ('react', ('VBP', 'JJ')), ('violently', ('RB', 'RB')), ('to', ('TO', 'TO')), ('short-range', ('JJ', 'VB')), ('stimuli', ('NNS', 'NN')), (',', (',', ',')), ('e.g.', ('FW', 'RB')), (',', (',', ',')), ('quarterly', ('JJ', 'RB')), ('and', ('CC', 'CC')), ('annual', ('JJ', 'JJ')), ('dividend', ('NN', 'NN')), ('reports', ('NNS', 'NNS')), ('.', ('.', '.'))]
4: [('What', ('WP', 'WP')), ('is', ('VBZ', 'VBZ')), ('more', ('JJR', 'RBR')), (',', (',', ',')), ('the', ('DT', 'DT')), ('legends', ('NNS', 'NNS')), ('have', ('VBP', 'VBP')), ('become', ('VBN', 'VBN')), ('so', ('RB', 'RB')), ('sacrosanct', ('JJ', 'JJ')), ('that', ('IN', 'IN')), ('the', ('DT', 'DT')), ('very', ('JJ', 'JJ')), ('habit', ('NN', 'NN')), ('of', ('IN', 'IN')), ('self-examination', ('NN', 'NN')), ('or', ('CC', 'CC')), ('self-criticism', ('NN', 'JJ')), ('smells', ('VBZ', 'NNS')), ('of', ('IN', 'IN')), ('low', ('JJ', 'JJ')), ('treason', ('NN', 'NN')), (',', (',', ',')), ('and', ('CC', 'CC')), ('men', ('NNS', 'NNS')), ('who', ('WP', 'WP')), ('practice', ('VBP', 'VBD')), ('it', ('PRP', 'PRP')), ('are', ('VBP', 'VBP')), ('defeatists', ('NNS', 'NNS')), ('and', ('CC', 'CC')), ('unpatriotic', ('JJ', 'JJ')), ('scoundrels', ('NNS', 'NNS')), ('.', ('.', '.'))]
5: [('Many', ('JJ', 'JJ')), ('Americans', ('NNPS', 'NNS')), ('reacted', ('VBD', 'VBD')), ('irrationally', ('RB', 'RB')), ('to', ('TO', 'TO')), ('the', ('DT', 'DT')), ('challenge', ('NN', 'NN')), ('of', ('IN', 'IN')), ('Russia', ('NNP', 'NNP')), ('and', ('CC', 'CC')), ('turned', ('VBD', 'VBD')), ('to', ('TO', 'TO')), ('the', ('DT', 'DT')), ('repression', ('NN', 'NN')), ('of', ('IN', 'IN')), ('ideas', ('NNS', 'NNS')), ('by', ('IN', 'IN')), ('force', ('NN', 'NN')), ('.', ('.', '.'))]
6: [('``', ('``', '``')), ('We', ('PRP', 'PRP')), ('were', ('VBD', 'VBD')), ('possessed', ('VBN', 'VBN')), ('by', ('IN', 'IN')), ('visions', ('NNS', 'NNS')), ('of', ('IN', 'IN')), ('a', ('DT', 'DT')), ('new', ('JJ', 'JJ')), ('civilization', ('NN', 'NN')), ('to', ('TO', 'TO')), ('come', ('VB', 'VB')), (',', (',', ',')), ('very', ('RB', 'RB')), ('pure', ('JJ', 'JJ')), ('and', ('CC', 'CC')), ('elevated', ('JJ', 'JJ')), ("''", ("''", "''")), (',', (',', ',')), ('he', ('PRP', 'PRP')), ('has', ('VBZ', 'VBZ')), ('said', ('VBD', 'VBD')), (',', (',', ',')), ('``', ('``', '``')), ('in', ('IN', 'IN')), ('fact', ('NN', 'NN')), ('some', ('DT', 'DT')), ('ideal', ('JJ', 'JJ')), ('form', ('NN', 'NN')), ('of', ('IN', 'IN')), ('socialism', ('NN', 'JJ')), ('such', ('JJ', 'JJ')), ('as', ('IN', 'IN')), ('we', ('PRP', 'PRP')), ('had', ('VBD', 'VBD')), ('dreamed', ('VBN', 'VBN')), ('of', ('IN', 'IN')), ('since', ('IN', 'IN')), ('the', ('DT', 'DT')), ('war', ('NN', 'NN')), ('of', ('IN', 'IN')), ('1914-1918', ('CD', 'NN')), ("''", ("''", "''")), ('.', ('.', '.'))]
7: [('recollection', ('NN', 'NN')), ('he', ('PRP', 'PRP')), ('has', ('VBZ', 'VBZ')), ('said', ('VBD', 'VBD')), (':', (':', ':')), ('``', ('``', '``')), ('Natural', ('JJ', 'NNP')), ('or', ('CC', 'CC')), ('man-made', ('JJ', 'JJ')), ('objects', ('NNS', 'NNS')), ('kept', ('VBD', 'VBD')), ('coming', ('VBG', 'VBG')), ('into', ('IN', 'IN')), ('my', ('PRP$', 'PRP$')), ('head', ('NN', 'NN')), (',', (',', ',')), ('but', ('CC', 'CC')), ('I', ('PRP', 'PRP')), ('would', ('MD', 'MD')), ('suppress', ('VB', 'VB')), ('them', ('PRP', 'PRP')), ('sternly', ('RB', 'RB')), ("''", ("''", "''")), ('.', ('.', '.'))]
8: [('But', ('CC', 'CC')), ('as', ('IN', 'IN')), ('he', ('PRP', 'PRP')), ('remarks', ('VBZ', 'VBZ')), ('in', ('IN', 'IN')), ('his', ('PRP$', 'PRP$')), ('preface', ('NN', 'NN')), ('to', ('TO', 'TO')), ('The', ('DT', 'DT')), ('Walnut', ('NNP', 'NNP')), ('Trees', ('NNP', 'NNP')), (',', (',', ',')), ('``', ('``', '``')), ('a', ('DT', 'DT')), ('novel', ('NN', 'NN')), ('can', ('MD', 'MD')), ('hardly', ('RB', 'RB')), ('ever', ('RB', 'RB')), ('be', ('VB', 'VB')), ('rewritten', ('VBN', 'VBN')), ("''", ("''", "''")), (',', (',', ',')), ('and', ('CC', 'CC')), ('``', ('``', '``')), ('when', ('WRB', 'WRB')), ('this', ('DT', 'DT')), ('one', ('CD', 'CD')), ('appears', ('VBZ', 'NNS')), ('in', ('IN', 'IN')), ('its', ('PRP$', 'PRP$')), ('final', ('JJ', 'JJ')), ('form', ('NN', 'NN')), (',', (',', ',')), ('the', ('DT', 'DT')), ('form', ('NN', 'NN')), ('of', ('IN', 'IN')), ('the', ('DT', 'DT')), ('first', ('JJ', 'JJ')), ('part', ('NN', 'NN')), ('will', ('MD', 'MD')), ('no', ('RB', 'DT')), ('doubt', ('NN', 'NN')), ('be', ('VB', 'VB')), ('radically', ('RB', 'RB')), ('changed', ('VBN', 'VBD')), ("''", ("''", "''")), ('.', ('.', '.'))]
9: [('Strikes', ('NNS', 'NNS')), ('should', ('MD', 'MD')), ('be', ('VB', 'VB')), ('declared', ('VBN', 'VBN')), ('illegal', ('JJ', 'JJ')), ('against', ('IN', 'IN')), ('corporations', ('NNS', 'NNS')), ('because', ('IN', 'IN')), ('disagreements', ('NNS', 'NNS')), ('would', ('MD', 'MD')), ('have', ('VB', 'VB')), ('to', ('TO', 'TO')), ('be', ('VB', 'VB')), ('settled', ('VBN', 'VBN')), ('by', ('IN', 'IN')), ('government', ('NN', 'JJ')), ('representatives', ('NNS', 'NNS')), ('acting', ('VBG', 'VBG')), ('as', ('IN', 'IN')), ('controllers', ('NNS', 'NNS')), ('of', ('IN', 'IN')), ('the', ('DT', 'DT')), ('corporation', ('NN', 'NN')), ('whose', ('WP$', 'WP$')), ('responsibility', ('NN', 'NN')), ('to', ('TO', 'TO')), ('the', ('DT', 'DT')), ('state', ('NN', 'NN')), ('would', ('MD', 'MD')), ('now', ('RB', 'RB')), ('be', ('VB', 'VB')), ('defined', ('VBN', 'VBN')), ('against', ('IN', 'IN')), ('proprietorship', ('NN', 'NN')), ('because', ('IN', 'IN')), ('employees', ('NNS', 'NNS')), ('and', ('CC', 'CC')), ('proprietors', ('NNS', 'NNS')), ('must', ('MD', 'MD')), ('be', ('VB', 'VB')), ('completely', ('RB', 'RB')), ('interdependent', ('JJ', 'JJ')), (',', (',', ',')), ('as', ('IN', 'IN')), ('they', ('PRP', 'PRP')), ('are', ('VBP', 'VBP')), ('each', ('DT', 'DT')), ('a', ('DT', 'DT')), ('part', ('NN', 'NN')), ('of', ('IN', 'IN')), ('the', ('DT', 'DT')), ('whole', ('NN', 'JJ')), ('.', ('.', '.'))]
10: [('This', ('DT', 'DT')), ('magnificent', ('JJ', 'JJ')), ('but', ('CC', 'CC')), ('greatly', ('RB', 'RB')), ('underestimated', ('VBN', 'VBN')), ('book', ('NN', 'NN')), (',', (',', ',')), ('which', ('WDT', 'WDT')), ('bodies', ('VBZ', 'NNS')), ('forth', ('RB', 'IN')), ('the', ('DT', 'DT')), ('very', ('JJ', 'JJ')), ('form', ('NN', 'NN')), ('and', ('CC', 'CC')), ('pressure', ('NN', 'NN')), ('of', ('IN', 'IN')), ('its', ('PRP$', 'PRP$')), ('time', ('NN', 'NN')), ('as', ('IN', 'IN')), ('no', ('DT', 'DT')), ('other', ('JJ', 'JJ')), ('comparable', ('JJ', 'JJ')), ('creation', ('NN', 'NN')), (',', (',', ',')), ('has', ('VBZ', 'VBZ')), ('suffered', ('VBN', 'VBN')), ('severely', ('RB', 'RB')), ('from', ('IN', 'IN')), ('having', ('VBG', 'VBG')), ('been', ('VBN', 'VBN')), ('written', ('VBN', 'VBN')), ('about', ('IN', 'IN')), ('an', ('DT', 'DT')), ('historical', ('JJ', 'JJ')), ('event', ('NN', 'NN')), ('--', (':', ':')), ('the', ('DT', 'DT')), ('Spanish', ('NNP', 'NNP')), ('Civil', ('NNP', 'NNP')), ('War', ('NNP', 'NNP')), ('--', (':', ':')), ('that', ('DT', 'DT')), ('is', ('VBZ', 'VBZ')), ('still', ('RB', 'RB')), ('capable', ('JJ', 'JJ')), ('of', ('IN', 'IN')), ('fanning', ('VBG', 'VBG')), ('the', ('DT', 'DT')), ('smoldering', ('VBG', 'NN')), ('fires', ('NNS', 'NNS')), ('of', ('IN', 'IN')), ('old', ('JJ', 'JJ')), ('political', ('JJ', 'JJ')), ('feuds', ('NNS', 'NNS')), ('.', ('.', '.'))]
========================End of OOV Sampling======================
==========================Confusion Matrix========================
| N N |
| N I D J N N R |
| N N T J , S P B . |
----+----------------------------------------------------------------+
NN | <11.9%> . . 0.7% . 0.1% 0.1% 0.1% . |
IN | 0.0% <11.8%> 0.0% 0.0% . . 0.1% 0.2% . |
DT | . 0.0% <10.1%> 0.0% . . . 0.0% . |
JJ | 0.8% 0.0% 0.1% <6.2%> . 0.0% 0.1% 0.1% . |
, | . . . . <5.2%> . . . . |
NNS | 0.1% 0.0% . 0.0% . <4.8%> 0.0% . . |
NNP | 0.2% 0.0% 0.0% 0.2% . 0.0% <4.1%> 0.0% . |
RB | 0.1% 0.1% 0.0% 0.1% . 0.0% 0.0% <3.8%> . |
. | . . . . . . . . <4.2%>|
----+----------------------------------------------------------------+
(row = reference; col = test)
========================End of Confusion Matrix======================
+-------+---------------------+----------------------+---------------------+
| label | precision | recall | f-score |
+-------+---------------------+----------------------+---------------------+
| $ | None | None | 0 |
| '' | 1.0 | 1.0 | 1.0 |
| ( | 1.0 | 1.0 | 1.0 |
| ) | 1.0 | 1.0 | 1.0 |
| , | 1.0 | 1.0 | 1.0 |
| . | 1.0 | 1.0 | 1.0 |
| : | 1.0 | 0.9858156028368794 | 0.9928571428571429 |
| CC | 0.9877149877149877 | 0.995049504950495 | 0.9913686806411838 |
| CD | 0.8507462686567164 | 0.6404494382022472 | 0.7307692307692307 |
| DT | 0.9726989079563183 | 0.9944178628389154 | 0.9834384858044164 |
| EX | 1.0 | 0.65 | 0.787878787878788 |
| FW | None | None | 0 |
| IN | 0.9746835443037974 | 0.9631336405529954 | 0.9688741721854305 |
| JJ | 0.8277356446370531 | 0.8127659574468085 | 0.8201825013419217 |
| JJR | 0.6086956521739131 | 0.56 | 0.5833333333333334 |
| JJS | 1.0 | 0.7777777777777778 | 0.8750000000000001 |
| MD | 0.994413407821229 | 0.978021978021978 | 0.9861495844875345 |
| NN | 0.8588921282798834 | 0.9025735294117647 | 0.8801912160143412 |
| NNP | 0.8848167539267016 | 0.8549747048903878 | 0.869639794168096 |
| NNPS | 0.16666666666666666 | 0.037037037037037035 | 0.06060606060606061 |
| NNS | 0.8843843843843844 | 0.9655737704918033 | 0.9231974921630094 |
| PDT | 0.5 | 0.1 | 0.16666666666666669 |
| POS | 0.971830985915493 | 1.0 | 0.9857142857142858 |
| PRP | 0.9959266802443992 | 0.9979591836734694 | 0.9969418960244649 |
| PRP$ | 0.9957446808510638 | 1.0 | 0.9978678038379531 |
| RB | 0.8589743589743589 | 0.8967495219885278 | 0.8774555659494854 |
| RBR | 0.7692307692307693 | 0.5 | 0.6060606060606061 |
| RBS | 0.6666666666666666 | 1.0 | 0.8 |
| RP | None | None | 0 |
| TO | 0.9936102236421726 | 1.0 | 0.9967948717948718 |
| VB | 0.828125 | 0.8708920187793427 | 0.8489702517162472 |
| VBD | 0.8230958230958231 | 0.9598853868194842 | 0.8862433862433863 |
| VBG | 0.8764705882352941 | 0.9612903225806452 | 0.9169230769230768 |
| VBN | 0.9204545454545454 | 0.7665615141955836 | 0.8364888123924268 |
| VBP | 0.9739130434782609 | 0.6666666666666666 | 0.7915194346289752 |
| VBZ | 0.9736842105263158 | 0.7985611510791367 | 0.8774703557312253 |
| WDT | 1.0 | 0.6666666666666666 | 0.8 |
| WP | 0.868421052631579 | 1.0 | 0.9295774647887324 |
| WP$ | 1.0 | 1.0 | 1.0 |
| WRB | 0.972972972972973 | 0.972972972972973 | 0.972972972972973 |
| `` | 1.0 | 1.0 | 1.0 |
+-------+---------------------+----------------------+---------------------+
Occurences in Predicted (tagged result)= [('NN', 1715), ('IN', 1501), ('DT', 1282), ('JJ', 923), ('NNS', 666), (',', 643), ('NNP', 573), ('RB', 546), ('.', 514), ('PRP', 491), ('VB', 448), ('VBD', 407), ('CC', 407), ('TO', 313), ('VBN', 264), ('PRP$', 235), ('VBZ', 228), ('MD', 179), ('VBG', 170), (':', 139), ('VBP', 115), ("''", 95), ('``', 90), ('WP', 76), ('POS', 71), ('CD', 67), ('WDT', 44), ('WRB', 37), ('JJR', 23), ('JJS', 21), ('(', 18), ('RBS', 18), (')', 17), ('RBR', 13), ('EX', 13), ('RP', 6), ('NNPS', 6), ('WP$', 4), ('PDT', 2), ('$', 2)]
Occurences in Test (Gold standard) = [('NN', 1632), ('IN', 1519), ('DT', 1254), ('JJ', 940), (',', 643), ('NNS', 610), ('NNP', 593), ('RB', 523), ('.', 514), ('PRP', 490), ('VB', 426), ('CC', 404), ('VBD', 349), ('VBN', 317), ('TO', 311), ('VBZ', 278), ('PRP$', 234), ('MD', 182), ('VBP', 168), ('VBG', 155), (':', 141), ("''", 95), ('``', 90), ('CD', 89), ('POS', 69), ('WDT', 66), ('WP', 66), ('WRB', 37), ('NNPS', 27), ('JJS', 27), ('JJR', 25), ('RBR', 20), ('EX', 20), ('(', 18), (')', 17), ('RBS', 12), ('PDT', 10), ('FW', 7), ('WP$', 4)]
Dictionary of mislabelled tags = [(('NN', 'JJ'), 100), (('JJ', 'NN'), 81), (('NNS', 'VBZ'), 52), (('VBD', 'VBN'), 52), (('NN', 'VB'), 39), (('VB', 'VBP'), 34), (('RB', 'IN'), 29), (('JJ', 'NNP'), 27), (('NN', 'NNP'), 27), (('NN', 'CD'), 25), (('VB', 'NN'), 21), (('NNP', 'NNPS'), 20), (('NN', 'RB'), 18), (('NNP', 'NN'), 15), (('RB', 'JJ'), 15), (('JJ', 'VBN'), 14), (('IN', 'WDT'), 13), (('VBG', 'JJ'), 13), (('JJ', 'RB'), 11), (('VB', 'JJ'), 11), (('DT', 'JJ'), 11), (('NNP', 'JJ'), 10), (('CD', 'NN'), 10), (('IN', 'RB'), 10), (('NNS', 'NN'), 10), (('NNP', 'IN'), 10), (('VBN', 'JJ'), 10), (('VBN', 'VBD'), 9), (('WP', 'WDT'), 9), (('RB', 'NN'), 9), (('DT', 'PDT'), 9), (('JJR', 'RBR'), 8), (('JJ', 'VB'), 8), (('VBG', 'NN'), 8), (('VBD', 'VBP'), 8), (('RB', 'EX'), 7), (('NN', 'NNS'), 7), (('NN', 'VBP'), 7), (('RP', 'IN'), 6), (('DT', 'RB'), 6), (('RBS', 'JJS'), 6), (('NN', 'VBN'), 6), (('JJ', 'VBP'), 6), (('NNS', 'NNPS'), 6), (('IN', 'NNP'), 5), (('VBZ', 'NNS'), 5), (('DT', 'NNP'), 5), (('DT', 'IN'), 4), (('NNP', 'CD'), 4), (('VBD', 'VB'), 4), (('RB', 'NNP'), 4), (('RB', 'JJR'), 4), (('VBD', 'NN'), 3), (('NN', 'VBG'), 3), (('NNS', 'RB'), 3), (('CC', 'NNP'), 3), (('RB', 'FW'), 3), (('VB', 'RB'), 3), (('NN', 'VBD'), 3), (('NN', 'IN'), 3), (('NNS', 'NNP'), 3), (('NNPS', 'NNP'), 3), (('NNP', 'NNS'), 3), (('JJ', 'VBG'), 3), (('IN', 'DT'), 3), (('RBR', 'JJR'), 3), (('NN', 'JJR'), 2), (('RB', 'DT'), 2), (('NNP', 'RB'), 2), (('JJ', 'NNS'), 2), (('TO', 'NNP'), 2), (('VBD', 'JJ'), 2), (('VB', 'JJR'), 2), (('$', ':'), 2), (('VBD', 'VBZ'), 2), (('VB', 'NNP'), 2), (('NNPS', 'NNS'), 2), (('JJ', 'MD'), 2), (('IN', 'JJ'), 2), (('POS', 'VBZ'), 2), (('PRP', 'NNP'), 2), (('VB', 'VBN'), 2), (('VBN', 'VB'), 2), (('NNS', 'JJ'), 2), (('CC', 'IN'), 1), (('WRB', 'NNP'), 1), (('WP', 'IN'), 1), (('NNS', 'CD'), 1), (('CC', 'RB'), 1), (('NN', 'MD'), 1), (('VBP', 'VB'), 1), (('JJ', 'FW'), 1), (('RB', 'VB'), 1), (('VB', 'NNS'), 1), (('VBP', 'NN'), 1), (('IN', 'FW'), 1), (('RB', 'CC'), 1), (('JJ', 'IN'), 1), (('IN', 'VBP'), 1), (('IN', 'NNS'), 1), (('VB', 'VBD'), 1), (('RB', 'RBR'), 1), (('IN', 'MD'), 1), (('PRP$', 'PRP'), 1), (('JJR', 'NN'), 1), (('JJ', 'VBD'), 1), (('VBZ', 'IN'), 1), (('IN', 'CD'), 1), (('JJ', 'CD'), 1), (('MD', 'NNP'), 1), (('NNP', 'CC'), 1), (('VBP', 'FW'), 1), (('NN', 'RBR'), 1), (('NNP', 'FW'), 1), (('RB', 'WRB'), 1), (('JJ', 'DT'), 1), (('VBD', 'NNP'), 1), (('PDT', 'DT'), 1)]
Test dataset: Domain2Test, Model name: Domain1Train
Results on 544 sentences and 7082 words, of which 897 were unknown.
Unknown words right: 582 (64.8829%); wrong: 315 (35.1171%).
Total tags right: 6445 (91.0054%); wrong: 637 (8.9946%).
Total sentences right: 178 (32.7206%); wrong: 366 (67.2794%).
==========================OOV Sampling========================
Sampling: randomly selected 10 sentences that contain OOV words.
Format: [(word, (gold_tag, test_tag)), (word, (gold_tag, test_tag))]
1: [('a', ('DT', 'DT')), ('bucket', ('NN', 'JJ')), ('line', ('NN', 'NN')), ('going', ('VBG', 'VBG')), ("''", ("''", "''")), ('!', ('.', '.')), ('!', ('.', '.'))]
2: [('The', ('DT', 'DT')), ('guerrillas', ('NNS', 'NNS')), ('were', ('VBD', 'VBD')), ('running', ('VBG', 'VBG')), ('across', ('IN', 'IN')), ('the', ('DT', 'DT')), ('parade', ('NN', 'JJ')), ('ground', ('NN', 'NN')), ('and', ('CC', 'CC')), ('through', ('IN', 'IN')), ('the', ('DT', 'DT')), ('rear', ('JJ', 'JJ')), ('gate', ('NN', 'NN')), ('in', ('IN', 'IN')), ('the', ('DT', 'DT')), ('wake', ('NN', 'NN')), ('of', ('IN', 'IN')), ('the', ('DT', 'DT')), ('departing', ('VBG', 'VBG')), ('horses', ('NNS', 'NNS')), ('.', ('.', '.'))]
3: [('He', ('PRP', 'PRP')), ('turned', ('VBD', 'VBD')), ('and', ('CC', 'CC')), ('raced', ('VBD', 'VBN')), ('across', ('IN', 'IN')), ('the', ('DT', 'DT')), ('parade', ('NN', 'JJ')), ('ground', ('NN', 'NN')), ('toward', ('IN', 'IN')), ('the', ('DT', 'DT')), ('rock', ('NN', 'NN')), ('house', ('NN', 'NN')), ('.', ('.', '.'))]
4: [('the', ('DT', 'DT')), ('porch', ('NN', 'NN')), ('rail', ('NN', 'JJ')), ('beyond', ('IN', 'IN')), ('view', ('NN', 'NN')), ('of', ('IN', 'IN')), ('the', ('DT', 'DT')), ('bar', ('NN', 'NN')), ('windows', ('NNS', 'NNS')), (',', (',', ',')), ('he', ('PRP', 'PRP')), ('feverishly', ('RB', 'RB')), ('scanned', ('VBD', 'VBD')), ('the', ('DT', 'DT')), ('busy', ('JJ', 'JJ')), ('street', ('NN', 'NN')), ('below', ('RB', 'RB')), ('.', ('.', '.'))]
5: [('Behind', ('IN', 'IN')), ('its', ('PRP$', 'PRP$')), ('ornate', ('JJ', 'JJ')), ('facade', ('NN', 'NN')), ('the', ('DT', 'DT')), ('notorious', ('JJ', 'JJ')), ('dive', ('NN', 'NN')), ('clung', ('VBD', 'VBD')), ('like', ('IN', 'IN')), ('a', ('DT', 'DT')), ('bird', ('NN', 'NN')), ("'s", ('POS', 'POS')), ('nest', ('NN', 'NN')), ('to', ('TO', 'TO')), ('the', ('DT', 'DT')), ('rocky', ('JJ', 'NN')), ('ribs', ('NNS', 'NNS')), ('of', ('IN', 'IN')), ('the', ('DT', 'DT')), ('canyonside', ('NN', 'NN')), ('.', ('.', '.'))]
6: [('Cobb', ('NNP', 'RB')), ('watched', ('VBD', 'VBD')), ('this', ('DT', 'DT')), ('with', ('IN', 'IN')), ('hunted', ('JJ', 'JJ')), ('eyes', ('NNS', 'NNS')), (',', (',', ',')), ('his', ('PRP$', 'PRP$')), ('desperate', ('JJ', 'JJ')), ('hope', ('NN', 'NN')), ('waning', ('VBG', 'VBG')), ('by', ('IN', 'IN')), ('the', ('DT', 'DT')), ('moment', ('NN', 'NN')), ('.', ('.', '.'))]
7: [('Pat', ('NNP', 'RB')), ('pushed', ('VBD', 'VBN')), ('through', ('IN', 'IN')), ('first', ('JJ', 'JJ')), ('.', ('.', '.'))]
8: [('The', ('DT', 'DT')), ('animals', ('NNS', 'NNS')), ('thundered', ('VBD', 'VBN')), ('away', ('RB', 'RB')), ('into', ('IN', 'IN')), ('the', ('DT', 'DT')), ('moonlight', ('NN', 'NN')), (',', (',', ',')), ('heading', ('VBG', 'VBG')), ('for', ('IN', 'IN')), ('the', ('DT', 'DT')), ('ridges', ('NNS', 'NNS')), ('.', ('.', '.'))]
9: [('thanks', ('UH', 'NNS')), ("''", ("''", "''")), ('.', ('.', '.'))]
10: [('``', ('``', '``')), ('But', ('CC', 'CC')), ('one', ('CD', 'CD')), ('word', ('NN', 'NN')), ('at', ('IN', 'IN')), ('a', ('DT', 'DT')), ('time', ('NN', 'NN')), (',', (',', ',')), ('O.K.', ('UH', 'NNP')), ("''", ("''", "''")), ('?', ('.', '.')), ('?', ('.', '.'))]
========================End of OOV Sampling======================
==========================Confusion Matrix========================
| V P |
| N D I B R R J |
| N T N D . P B , J |
----+----------------------------------------------------------------+
NN | <11.5%> 0.0% 0.0% 0.0% . 0.0% 0.1% . 0.6% |
DT | 0.0% <10.3%> 0.0% . . . . . 0.0% |
IN | 0.0% 0.0% <9.3%> . . . 0.2% . 0.0% |
VBD | 0.1% . . <8.0%> . . . . 0.1% |
. | . . . . <8.4%> . . . . |
PRP | 0.0% . . . . <7.0%> 0.0% . 0.0% |
RB | 0.2% 0.1% 0.4% . . . <4.1%> . 0.1% |
, | . . . . . . . <4.3%> . |
JJ | 0.4% . 0.0% 0.0% . . 0.2% . <3.3%>|
----+----------------------------------------------------------------+
(row = reference; col = test)
========================End of Confusion Matrix======================
+-------+---------------------+---------------------+---------------------+
| label | precision | recall | f-score |
+-------+---------------------+---------------------+---------------------+
| '' | 1.0 | 0.9615384615384616 | 0.9803921568627451 |
| , | 1.0 | 1.0 | 1.0 |
| . | 1.0 | 1.0 | 1.0 |
| : | 1.0 | 1.0 | 1.0 |
| CC | 0.9948186528497409 | 0.9846153846153847 | 0.9896907216494846 |
| CD | 0.8837209302325582 | 0.9047619047619048 | 0.8941176470588236 |
| DT | 0.9851150202976996 | 0.9904761904761905 | 0.9877883310719133 |
| EX | 0.5294117647058824 | 1.0 | 0.6923076923076924 |
| FW | None | None | 0 |
| IN | 0.8940217391304348 | 0.9662261380323054 | 0.9287226534932956 |
| JJ | 0.7292307692307692 | 0.8200692041522492 | 0.7719869706840391 |
| JJR | 0.3 | 0.375 | 0.33333333333333326 |
| JJS | 0.875 | 1.0 | 0.9333333333333333 |
| MD | 1.0 | 0.9710144927536232 | 0.9852941176470589 |
| NN | 0.8890097932535365 | 0.8938730853391685 | 0.8914348063284233 |
| NNP | 0.8888888888888888 | 0.701195219123506 | 0.7839643652561247 |
| NNPS | 0.6666666666666666 | 0.5 | 0.5714285714285715 |
| NNS | 0.9512195121951219 | 0.9790794979079498 | 0.9649484536082473 |
| PDT | 0.6666666666666666 | 0.6666666666666666 | 0.6666666666666666 |
| POS | 0.6097560975609756 | 0.9615384615384616 | 0.7462686567164178 |
| PRP | 0.9920477137176938 | 0.9596153846153846 | 0.9755620723362659 |
| PRP$ | 0.89375 | 1.0 | 0.9438943894389439 |
| RB | 0.7901907356948229 | 0.8123249299719888 | 0.8011049723756906 |
| RBR | 0.6666666666666666 | 0.8888888888888888 | 0.761904761904762 |
| RBS | None | None | 0 |
| RP | None | None | 0 |
| TO | 1.0 | 1.0 | 1.0 |
| UH | 1.0 | 0.08333333333333333 | 0.15384615384615385 |
| VB | 0.8941176470588236 | 0.7487684729064039 | 0.8150134048257374 |
| VBD | 0.9496644295302014 | 0.8667687595712098 | 0.9063250600480385 |
| VBG | 0.9590643274853801 | 0.9111111111111111 | 0.9344729344729344 |
| VBN | 0.5934065934065934 | 0.782608695652174 | 0.675 |
| VBP | 0.5151515151515151 | 0.7727272727272727 | 0.6181818181818182 |
| VBZ | 0.6666666666666666 | 0.56 | 0.6086956521739131 |
| WDT | 0.47368421052631576 | 0.8181818181818182 | 0.6 |
| WP | 0.9230769230769231 | 0.48 | 0.631578947368421 |
| WRB | 1.0 | 0.9090909090909091 | 0.9523809523809523 |
| `` | 1.0 | 1.0 | 1.0 |
+-------+---------------------+---------------------+---------------------+
Occurences in Predicted (tagged result)= [('NN', 919), ('DT', 739), ('IN', 736), ('VBD', 596), ('.', 594), ('PRP', 503), ('RB', 367), ('JJ', 325), (',', 307), ('NNS', 246), ('NNP', 198), ('CC', 193), ('VBN', 182), ('VBG', 171), ('VB', 170), ('PRP$', 160), ('TO', 132), ("''", 100), ('MD', 67), ('VBP', 66), ('``', 64), ('CD', 43), ('POS', 41), (':', 33), ('VBZ', 21), ('WRB', 20), ('WDT', 19), ('EX', 17), ('WP', 13), ('RBR', 12), ('JJR', 10), ('JJS', 8), ('PDT', 3), ('NNPS', 3), ('RP', 2), ('UH', 1), ('FW', 1)]
Occurences in Test (Gold standard) = [('NN', 914), ('DT', 735), ('IN', 681), ('VBD', 653), ('.', 594), ('PRP', 520), ('RB', 357), (',', 307), ('JJ', 289), ('NNP', 251), ('NNS', 239), ('VB', 203), ('CC', 195), ('VBG', 180), ('PRP$', 143), ('VBN', 138), ('TO', 132), ("''", 104), ('MD', 69), ('``', 64), ('VBP', 44), ('CD', 42), ('RP', 33), (':', 33), ('POS', 26), ('VBZ', 25), ('WP', 25), ('WRB', 22), ('UH', 12), ('WDT', 11), ('RBR', 9), ('EX', 9), ('JJR', 8), ('JJS', 7), ('NNPS', 4), ('PDT', 3), ('RBS', 1)]
Dictionary of mislabelled tags = [(('VBN', 'VBD'), 69), (('JJ', 'NN'), 43), (('NN', 'JJ'), 31), (('IN', 'RB'), 25), (('RB', 'NNP'), 23), (('IN', 'RP'), 22), (('VBD', 'VBN'), 21), (('IN', 'NNP'), 19), (('NN', 'VB'), 17), (('PRP$', 'PRP'), 17), (('RB', 'IN'), 16), (('VBP', 'VB'), 15), (('VBP', 'NN'), 14), (('NN', 'RB'), 14), (('JJ', 'NNP'), 12), (('RB', 'JJ'), 12), (('POS', 'VBZ'), 11), (('RB', 'RP'), 11), (('NN', 'VBG'), 11), (('JJ', 'RB'), 10), (('VB', 'NN'), 9), (('WDT', 'WP'), 9), (('EX', 'RB'), 8), (('JJ', 'VBN'), 8), (('NNP', 'VB'), 8), (('NN', 'VBD'), 8), (('NN', 'NNP'), 7), (('NNS', 'NNP'), 7), (('VB', 'VBP'), 6), (('NN', 'UH'), 6), (('DT', 'RB'), 5), (('RB', 'NN'), 5), (('VBG', 'NN'), 5), (('JJR', 'NN'), 5), (('JJ', 'VBD'), 5), (('VBN', 'JJ'), 4), (('IN', 'VB'), 4), (('RBR', 'JJR'), 4), (('POS', "''"), 4), (('NNP', 'UH'), 4), (('CD', 'NN'), 4), (('NNP', 'NN'), 4), (('JJ', 'VB'), 3), (('VBZ', 'NNS'), 3), (('JJ', 'VBG'), 3), (('VBD', 'NNP'), 3), (('NN', 'IN'), 3), (('RB', 'VB'), 2), (('RB', 'WRB'), 2), (('NNS', 'NN'), 2), (('PRP', 'CD'), 2), (('IN', 'WP'), 2), (('NNS', 'NNPS'), 2), (('VBD', 'VBP'), 2), (('PRP', 'NN'), 2), (('VBP', 'VBD'), 2), (('IN', 'DT'), 2), (('VBZ', 'NNP'), 2), (('RB', 'CC'), 2), (('DT', 'NN'), 2), (('JJR', 'RB'), 2), (('VBD', 'VB'), 2), (('VBD', 'JJ'), 1), (('PDT', 'DT'), 1), (('VBD', 'NN'), 1), (('JJS', 'RBS'), 1), (('NN', 'VBP'), 1), (('NN', 'PRP'), 1), (('RP', 'RB'), 1), (('CD', 'JJ'), 1), (('WDT', 'DT'), 1), (('IN', 'JJ'), 1), (('VB', 'RB'), 1), (('VBZ', 'POS'), 1), (('DT', 'IN'), 1), (('JJ', 'IN'), 1), (('RP', 'IN'), 1), (('DT', 'WP'), 1), (('DT', 'CC'), 1), (('RB', 'PRP'), 1), (('RB', 'NNS'), 1), (('NN', 'JJR'), 1), (('VBN', 'VBG'), 1), (('VB', 'CD'), 1), (('VBG', 'VBD'), 1), (('WP', 'WDT'), 1), (('JJ', 'PRP'), 1), (('NNP', 'VBG'), 1), (('RB', 'RBR'), 1), (('DT', 'PDT'), 1), (('FW', 'NNP'), 1), (('VBP', 'RB'), 1), (('NNP', 'MD'), 1), (('CC', 'DT'), 1), (('JJ', 'DT'), 1), (('VB', 'VBD'), 1), (('NNP', 'VBD'), 1), (('NNP', 'WP'), 1), (('NN', 'DT'), 1), (('POS', 'PRP'), 1), (('NNP', 'NNS'), 1), (('JJ', 'CD'), 1), (('NNPS', 'NNP'), 1), (('NN', 'VBN'), 1), (('RB', 'MD'), 1), (('NNP', 'JJ'), 1), (('NNS', 'UH'), 1), (('IN', 'VBP'), 1), (('IN', 'WDT'), 1), (('VBG', 'JJ'), 1), (('VBZ', 'IN'), 1), (('IN', 'NN'), 1)]
Test dataset: Domain2Test, Model name: Domain2Train
Results on 544 sentences and 7082 words, of which 706 were unknown.
Unknown words right: 528 (74.7875%); wrong: 178 (25.2125%).
Total tags right: 6664 (94.0977%); wrong: 418 (5.9023%).
Total sentences right: 265 (48.7132%); wrong: 279 (51.2868%).
==========================OOV Sampling========================
Sampling: randomly selected 10 sentences that contain OOV words.
Format: [(word, (gold_tag, test_tag)), (word, (gold_tag, test_tag))]
1: [('Hush', ('NN', 'RB')), ('followed', ('VBD', 'VBD')), ('.', ('.', '.'))]
2: [('The', ('DT', 'DT')), ('three', ('CD', 'CD')), ('of', ('IN', 'IN')), ('them', ('PRP', 'PRP')), ('floundered', ('VBN', 'VBD')), ('through', ('IN', 'IN')), ('the', ('DT', 'DT')), ('door', ('NN', 'NN')), ('into', ('IN', 'IN')), ('the', ('DT', 'DT')), ('interior', ('NN', 'NN')), ('and', ('CC', 'CC')), ('fell', ('VBD', 'VBD')), ('in', ('IN', 'IN')), ('a', ('DT', 'DT')), ('heap', ('NN', 'NN')), ('.', ('.', '.'))]
3: [('Forced', ('VBN', 'VBD')), ('behind', ('IN', 'IN')), ('him', ('PRP', 'PRP')), ('momentarily', ('RB', 'RB')), (',', (',', ',')), ('Russ', ('NNP', 'NNP')), ('followed', ('VBD', 'VBD')), ('at', ('IN', 'IN')), ('once', ('RB', 'RB')), ('and', ('CC', 'CC')), ('halted', ('VBD', 'VBD')), ('two', ('CD', 'CD')), ('steps', ('NNS', 'NNS')), ('inside', ('RB', 'RB')), ('.', ('.', '.'))]
4: [('Russ', ('NNP', 'NNS')), ('visited', ('VBD', 'VBD')), ('two', ('CD', 'CD')), ('places', ('NNS', 'NNS')), ('without', ('IN', 'IN')), ('result', ('NN', 'NN')), ('and', ('CC', 'CC')), ('his', ('PRP$', 'PRP$')), ('blood', ('NN', 'NN')), ('pressure', ('NN', 'NN')), ('was', ('VBD', 'VBD')), ('down', ('IN', 'RB')), ('to', ('TO', 'TO')), ('zero', ('CD', 'VB')), ('.', ('.', '.'))]
5: [('Mike', ('NNP', 'RB')), ('turned', ('VBD', 'VBD')), ('away', ('RB', 'RB')), ('.', ('.', '.'))]
6: [('One', ('CD', 'CD')), ('screeched', ('VBD', 'VBN')), ('.', ('.', '.'))]
7: [('Mike', ('NNP', 'IN')), ('only', ('RB', 'RB')), ('said', ('VBD', 'VBD')), (',', (',', ',')), ('``', ('``', '``')), ('Later', ('RB', 'RB')), ("''", ("''", "''")), ('.', ('.', '.'))]
8: [('There', ('EX', 'EX')), ('was', ('VBD', 'VBD')), ('a', ('DT', 'DT')), ('feeling', ('NN', 'NN')), ('that', ('IN', 'IN')), ('this', ('DT', 'DT')), ('mission', ('NN', 'NN')), ('would', ('MD', 'MD')), ('be', ('VB', 'VB')), ('canceled', ('VBN', 'VBN')), ('like', ('IN', 'IN')), ('all', ('PDT', 'DT')), ('the', ('DT', 'DT')), ('others', ('NNS', 'NNS')), ('and', ('CC', 'CC')), ('that', ('IN', 'IN')), ('this', ('DT', 'DT')), ('muddy', ('JJ', 'NN')), ('wet', ('JJ', 'VB')), ('dark', ('JJ', 'JJ')), ('world', ('NN', 'NN')), ('of', ('IN', 'IN')), ('combat', ('NN', 'NN')), ('would', ('MD', 'MD')), ('go', ('VB', 'VB')), ('on', ('RB', 'IN')), ('forever', ('RB', 'NN')), ('.', ('.', '.'))]
9: [('He', ('PRP', 'PRP')), ('went', ('VBD', 'VBD')), ('into', ('IN', 'IN')), ('a', ('DT', 'DT')), ('whirling', ('VBG', 'JJ')), ('dance', ('NN', 'NN')), (',', (',', ',')), ('a', ('DT', 'DT')), ('sort', ('NN', 'NN')), ('of', ('IN', 'IN')), ('blind', ('JJ', 'JJ')), ('chasing', ('NN', 'NN')), ('of', ('IN', 'IN')), ('the', ('DT', 'DT')), ('tail', ('NN', 'NN')), ('.', ('.', '.'))]
10: [('Hand', ('NN', 'JJ')), ('grenades', ('NNS', 'NNS')), ('.', ('.', '.'))]
========================End of OOV Sampling======================
==========================Confusion Matrix========================
| V P |
| N D I B R R J |
| N T N D . P B , J |
----+----------------------------------------------------------------+
NN | <11.8%> 0.0% . . . . 0.1% . 0.3% |
DT | 0.0% <10.3%> 0.0% . . . . . . |
IN | 0.0% 0.0% <9.3%> . . . 0.3% . . |
VBD | 0.0% . . <8.9%> . . . . 0.1% |
. | . . . . <8.4%> . . . . |
PRP | 0.0% . . . . <7.2%> . . . |
RB | 0.1% 0.1% 0.2% . . . <4.5%> . 0.1% |
, | . . . . . . . <4.3%> . |
JJ | 0.2% . 0.0% 0.0% . . 0.2% . <3.5%>|
----+----------------------------------------------------------------+
(row = reference; col = test)
========================End of Confusion Matrix======================
+-------+--------------------+---------------------+---------------------+
| label | precision | recall | f-score |
+-------+--------------------+---------------------+---------------------+
| '' | 1.0 | 1.0 | 1.0 |
| , | 1.0 | 1.0 | 1.0 |
| . | 1.0 | 1.0 | 1.0 |
| : | 1.0 | 1.0 | 1.0 |
| CC | 0.9948186528497409 | 0.9846153846153847 | 0.9896907216494846 |
| CD | 0.8444444444444444 | 0.9047619047619048 | 0.8735632183908046 |
| DT | 0.9812080536912752 | 0.9945578231292517 | 0.9878378378378379 |
| EX | 0.875 | 0.7777777777777778 | 0.823529411764706 |
| FW | None | None | 0 |
| IN | 0.9384835479256081 | 0.9632892804698973 | 0.9507246376811596 |
| JJ | 0.8163934426229508 | 0.8615916955017301 | 0.8383838383838383 |
| JJR | 0.3333333333333333 | 0.375 | 0.35294117647058826 |
| JJS | 0.875 | 1.0 | 0.9333333333333333 |
| MD | 1.0 | 1.0 | 1.0 |
| NN | 0.9351955307262569 | 0.9157549234135668 | 0.9253731343283582 |
| NNP | 0.9082969432314411 | 0.8286852589641435 | 0.8666666666666666 |
| NNPS | None | None | 0 |
| NNS | 0.9367588932806324 | 0.9916317991631799 | 0.9634146341463414 |
| PDT | 1.0 | 0.3333333333333333 | 0.5 |
| POS | 0.8928571428571429 | 0.9615384615384616 | 0.9259259259259259 |
| PRP | 1.0 | 0.9865384615384616 | 0.9932236205227493 |
| PRP$ | 0.959731543624161 | 1.0 | 0.9794520547945206 |
| RB | 0.8142493638676844 | 0.896358543417367 | 0.8533333333333333 |
| RBR | 1.0 | 0.3333333333333333 | 0.5 |
| RBS | None | None | 0 |
| RP | 0.625 | 0.15151515151515152 | 0.24390243902439024 |
| TO | 1.0 | 1.0 | 1.0 |
| UH | 0.8333333333333334 | 0.4166666666666667 | 0.5555555555555556 |
| VB | 0.8277511961722488 | 0.8522167487684729 | 0.8398058252427184 |
| VBD | 0.9476047904191617 | 0.9693721286370597 | 0.9583648750946252 |
| VBG | 0.9453551912568307 | 0.9611111111111111 | 0.953168044077135 |
| VBN | 0.8461538461538461 | 0.717391304347826 | 0.776470588235294 |
| VBP | 0.8974358974358975 | 0.7954545454545454 | 0.8433734939759037 |
| VBZ | 0.9130434782608695 | 0.84 | 0.8749999999999999 |
| WDT | 1.0 | 0.7272727272727273 | 0.8421052631578948 |
| WP | 0.9473684210526315 | 0.72 | 0.8181818181818181 |
| WRB | 1.0 | 0.9545454545454546 | 0.9767441860465117 |
| `` | 1.0 | 1.0 | 1.0 |
+-------+--------------------+---------------------+---------------------+
Occurences in Predicted (tagged result)= [('NN', 895), ('DT', 745), ('IN', 699), ('VBD', 668), ('.', 594), ('PRP', 513), ('RB', 393), (',', 307), ('JJ', 305), ('NNS', 253), ('NNP', 229), ('VB', 209), ('CC', 193), ('VBG', 183), ('PRP$', 149), ('TO', 132), ('VBN', 117), ("''", 104), ('MD', 69), ('``', 64), ('CD', 45), ('VBP', 39), (':', 33), ('POS', 28), ('VBZ', 23), ('WRB', 21), ('WP', 19), ('JJR', 9), ('WDT', 8), ('EX', 8), ('JJS', 8), ('RP', 8), ('UH', 6), ('RBR', 3), ('NNPS', 1), ('PDT', 1), ('FW', 1)]
Occurences in Test (Gold standard) = [('NN', 914), ('DT', 735), ('IN', 681), ('VBD', 653), ('.', 594), ('PRP', 520), ('RB', 357), (',', 307), ('JJ', 289), ('NNP', 251), ('NNS', 239), ('VB', 203), ('CC', 195), ('VBG', 180), ('PRP$', 143), ('VBN', 138), ('TO', 132), ("''", 104), ('MD', 69), ('``', 64), ('VBP', 44), ('CD', 42), ('RP', 33), (':', 33), ('POS', 26), ('VBZ', 25), ('WP', 25), ('WRB', 22), ('UH', 12), ('WDT', 11), ('RBR', 9), ('EX', 9), ('JJR', 8), ('JJS', 7), ('NNPS', 4), ('PDT', 3), ('RBS', 1)]
Dictionary of mislabelled tags = [(('VBD', 'VBN'), 32), (('JJ', 'NN'), 23), (('RB', 'IN'), 20), (('VB', 'NN'), 20), (('NN', 'JJ'), 17), (('IN', 'RB'), 15), (('IN', 'RP'), 15), (('RB', 'NNP'), 14), (('RB', 'RP'), 13), (('JJ', 'NNP'), 13), (('NN', 'VB'), 13), (('VBN', 'VBD'), 11), (('RB', 'JJ'), 11), (('VB', 'VBP'), 8), (('NNS', 'NNP'), 8), (('VBG', 'NN'), 7), (('RB', 'NN'), 7), (('CD', 'NN'), 7), (('JJ', 'VBN'), 6), (('NNP', 'NN'), 6), (('PRP$', 'PRP'), 6), (('NN', 'RB'), 6), (('JJ', 'VBD'), 5), (('JJ', 'RB'), 5), (('NN', 'VBG'), 5), (('VBN', 'JJ'), 5), (('NNP', 'VB'), 5), (('NN', 'JJR'), 5), (('DT', 'RB'), 4), (('JJR', 'RBR'), 4), (('VBP', 'VB'), 3), (('NN', 'NNP'), 3), (('POS', 'VBZ'), 3), (('NNS', 'NNPS'), 3), (('DT', 'WP'), 3), (('IN', 'VB'), 3), (('IN', 'WP'), 3), (('NNP', 'UH'), 3), (('RB', 'VB'), 2), (('RB', 'EX'), 2), (('IN', 'NNP'), 2), (('VB', 'RB'), 2), (('RP', 'RB'), 2), (('DT', 'IN'), 2), (('JJR', 'NN'), 2), (('DT', 'PDT'), 2), (('IN', 'DT'), 2), (('VBD', 'VB'), 2), (('DT', 'NN'), 2), (('VB', 'JJ'), 2), (('JJ', 'VBG'), 2), (('NN', 'UH'), 2), (('IN', 'WDT'), 2), (('VBD', 'JJ'), 1), (('RB', 'WRB'), 1), (('VBG', 'VB'), 1), (('NN', 'RBR'), 1), (('JJS', 'RBS'), 1), (('EX', 'RB'), 1), (('VB', 'VBN'), 1), (('VBZ', 'POS'), 1), (('VB', 'NNP'), 1), (('VBN', 'NNS'), 1), (('NNP', 'NNPS'), 1), (('RP', 'IN'), 1), (('VB', 'VBD'), 1), (('DT', 'CC'), 1), (('NNS', 'JJ'), 1), (('NN', 'VBP'), 1), (('VB', 'CD'), 1), (('NNP', 'IN'), 1), (('WP', 'WDT'), 1), (('NN', 'PRP'), 1), (('JJ', 'RBR'), 1), (('NNP', 'JJ'), 1), (('VBN', 'NN'), 1), (('RB', 'UH'), 1), (('NNPS', 'NNP'), 1), (('NNS', 'NN'), 1), (('VBZ', 'NNS'), 1), (('IN', 'JJ'), 1), (('NNP', 'DT'), 1), (('RB', 'CC'), 1), (('CC', 'RB'), 1), (('NNP', 'VBD'), 1), (('NN', 'IN'), 1), (('NNP', 'WP'), 1), (('NN', 'DT'), 1), (('RB', 'CD'), 1), (('NN', 'CD'), 1), (('JJ', 'CD'), 1), (('NNS', 'VBZ'), 1), (('UH', 'NN'), 1), (('NNS', 'CC'), 1), (('NNS', 'UH'), 1), (('VBP', 'VBD'), 1), (('VBG', 'NNP'), 1), (('NNP', 'RB'), 1), (('FW', 'VB'), 1), (('VBG', 'JJ'), 1), (('NN', 'VBD'), 1)]
Test dataset: ELLTest, Model name: Domain1Train
Results on 500 sentences and 9591 words, of which 697 were unknown.
Unknown words right: 352 (50.5022%); wrong: 345 (49.4978%).
Total tags right: 8660 (90.2930%); wrong: 931 (9.7070%).
Total sentences right: 110 (22.0000%); wrong: 390 (78.0000%).
==========================OOV Sampling========================
Sampling: randomly selected 10 sentences that contain OOV words.
Format: [(word, (gold_tag, test_tag)), (word, (gold_tag, test_tag))]
1: [('Besides', ('RB', 'NNS')), (',', (',', ',')), ('I', ('PRP', 'PRP')), ('would', ('MD', 'MD')), ('prefer', ('VB', 'VB')), ('to', ('TO', 'TO')), ('stay', ('VB', 'NN')), ('in', ('IN', 'IN')), ('a', ('DT', 'DT')), ('tent', ('NN', 'JJ')), ('since', ('IN', 'IN')), ('I', ('PRP', 'PRP')), ('have', ('VBP', 'VBP')), ('never', ('RB', 'RB')), ('had', ('VBN', 'VBD')), ('the', ('DT', 'DT')), ('exciting', ('JJ', 'VBG')), ('experience', ('NN', 'NN')), ('to', ('TO', 'TO')), ('sleep', ('VB', 'VB')), ('in', ('IN', 'IN')), ('the', ('DT', 'DT')), ('middle', ('NN', 'NN')), ('of', ('IN', 'IN')), ('the', ('DT', 'DT')), ('nature', ('NN', 'NN')), ('.', ('.', '.'))]
2: [('We', ('PRP', 'PRP')), ('organized', ('VBD', 'VBD')), ('a', ('DT', 'DT')), ('end-of-conference', ('JJ', 'NN')), ('party', ('NN', 'NN')), ('on', ('IN', 'IN')), ('the', ('DT', 'DT')), ('day', ('NN', 'NN')), ('before', ('IN', 'IN')), ('your', ('PRP$', 'PRP$')), ('departure', ('NN', 'NN')), ('.', ('.', '.'))]
3: [('For', ('IN', 'IN')), ('me', ('PRP', 'PRP')), (',', (',', ',')), ('It', ('PRP', 'PRP')), ("'s", ('VBZ', 'VBZ')), ('not', ('RB', 'RB')), ('easy', ('JJ', 'JJ')), ('to', ('TO', 'TO')), ('have', ('VB', 'VB')), ('holiday', ('NN', 'JJ')), ('and', ('CC', 'CC')), ('your', ('PRP$', 'PRP$')), ('play', ('NN', 'NN')), ('just', ('RB', 'RB')), ('blew', ('VBD', 'JJ')), ('my', ('PRP$', 'PRP$')), ('precious', ('JJ', 'JJ')), ('time', ('NN', 'NN')), ('away', ('RP', 'RB')), ('.', ('.', '.'))]
4: [('I', ('PRP', 'PRP')), ('helped', ('VBD', 'VBD')), ('setting', ('VBG', 'VBG')), ('up', ('RP', 'IN')), ('the', ('DT', 'DT')), ('sound', ('NN', 'NN')), ('equipment', ('NN', 'NN')), (',', (',', ',')), ('programming', ('VBG', 'VBG')), ('the', ('DT', 'DT')), ('light', ('NN', 'JJ')), ('system', ('NN', 'NN')), ('on', ('IN', 'IN')), ('the', ('DT', 'DT')), ('computer', ('NN', 'NN')), (',', (',', ',')), ('tidy', ('VB', 'NN')), ('up', ('RP', 'IN')), ('Madonna', ('NNP', 'NNP')), ("'s", ('POS', 'POS')), ('room', ('NN', 'NN')), ('!', ('.', '.'))]
5: [('It', ('PRP', 'PRP')), ('must', ('MD', 'MD')), ('be', ('VB', 'VB')), ('very', ('RB', 'RB')), ('exhausted', ('JJ', 'VBN')), ('to', ('TO', 'TO')), ('have', ('VB', 'VB')), ('the', ('DT', 'DT')), ('medias', ('NNS', 'NNS')), ('around', ('IN', 'IN')), ('you', ('PRP', 'PRP')), ('days', ('NNS', 'NNS')), ('and', ('CC', 'CC')), ('nights', ('NNS', 'NNS')), ('because', ('IN', 'IN')), ('you', ('PRP', 'PRP')), ('ca', ('MD', 'MD')), ("n't", ('RB', 'RB')), ('be', ('VB', 'VB')), ('yourself', ('PRP', 'JJ')), (',', (',', ',')), ('you', ('PRP', 'PRP')), ('have', ('VBP', 'VBP')), ('to', ('TO', 'TO')), ('show', ('VB', 'VB')), ('your', ('PRP$', 'PRP$')), ('best', ('JJS', 'JJS')), ('side', ('NN', 'NN')), ('and', ('CC', 'CC')), ('character', ('NN', 'NN')), ('.', ('.', '.'))]
6: [('I', ('PRP', 'PRP')), ('am', ('VBP', 'VBP')), ('glad', ('JJ', 'JJ')), ('I', ('PRP', 'PRP')), ('had', ('VBD', 'VBD')), ('taken', ('VBN', 'VBN')), ('a', ('DT', 'DT')), ('part', ('NN', 'NN')), ('in', ('IN', 'IN')), ('a', ('DT', 'DT')), ('few', ('JJ', 'JJ')), ('competitions', ('NNS', 'NNS')), (',', (',', ',')), ('and', ('CC', 'CC')), ('I', ('PRP', 'PRP')), ('was', ('VBD', 'VBD')), ('so', ('RB', 'RB')), ('proud', ('JJ', 'JJ')), ('of', ('IN', 'IN')), ('our', ('PRP$', 'PRP$')), ('basketball', ('NN', 'JJ')), ('team', ('NN', 'NN')), ('.', ('.', '.'))]
7: [('Because', ('IN', 'RB')), (',', (',', ',')), ('althoug', ('IN', 'NN')), ('they', ('PRP', 'PRP')), ('are', ('VBP', 'VBP')), ('famous', ('JJ', 'JJ')), ('they', ('PRP', 'PRP')), ('have', ('VBP', 'VBP')), ('also', ('RB', 'RB')), ('their', ('PRP$', 'PRP$')), ('private', ('JJ', 'JJ')), ('life', ('NN', 'NN')), ('.', ('.', '.'))]
8: [('I', ('PRP', 'PRP')), ('think', ('VBP', 'VBP')), ('this', ('DT', 'DT')), ('side', ('NN', 'NN')), ('of', ('IN', 'IN')), ('the', ('DT', 'DT')), ('question', ('NN', 'NN')), ('is', ('VBZ', 'VBZ')), ('balanced', ('JJ', 'VBN')), ('because', ('IN', 'IN')), ('before', ('RB', 'IN')), ('effect', ('NN', 'NN')), ('on', ('IN', 'IN')), ('our', ('PRP$', 'PRP$')), ('health', ('NN', 'NN')), ('was', ('VBD', 'VBD')), ('less', ('JJR', 'RB')), ('than', ('IN', 'IN')), ('it', ('PRP', 'PRP')), ('is', ('VBZ', 'VBZ')), ('now', ('RB', 'RB')), ('.', ('.', '.'))]
9: [('Dear', ('JJ', 'NNP')), ('Mr', ('NNP', 'NNP')), ('or', ('CC', 'CC')), ('Madam', ('NNP', 'NNP')), (',', ('.', ','))]
10: [('You', ('PRP', 'PRP')), ('choose', ('VBP', 'VBP')), ('Larry', ('NNP', 'NNP')), (',', (',', ',')), ('but', ('CC', 'CC')), ('I', ('PRP', 'PRP')), ('thought', ('VBD', 'VBD')), ('that', ('IN', 'IN')), ('I', ('PRP', 'PRP')), ('was', ('VBD', 'VBD')), ('your', ('PRP$', 'PRP$')), ('best', ('JJS', 'JJS')), ('friend', ('NN', 'NN')), ('.', ('.', '.')), ('"', ("''", 'CD'))]
========================End of OOV Sampling======================
==========================Confusion Matrix========================
| P |
| N I R D R V J |
| N N P T B B J . , |
----+----------------------------------------------------------------+
NN | <10.4%> . . . 0.1% 0.2% 0.6% . . |
IN | 0.0% <9.2%> . 0.0% 0.1% . 0.1% . . |
PRP | 0.0% . <9.4%> . 0.0% . 0.0% . . |
DT | 0.0% 0.1% . <7.5%> . . 0.0% . . |
RB | 0.1% 0.3% . 0.1% <5.2%> 0.0% 0.1% . . |
VB | 0.3% 0.1% . . 0.0% <4.8%> 0.0% . . |
JJ | 0.1% 0.0% . 0.0% 0.2% 0.0% <4.6%> . . |
. | . . . . . . . <5.0%> 0.0% |
, | . . . . . . . . <4.3%>|
----+----------------------------------------------------------------+
(row = reference; col = test)
========================End of Confusion Matrix======================
+-------+---------------------+----------------------+--------------------+
| label | precision | recall | f-score |
+-------+---------------------+----------------------+--------------------+
| '' | 1.0 | 0.058823529411764705 | 0.1111111111111111 |
| ( | None | None | 0 |
| ) | None | None | 0 |
| , | 0.9975609756097561 | 0.9855421686746988 | 0.9915151515151515 |
| -LRB- | None | None | 0 |
| -RRB- | None | None | 0 |
| . | 1.0 | 0.9958592132505176 | 0.9979253112033196 |
| : | 0.35294117647058826 | 1.0 | 0.5217391304347826 |
| CC | 1.0 | 1.0 | 1.0 |
| CD | 0.7058823529411765 | 0.7058823529411765 | 0.7058823529411765 |
| DT | 0.9717741935483871 | 0.9757085020242915 | 0.9737373737373738 |
| EX | 0.6153846153846154 | 1.0 | 0.761904761904762 |
| FW | None | None | 0 |
| GW | None | None | 0 |
| HYPH | None | None | 0 |
| IN | 0.9238790406673618 | 0.8868868868868869 | 0.9050051072522982 |
| JJ | 0.8069216757741348 | 0.8280373831775701 | 0.8173431734317342 |
| JJR | 1.0 | 0.4 | 0.5714285714285715 |
| JJS | 0.7586206896551724 | 0.9166666666666666 | 0.830188679245283 |
| MD | 0.9757281553398058 | 0.9901477832512315 | 0.9828850855745721 |
| NN | 0.8968609865470852 | 0.8620689655172413 | 0.8791208791208791 |
| NNP | 0.7851239669421488 | 0.867579908675799 | 0.824295010845987 |
| NNPS | None | None | 0 |
| NNS | 0.916010498687664 | 0.9509536784741145 | 0.9331550802139038 |
| PDT | 1.0 | 0.4117647058823529 | 0.5833333333333334 |
| POS | 0.7567567567567568 | 1.0 | 0.8615384615384616 |
| PRP | 0.9988888888888889 | 0.9835886214442013 | 0.9911797133406837 |
| PRP$ | 0.956140350877193 | 0.9954337899543378 | 0.9753914988814317 |
| RB | 0.8714788732394366 | 0.8418367346938775 | 0.8564013840830451 |
| RBR | 0.64 | 0.8888888888888888 | 0.7441860465116279 |
| RBS | 0.42857142857142855 | 0.6 | 0.5 |
| RP | 1.0 | 0.05263157894736842 | 0.1 |
| SYM | None | None | 0 |
| TO | 0.7492163009404389 | 1.0 | 0.8566308243727598 |
| UH | 0.3333333333333333 | 0.07692307692307693 | 0.125 |
| VB | 0.9090909090909091 | 0.8471454880294659 | 0.877025738798856 |
| VBD | 0.9177718832891246 | 0.8917525773195877 | 0.9045751633986929 |
| VBG | 0.8 | 0.8831168831168831 | 0.8395061728395061 |
| VBN | 0.6646706586826348 | 0.8345864661654135 | 0.7399999999999999 |
| VBP | 0.8133704735376045 | 0.8957055214723927 | 0.8525547445255475 |
| VBZ | 0.9101796407185628 | 0.8786127167630058 | 0.8941176470588235 |
| WDT | 0.9148936170212766 | 0.7049180327868853 | 0.7962962962962963 |
| WP | 0.7906976744186046 | 0.9444444444444444 | 0.8607594936708861 |
| WRB | 0.9402985074626866 | 0.984375 | 0.9618320610687023 |
| `` | None | None | 0 |
+-------+---------------------+----------------------+--------------------+
Occurences in Predicted (tagged result)= [('NN', 1115), ('IN', 959), ('PRP', 900), ('DT', 744), ('RB', 568), ('JJ', 549), ('VB', 506), ('.', 481), (',', 410), ('NNS', 381), ('VBD', 377), ('VBP', 359), ('CC', 333), ('TO', 319), ('NNP', 242), ('PRP$', 228), ('MD', 206), ('VBG', 170), ('VBZ', 167), ('VBN', 167), ('CD', 68), ('WRB', 67), ('WDT', 47), ('WP', 43), ('POS', 37), ('JJS', 29), ('EX', 26), ('RBR', 25), (':', 17), ('FW', 10), ('JJR', 8), ('PDT', 7), ('RBS', 7), ('(', 6), (')', 6), ('UH', 3), ('NNPS', 2), ("''", 1), ('RP', 1)]
Occurences in Test (Gold standard) = [('NN', 1160), ('IN', 999), ('PRP', 914), ('DT', 741), ('RB', 588), ('VB', 543), ('JJ', 535), ('.', 483), (',', 415), ('VBD', 388), ('NNS', 367), ('CC', 333), ('VBP', 326), ('TO', 239), ('NNP', 219), ('PRP$', 219), ('MD', 203), ('VBZ', 173), ('VBG', 154), ('VBN', 133), ('CD', 68), ('WRB', 64), ('WDT', 61), ('WP', 36), ('POS', 28), ('JJS', 24), ('JJR', 20), ('RP', 19), ('RBR', 18), ('``', 18), ('PDT', 17), ("''", 17), ('EX', 16), ('UH', 13), ('-LRB-', 6), ('-RRB-', 6), (':', 6), ('HYPH', 6), ('RBS', 5), ('GW', 5), ('FW', 3), ('NNPS', 2), ('SYM', 1)]
Dictionary of mislabelled tags = [(('TO', 'IN'), 80), (('JJ', 'NN'), 55), (('VBP', 'VB'), 35), (('IN', 'RB'), 32), (('NN', 'VB'), 25), (('VBN', 'VBD'), 25), (('VBN', 'JJ'), 25), (('VBG', 'NN'), 24), (('NNP', 'NN'), 21), (('RB', 'JJ'), 18), (('VB', 'NN'), 17), (('NNS', 'NN'), 16), (('VBD', 'VBN'), 14), (('VBP', 'NN'), 14), (('NN', 'JJ'), 14), (('RB', 'IN'), 14), (('NN', 'RB'), 14), (('NN', 'VBG'), 13), (('JJ', 'RB'), 12), (('VB', 'VBP'), 12), (('NN', 'CD'), 11), (('EX', 'RB'), 10), (('RB', 'NNP'), 9), (('VBZ', 'NNS'), 9), (('VBG', 'JJ'), 9), (('RB', 'RP'), 9), (('IN', 'WDT'), 9), (('IN', 'RP'), 9), (('CD', "''"), 9), (('DT', 'PDT'), 8), (('IN', 'DT'), 8), (('PRP$', 'PRP'), 8), (('WP', 'WDT'), 8), (('RBR', 'JJR'), 7), (('NN', 'NNP'), 7), (('NN', 'VBP'), 7), (('NNS', 'VBZ'), 7), (('IN', 'VBP'), 7), (('JJ', 'IN'), 7), (('(', '-LRB-'), 6), ((')', '-RRB-'), 6), (('NNP', '``'), 6), (('POS', 'VBZ'), 6), ((':', 'HYPH'), 6), (('VBP', 'VBD'), 6), (('VB', 'UH'), 6), (('IN', 'VB'), 6), (('DT', 'RB'), 6), (('VBP', 'VBZ'), 6), (('VBD', 'VB'), 6), ((':', ','), 5), (('RB', 'NN'), 5), (('RB', 'JJR'), 5), (('NN', 'NNS'), 5), (('NNP', 'JJ'), 5), (('JJ', 'NNP'), 5), (('NNS', 'RB'), 5), (('CD', '``'), 5), (('NNP', 'CD'), 4), (('JJ', 'VBD'), 4), (('NN', 'VBN'), 4), (('JJS', 'JJ'), 4), (('JJ', 'CD'), 4), (('NNP', "''"), 4), (('NN', 'IN'), 4), (('JJ', 'VB'), 3), (('VBD', 'NNP'), 3), (('JJ', 'VBG'), 3), (('VBZ', 'VBP'), 3), (('JJ', 'VBN'), 3), (('NNP', 'IN'), 3), (('DT', 'JJ'), 3), (('VB', 'JJ'), 3), (('WRB', 'RB'), 3), (('NNP', 'RB'), 3), (('VBN', 'VB'), 3), (('RB', 'VB'), 3), (('NN', 'UH'), 3), (('JJ', '``'), 3), (('RB', 'GW'), 3), (('VBD', 'VBP'), 2), (('NN', "''"), 2), (('NNP', 'NNPS'), 2), (('MD', 'VBP'), 2), (('RB', 'RBR'), 2), (('RBS', 'RB'), 2), (('VBN', 'RB'), 2), (('VB', 'RB'), 2), (('FW', 'NNP'), 2), (('NN', 'PRP'), 2), (('FW', 'DT'), 2), (('VBD', 'VBZ'), 2), (('JJ', 'DT'), 2), (('RB', '``'), 2), (('MD', 'VBD'), 2), (('NNS', 'JJ'), 2), (('PRP$', 'DT'), 2), (('POS', '``'), 2), (('CD', 'PRP'), 2), (('VB', 'NNP'), 2), (('VBP', 'IN'), 2), (('WDT', 'DT'), 2), (('VBD', 'JJ'), 2), (('VBD', 'NN'), 2), (('CD', 'IN'), 2), (('WDT', 'WP'), 2), (('VB', 'VBD'), 1), (('JJS', 'RBS'), 1), (('DT', 'GW'), 1), (('VB', 'MD'), 1), (('MD', 'VB'), 1), (('JJ', 'VBP'), 1), (('JJS', 'NN'), 1), ((',', '.'), 1), (('CD', 'JJ'), 1), (('NN', 'GW'), 1), (('JJ', 'RBS'), 1), (('RB', 'PRP'), 1), (('IN', 'WRB'), 1), (('DT', 'UH'), 1), (('JJS', 'VB'), 1), (('VBP', 'JJ'), 1), (('UH', 'NN'), 1), (('RBR', 'RB'), 1), (('NN', 'FW'), 1), (('CD', 'NN'), 1), (('UH', 'RB'), 1), (('VBN', 'NN'), 1), (('FW', 'NN'), 1), (('FW', 'PRP'), 1), (('FW', 'VBD'), 1), (('FW', 'PDT'), 1), (('FW', 'JJ'), 1), (('FW', 'NNS'), 1), (('VBP', 'FW'), 1), (('NNP', '.'), 1), (('RBR', 'JJ'), 1), (('RB', 'UH'), 1), (('NNP', 'NNS'), 1), (('NNP', ','), 1), (('VBG', 'NNS'), 1), (('JJ', 'UH'), 1), (('DT', 'WDT'), 1), (('DT', 'IN'), 1), (('NNS', 'NNP'), 1), (('VBZ', 'PRP$'), 1), (('JJ', 'PDT'), 1), (('PRP', 'CD'), 1), (('JJ', 'PRP'), 1), (('NNPS', 'NNS'), 1), (('NNPS', 'JJ'), 1), (('VBZ', 'NN'), 1), (('VBP', 'MD'), 1), (('POS', "''"), 1), (('NNP', 'SYM'), 1), (('NN', 'VBD'), 1), (('IN', 'JJ'), 1), (('NN', 'DT'), 1), (('VB', 'VBG'), 1), (('RBS', 'JJS'), 1), (('WP', 'DT'), 1), (('VB', 'VBN'), 1), (('NNS', 'JJS'), 1), (('RBS', 'JJ'), 1), (('WRB', 'VBD'), 1), (('RB', 'FW'), 1), (('VBP', 'VBG'), 1), (('VBZ', 'VBD'), 1)]
Test dataset: ELLTest, Model name: Domain2Train
Results on 500 sentences and 9591 words, of which 743 were unknown.
Unknown words right: 391 (52.6245%); wrong: 352 (47.3755%).
Total tags right: 8708 (90.7935%); wrong: 883 (9.2065%).
Total sentences right: 119 (23.8000%); wrong: 381 (76.2000%).
==========================OOV Sampling========================
Sampling: randomly selected 10 sentences that contain OOV words.
Format: [(word, (gold_tag, test_tag)), (word, (gold_tag, test_tag))]
1: [('I', ('PRP', 'PRP')), ('can', ('MD', 'MD')), ('play', ('VB', 'VB')), ('tennis', ('NN', 'NNS')), ('quite', ('RB', 'RB')), ('well', ('RB', 'RB')), (',', (',', ',')), ('but', ('CC', 'CC')), ('I', ('PRP', 'PRP')), ('have', ('VBP', 'VBP')), ('never', ('RB', 'RB')), ('played', ('VBD', 'VBN')), ('golf', ('NN', 'NN')), (',', (',', ',')), ('so', ('RB', 'RB')), (',', (',', ',')), ('if', ('IN', 'IN')), ('it', ('PRP', 'PRP')), ('is', ('VBZ', 'VBZ')), ('possible', ('JJ', 'JJ')), (',', (',', ',')), ('I', ('PRP', 'PRP')), ('would', ('MD', 'MD')), ('like', ('VB', 'VB')), ('taking', ('VBG', 'VBG')), ('lessons', ('NNS', 'NNS')), ('with', ('IN', 'IN')), ('a', ('DT', 'DT')), ('teacher', ('NN', 'NN')), ('.', ('.', '.'))]
2: [('just', ('RB', 'RB')), ('as', ('IN', 'IN')), ('example', ('NN', 'NN')), (',', (',', ',')), ('internet', ('NN', 'NN')), ('gets', ('VBZ', 'NNS')), ('hundreds', ('NNS', 'NNS')), ('of', ('IN', 'IN')), ('new', ('JJ', 'JJ')), ('users', ('NNS', 'NNS')), ('every', ('DT', 'DT')), ('single', ('JJ', 'JJ')), ('day', ('NN', 'NN')), ('.', ('.', '.'))]
3: [('Nowadays', ('RB', 'NNS')), (',', (',', ',')), ('mordern', ('JJ', 'JJ')), ('technology', ('NN', 'NN')), ('has', ('VBZ', 'VBZ')), ('increasingly', ('RB', 'RB')), ('been', ('VBN', 'VBN')), ('developing', ('VBG', 'VBG')), ('.', ('.', '.'))]
4: [('After', ('RB', 'IN')), (',', (',', ',')), ('student', ('NN', 'JJ')), ('is', ('VBZ', 'VBZ')), ('making', ('VBG', 'VBG')), ('speech', ('NN', 'NN')), ('about', ('IN', 'IN')), ('what', ('WP', 'WP')), ('he', ('PRP', 'PRP')), ('or', ('CC', 'CC')), ('she', ('PRP', 'PRP')), ('have', ('VB', 'VBP')), ('learned', ('VBN', 'VBN')), ('.', ('.', '.'))]
5: [('I', ('PRP', 'PRP')), ('never', ('RB', 'RB')), ('did', ('VBD', 'VBD')), ('surfing', ('NN', 'VBG')), (',', (',', ',')), ('but', ('CC', 'CC')), ('every', ('DT', 'DT')), ('time', ('NN', 'NN')), ('I', ('PRP', 'PRP')), ('see', ('VBP', 'VBP')), ('surfing', ('NN', 'VBG')), ('on', ('IN', 'IN')), ('television', ('NN', 'NN')), ('I', ('PRP', 'PRP')), ('find', ('VBP', 'VBP')), ('this', ('DT', 'DT')), ('magic', ('NN', 'NN')), ('.', ('.', '.'))]
6: [('I', ('PRP', 'PRP')), ('think', ('VB', 'VBP')), ('that', ('IN', 'IN')), ('this', ('DT', 'DT')), ('kind', ('NN', 'NN')), ('of', ('IN', 'IN')), ('people', ('NNS', 'NNS')), ('have', ('VBP', 'VBP')), ('a', ('DT', 'DT')), ('poor', ('JJ', 'JJ')), ('life', ('NN', 'NN')), ('because', ('IN', 'IN')), ('they', ('PRP', 'PRP')), ('never', ('RB', 'RB')), ('can', ('MD', 'MD')), ('be', ('VB', 'VB')), ('quiet', ('JJ', 'JJ')), ('because', ('IN', 'IN')), ('all', ('PDT', 'PDT')), ('the', ('DT', 'DT')), ('poeple', ('NNS', 'NN')), ('know', ('VBP', 'VB')), ('them', ('PRP', 'PRP')), ('and', ('CC', 'CC')), ('want', ('VB', 'VB')), ('to', ('TO', 'TO')), ('know', ('VB', 'VB')), ('a', ('DT', 'DT')), ('lot', ('NN', 'NN')), ('of', ('IN', 'IN')), ('things', ('NNS', 'NNS')), ('about', ('IN', 'IN')), ('them', ('PRP', 'PRP')), ('.', ('.', '.'))]
7: [('I', ('PRP', 'PRP')), ('kept', ('VBD', 'VBD')), ('asking', ('VBG', 'VBG')), ('myself', ('PRP', 'PRP')), ("'", ('``', "''")), ('should', ('MD', 'MD')), ('I', ('PRP', 'PRP')), ('wake', ('VB', 'VB')), ('him', ('PRP', 'PRP')), ('up', ('RB', 'RP')), ('or', ('CC', 'CC')), ('try', ('VB', 'VB')), ('to', ('TO', 'TO')), ('use', ('VB', 'VB')), ('my', ('PRP$', 'PRP$')), ('unsure', ('JJ', 'NN')), ('first', ('JJ', 'JJ')), ('aid', ('NN', 'JJ')), ('skill', ('NN', 'NN')), ('to', ('TO', 'TO')), ('deal', ('VB', 'VB')), ('with', ('IN', 'IN')), ('him', ('PRP', 'PRP')), ('.', ('.', '.'))]
8: [('And', ('CC', 'CC')), ('thanks', ('NN', 'NNS')), ('again', ('RB', 'RB')), ('for', ('IN', 'IN')), ('offer', ('VB', 'NN')), ('me', ('PRP', 'PRP')), ('two', ('CD', 'CD')), ('weeks', ('NNS', 'NNS')), ('at', ('IN', 'IN')), ('Camp', ('NNP', 'NNP')), ('California', ('NNP', 'NNP')), ('in', ('IN', 'IN')), ('the', ('DT', 'DT')), ('U.S.A.', ('NNP', 'NN'))]
9: [('Finally', ('RB', 'RB')), (',', (',', ',')), ('I', ('PRP', 'PRP')), ('am', ('VBP', 'VBP')), ('writing', ('VBG', 'VBG')), ('to', ('TO', 'TO')), ('request', ('VB', 'VB')), ('a', ('DT', 'DT')), ('full-refund', ('NN', 'JJ')), ('for', ('IN', 'IN')), ('my', ('PRP$', 'PRP$')), ('friends', ('NNS', 'NNS')), ('and', ('CC', 'CC')), ('I', ('PRP', 'PRP')), (',', (',', ',')), ('because', ('IN', 'IN')), ('I', ('PRP', 'PRP')), ('felt', ('VBD', 'VBD')), ('your', ('PRP$', 'PRP$')), ('advertisement', ('NN', 'NN')), ('was', ('VBD', 'VBD')), ('misleading', ('JJ', 'VBG')), ('.', ('.', '.'))]
10: [('Suddenly', ('RB', 'RB')), (',', (',', ',')), ('I', ('PRP', 'PRP')), ('heard', ('VBD', 'VBD')), ('of', ('IN', 'IN')), ('"', ('``', 'CD')), ('help', ('UH', 'NN')), (',', (',', ',')), ('help', ('UH', 'VB')), ('"', ("''", 'RB')), ('in', ('IN', 'IN')), ('my', ('PRP$', 'PRP$')), ('distance', ('NN', 'NN')), ('.', ('.', '.'))]
========================End of OOV Sampling======================
==========================Confusion Matrix========================
| P |
| N I R D R V J |
| N N P T B B J . , |
----+----------------------------------------------------------------+
NN | <10.5%> 0.0% . . 0.0% 0.2% 0.6% . . |
IN | 0.1% <9.3%> . 0.1% 0.1% 0.0% 0.0% . . |
PRP | 0.0% . <9.4%> . 0.0% . . . . |
DT | 0.0% 0.1% . <7.6%> 0.0% . . . . |
RB | 0.2% 0.3% . 0.1% <5.3%> 0.0% 0.1% . . |
VB | 0.2% 0.0% . . 0.1% <5.1%> 0.0% . . |
JJ | 0.3% 0.0% . 0.0% 0.2% 0.1% <4.5%> . . |
. | . . . . . . . <5.0%> 0.0% |
, | . . . . . . . . <4.3%>|
----+----------------------------------------------------------------+
(row = reference; col = test)
========================End of Confusion Matrix======================
+-------+--------------------+---------------------+---------------------+
| label | precision | recall | f-score |
+-------+--------------------+---------------------+---------------------+
| '' | 0.5 | 0.11764705882352941 | 0.19047619047619047 |
| ( | None | None | 0 |
| ) | None | None | 0 |
| , | 0.9975609756097561 | 0.9855421686746988 | 0.9915151515151515 |
| -LRB- | None | None | 0 |
| -RRB- | None | None | 0 |
| . | 1.0 | 0.9958592132505176 | 0.9979253112033196 |
| : | 0.5454545454545454 | 1.0 | 0.7058823529411764 |
| CC | 1.0 | 1.0 | 1.0 |
| CD | 0.75 | 0.75 | 0.75 |
| DT | 0.968 | 0.979757085020243 | 0.9738430583501007 |
| EX | 0.9230769230769231 | 0.75 | 0.8275862068965517 |
| FW | None | None | 0 |
| GW | None | None | 0 |
| HYPH | None | None | 0 |
| IN | 0.9322210636079249 | 0.8948948948948949 | 0.9131767109295199 |
| JJ | 0.8212927756653993 | 0.8074766355140187 | 0.8143261074458059 |
| JJR | 0.5882352941176471 | 0.5 | 0.5405405405405405 |
| JJS | 0.75 | 0.875 | 0.8076923076923077 |
| MD | 0.9804878048780488 | 0.9901477832512315 | 0.9852941176470588 |
| NN | 0.9036036036036036 | 0.8646551724137931 | 0.8837004405286343 |
| NNP | 0.7860082304526749 | 0.8721461187214612 | 0.8268398268398268 |
| NNPS | None | None | 0 |
| NNS | 0.8954081632653061 | 0.9564032697547684 | 0.9249011857707511 |
| PDT | 1.0 | 0.5294117647058824 | 0.6923076923076924 |
| POS | 0.9655172413793104 | 1.0 | 0.9824561403508771 |
| PRP | 1.0 | 0.9912472647702407 | 0.9956043956043955 |
| PRP$ | 0.9775784753363229 | 0.9954337899543378 | 0.9864253393665159 |
| RB | 0.8542372881355932 | 0.8571428571428571 | 0.8556876061120542 |
| RBR | 0.7857142857142857 | 0.6111111111111112 | 0.6875000000000001 |
| RBS | 0.4444444444444444 | 0.8 | 0.5714285714285714 |
| RP | 0.6666666666666666 | 0.3157894736842105 | 0.42857142857142855 |
| SYM | None | None | 0 |
| TO | 0.7492163009404389 | 1.0 | 0.8566308243727598 |
| UH | None | None | 0 |
| VB | 0.8578947368421053 | 0.9005524861878453 | 0.8787061994609164 |
| VBD | 0.8980099502487562 | 0.9304123711340206 | 0.9139240506329114 |
| VBG | 0.7795698924731183 | 0.9415584415584416 | 0.8529411764705881 |
| VBN | 0.7297297297297297 | 0.8120300751879699 | 0.7686832740213524 |
| VBP | 0.9036544850498339 | 0.8343558282208589 | 0.8676236044657097 |
| VBZ | 0.9259259259259259 | 0.8670520231213873 | 0.8955223880597015 |
| WDT | 0.95 | 0.6229508196721312 | 0.7524752475247525 |
| WP | 0.7391304347826086 | 0.9444444444444444 | 0.8292682926829269 |
| WRB | 0.9545454545454546 | 0.984375 | 0.9692307692307692 |
| `` | None | None | 0 |
+-------+--------------------+---------------------+---------------------+
Occurences in Predicted (tagged result)= [('NN', 1110), ('IN', 959), ('PRP', 906), ('DT', 750), ('RB', 590), ('VB', 570), ('JJ', 526), ('.', 481), (',', 410), ('VBD', 402), ('NNS', 392), ('CC', 333), ('TO', 319), ('VBP', 301), ('NNP', 243), ('PRP$', 223), ('MD', 205), ('VBG', 186), ('VBZ', 162), ('VBN', 148), ('CD', 68), ('WRB', 66), ('WP', 46), ('WDT', 40), ('POS', 29), ('JJS', 28), ('JJR', 17), ('RBR', 14), ('EX', 13), (':', 11), ('RBS', 9), ('PDT', 9), ('RP', 9), ('(', 6), (')', 6), ("''", 4)]
Occurences in Test (Gold standard) = [('NN', 1160), ('IN', 999), ('PRP', 914), ('DT', 741), ('RB', 588), ('VB', 543), ('JJ', 535), ('.', 483), (',', 415), ('VBD', 388), ('NNS', 367), ('CC', 333), ('VBP', 326), ('TO', 239), ('NNP', 219), ('PRP$', 219), ('MD', 203), ('VBZ', 173), ('VBG', 154), ('VBN', 133), ('CD', 68), ('WRB', 64), ('WDT', 61), ('WP', 36), ('POS', 28), ('JJS', 24), ('JJR', 20), ('RP', 19), ('RBR', 18), ('``', 18), ('PDT', 17), ("''", 17), ('EX', 16), ('UH', 13), ('-LRB-', 6), ('-RRB-', 6), (':', 6), ('HYPH', 6), ('RBS', 5), ('GW', 5), ('FW', 3), ('NNPS', 2), ('SYM', 1)]
Dictionary of mislabelled tags = [(('TO', 'IN'), 80), (('JJ', 'NN'), 56), (('VB', 'VBP'), 42), (('IN', 'RB'), 28), (('VBG', 'NN'), 25), (('NN', 'JJ'), 24), (('VBN', 'JJ'), 24), (('NNP', 'NN'), 22), (('VBD', 'VBN'), 20), (('NNS', 'NN'), 20), (('VB', 'NN'), 19), (('NN', 'RB'), 19), (('NN', 'VB'), 18), (('RB', 'JJ'), 15), (('VBP', 'VB'), 15), (('NNS', 'VBZ'), 14), (('RB', 'NNP'), 14), (('VBG', 'JJ'), 14), (('VBN', 'VBD'), 13), (('IN', 'WDT'), 12), (('JJ', 'RB'), 11), (('WP', 'WDT'), 10), (('VBZ', 'NNS'), 8), (('NN', 'IN'), 8), (('RB', "''"), 8), (('RB', 'RP'), 8), (('RB', 'IN'), 8), (('DT', 'PDT'), 7), (('VBD', 'VB'), 7), (('(', '-LRB-'), 6), ((')', '-RRB-'), 6), (('NNP', 'CD'), 6), (('NN', 'VBG'), 6), (('VB', 'JJ'), 6), (('IN', 'DT'), 6), (('DT', 'RB'), 6), (('RB', 'VB'), 5), (('JJ', 'NNP'), 5), (('JJ', 'CD'), 5), ((':', ','), 5), (('RB', 'JJR'), 5), (('NN', 'NNS'), 5), (('JJR', 'RBR'), 5), (('DT', 'IN'), 5), (('CD', '``'), 5), (('CD', 'NN'), 5), (('IN', 'RP'), 5), (('RB', 'NN'), 4), (('NNP', '``'), 4), (('RB', 'EX'), 4), (('JJ', 'VBD'), 4), (('JJ', 'UH'), 4), (('JJS', 'JJ'), 4), (('NN', 'CD'), 4), (('NNP', "''"), 4), (('VBP', 'VBZ'), 4), (('NNP', 'JJ'), 4), (('VB', 'VBD'), 3), (('PRP$', 'PRP'), 3), (('NN', 'VBP'), 3), (('DT', 'JJ'), 3), (('NN', 'NNP'), 3), (('NNS', 'RB'), 3), (('NN', 'JJR'), 3), (('CD', "''"), 3), (('NN', 'UH'), 3), (('IN', 'VB'), 3), (('VB', 'UH'), 3), (('NN', 'VBN'), 3), (('RP', 'RB'), 3), (('RB', '``'), 3), (('RB', 'GW'), 3), (('VBD', 'NNP'), 3), (('VBP', 'NN'), 2), (('IN', 'NNP'), 2), (('NNP', 'RB'), 2), (('VBZ', 'VBP'), 2), (('IN', 'VBP'), 2), (('NNP', 'NNPS'), 2), (('VBP', 'VBD'), 2), (('MD', 'VBP'), 2), (('RB', 'RBR'), 2), (('CD', 'PRP'), 2), (('RBS', 'RB'), 2), (('JJ', 'VB'), 2), (('RBR', 'JJR'), 2), (('WRB', 'RB'), 2), (('JJR', 'RB'), 2), (('RBS', 'JJS'), 2), (('VB', 'VBZ'), 2), (('NNP', 'DT'), 2), (('VBD', 'JJ'), 2), (('VBD', 'VBZ'), 2), (('NN', 'DT'), 2), (('VBD', 'VBP'), 2), (('MD', 'VBD'), 2), (('JJ', 'VBN'), 2), (('VB', 'RB'), 2), (('NNS', 'JJ'), 2), (('PRP$', 'DT'), 2), (("''", '``'), 2), (('VBP', 'HYPH'), 2), (('WP', 'DT'), 2), (('VBG', 'VB'), 2), (('IN', 'JJ'), 2), (('JJ', 'IN'), 2), (('WDT', 'WP'), 2), (('EX', 'RB'), 1), (('DT', 'GW'), 1), (('VB', 'MD'), 1), (('JJS', 'NN'), 1), (('POS', 'VBZ'), 1), ((',', '.'), 1), (('RB', 'HYPH'), 1), (('CD', 'JJ'), 1), (('VBD', 'NN'), 1), (('NN', 'GW'), 1), (('JJ', 'RBS'), 1), (('IN', 'WRB'), 1), (('VBD', 'RB'), 1), (('JJ', 'VBP'), 1), (('DT', 'UH'), 1), (('JJS', 'VB'), 1), (('NN', 'FW'), 1), (('VBN', 'VB'), 1), (('NNP', 'PRP'), 1), (('NNP', 'PDT'), 1), (('RB', 'NNS'), 1), (('VBP', 'FW'), 1), (('NNP', '.'), 1), (('RBR', 'JJ'), 1), (('IN', 'UH'), 1), (('NNP', 'NNS'), 1), (('VBD', '``'), 1), (('NN', 'HYPH'), 1), (('VBN', 'NN'), 1), (('NN', 'VBD'), 1), (('NNP', ','), 1), (('VB', 'NNS'), 1), (('DT', 'WDT'), 1), (('JJS', 'RB'), 1), (('NNP', 'HYPH'), 1), (('NNS', 'NNP'), 1), (('VBZ', 'PRP$'), 1), (('VB', 'IN'), 1), (('RB', 'CD'), 1), (('VBD', 'CD'), 1), (('RB', 'PRP'), 1), (('VBP', 'IN'), 1), (('JJ', '``'), 1), (('VBP', 'MD'), 1), (('NN', '``'), 1), (('CD', 'SYM'), 1), (('RB', 'DT'), 1), (('VBP', 'VBG'), 1), (('VB', 'VBG'), 1), (('RB', 'UH'), 1), (('IN', 'HYPH'), 1), (('NNS', 'JJS'), 1), (('RBS', 'JJ'), 1), (('WRB', 'VBD'), 1), (('NN', 'PRP'), 1), (('RB', 'FW'), 1), (('VBN', 'RB'), 1), (('IN', '``'), 1), (('VBD', 'VBG'), 1), (('IN', 'NN'), 1), (('VBZ', 'VBD'), 1)]
Test dataset: ELLTest, Model name: ELLTrain
Results on 500 sentences and 9591 words, of which 340 were unknown.
Unknown words right: 249 (73.2353%); wrong: 91 (26.7647%).
Total tags right: 9157 (95.4749%); wrong: 434 (4.5251%).
Total sentences right: 237 (47.4000%); wrong: 263 (52.6000%).
==========================OOV Sampling========================
Sampling: randomly selected 10 sentences that contain OOV words.
Format: [(word, (gold_tag, test_tag)), (word, (gold_tag, test_tag))]
1: [('Every', ('DT', 'DT')), ('one', ('CD', 'CD')), ('hopes', ('VBZ', 'NNS')), ('to', ('TO', 'TO')), ('enjoying', ('VBG', 'VBG')), ('himself', ('PRP', 'PRP')), ('and', ('CC', 'CC')), ('comes', ('VBZ', 'VBZ')), ('back', ('RB', 'RB')), ('with', ('IN', 'IN')), ('a', ('DT', 'DT')), ('lot', ('NN', 'NN')), ('of', ('IN', 'IN')), ('knowledge', ('NN', 'NN')), ('.', ('.', '.'))]
2: [('Contrary', ('RB', 'JJ')), ('to', ('IN', 'IN')), ('the', ('DT', 'DT')), ('advertisment', ('NN', 'NN')), ('everything', ('NN', 'NN')), ('was', ('VBD', 'VBD')), ('disapointing', ('JJ', 'VBG')), ('.', ('.', '.'))]
3: [('Pat', ('NNP', 'NNP')), ('was', ('VBD', 'VBD')), ('not', ('RB', 'RB')), ('a', ('DT', 'DT')), ('very', ('RB', 'RB')), ('clever', ('JJ', 'JJ')), ('teen-ager', ('NN', 'NN')), ('and', ('CC', 'CC')), ('while', ('IN', 'IN')), ('she', ('PRP', 'PRP')), ('was', ('VBD', 'VBD')), ('attending', ('VBG', 'VBG')), ('the', ('DT', 'DT')), ('Secondary', ('JJ', 'RB')), ('School', ('NN', 'NN')), ('she', ('PRP', 'PRP')), ('had', ('VBD', 'VBD')), ('many', ('JJ', 'JJ')), ('troubles', ('NNS', 'NNS')), ('with', ('IN', 'IN')), ('Chemistry', ('NN', 'NNP')), ('.', ('.', '.'))]
4: [('The', ('DT', 'DT')), ('two', ('CD', 'CD')), ('activities', ('NN', 'NNS')), ('that', ('IN', 'IN')), ('I', ('PRP', 'PRP')), ('prefer', ('VBP', 'VBP')), ('will', ('MD', 'MD')), ('be', ('VB', 'VB')), ('Basketball', ('NN', 'NN')), (',', (',', ',')), ('because', ('IN', 'IN')), ('I', ('PRP', 'PRP')), ('had', ('VBD', 'VBD')), ('the', ('DT', 'DT')), ('experience', ('NN', 'NN')), ('to', ('TO', 'TO')), ('play', ('VB', 'VB')), ('in', ('IN', 'IN')), ('my', ('PRP$', 'PRP$')), ('school', ('NN', 'NN')), ("'s", ('POS', 'POS')), ('big', ('JJ', 'JJ')), ('for', ('IN', 'IN')), ('almoust', ('RB', 'VB')), ('a', ('DT', 'DT')), ('year', ('NN', 'NN')), (',', (',', ',')), ('and', ('CC', 'CC')), ('Photography', ('NN', 'NN')), (',', (',', ',')), ('which', ('WDT', 'WDT')), ('is', ('VBZ', 'VBZ')), ('a', ('DT', 'DT')), ('hobby', ('NN', 'NN')), ('that', ('IN', 'IN')), ('I', ('PRP', 'PRP')), ('am', ('VBP', 'VBP')), ('good', ('JJ', 'JJ')), ('at', ('IN', 'IN')), ('and', ('CC', 'CC')), ('I', ('PRP', 'PRP')), ('practice', ('VBP', 'VBP')), ('since', ('IN', 'IN')), ('I', ('PRP', 'PRP')), ('was', ('VBD', 'VBD')), ('ten', ('CD', 'CD')), ('.', ('.', '.'))]
5: [('Because', ('IN', 'IN')), ('I', ('PRP', 'PRP')), ('usually', ('RB', 'RB')), ('go', ('VB', 'VBP')), ('on', ('IN', 'IN')), ('camping-site', ('NN', 'JJ')), ('holiday', ('NN', 'NN')), (',', (',', ',')), ('with', ('IN', 'IN')), ('my', ('PRP$', 'PRP$')), ('friends', ('NNS', 'NNS')), ('.', ('.', '.'))]
6: [('The', ('DT', 'DT')), ('headmaster', ('NN', 'NN')), ('was', ('VBD', 'VBD')), ('extremely', ('RB', 'RB')), ('angry', ('JJ', 'JJ')), ('about', ('IN', 'IN')), ('it', ('PRP', 'PRP')), ('and', ('CC', 'CC')), ('he', ('PRP', 'PRP')), ('gave', ('VBD', 'VBD')), ('both', ('DT', 'DT')), ('students', ('NNS', 'NNS')), ('a', ('DT', 'DT')), ('heavy', ('JJ', 'JJ')), ('punishment', ('NN', 'NN')), (',', (',', ',')), ('which', ('WDT', 'WDT')), ('was', ('VBD', 'VBD')), ('suspended', ('VBD', 'VBN')), ('from', ('IN', 'IN')), ('school', ('NN', 'NN')), ('for', ('IN', 'IN')), ('a', ('DT', 'DT')), ('week', ('NN', 'NN')), ('.', ('.', '.'))]
7: [('Internet', ('NNP', 'NN')), ('is', ('VBZ', 'VBZ')), ('very', ('RB', 'RB')), ('handy', ('JJ', 'JJ')), (',', (',', ',')), ('you', ('PRP', 'PRP')), ('can', ('MD', 'MD')), ('buy', ('VB', 'VB')), (',', (',', ',')), ('send', ('VB', 'VB')), ('e-mail', ('NN', 'NN')), ('messages', ('NN', 'NNS')), (',', (',', ',')), ('or', ('CC', 'CC')), ('even', ('RB', 'RB')), ('have', ('VB', 'VBP')), ('sex', ('NN', 'NN')), (',', (',', ',')), ('that', ('WDT', 'WDT')), ('is', ('VBZ', 'VBZ')), ('really', ('RB', 'RB')), ('funny', ('JJ', 'JJ')), ('.', ('.', '.'))]
8: [('I', ('PRP', 'PRP')), ('hope', ('VBP', 'VBP')), ('that', ('IN', 'IN')), ('after', ('IN', 'IN')), ('jully', ('NNP', 'RB')), ('wo', ('MD', 'MD')), ("n't", ('RB', 'RB')), ('disturb', ('VB', 'VB')), ('the', ('DT', 'DT')), ('Camp', ('NNP', 'NNP')), ('California', ('NNP', 'NNP')), ("'s", ('POS', 'POS')), ('plans', ('NNS', 'NNS')), ('for', ('IN', 'IN')), ('me', ('PRP', 'PRP')), ('.', ('.', '.'))]
9: [('His', ('PRP$', 'PRP$')), ('message', ('NN', 'NN')), ('is', ('VBZ', 'VBZ')), ('still', ('RB', 'RB')), ('as', ('RB', 'IN')), ('impacting', ('JJ', 'VBG')), ('today', ('NN', 'NN')), ('and', ('CC', 'CC')), ('will', ('MD', 'MD')), ('always', ('RB', 'RB')), ('be', ('VB', 'VB')), ('even', ('RB', 'RB')), ('if', ('IN', 'IN')), ('this', ('DT', 'DT')), ('story', ('NN', 'NN')), ('happened', ('VBD', 'VBD')), ('a', ('DT', 'DT')), ('long', ('JJ', 'JJ')), ('time', ('NN', 'NN')), ('ago', ('RB', 'RB')), ('.', ('.', '.'))]
10: [('Even', ('RB', 'RB')), ('if', ('IN', 'IN')), ('the', ('DT', 'DT')), ('photograph', ('NN', 'NN')), ('made', ('VBN', 'VBD')), ('are', ('VBP', 'VBP')), ('modest', ('JJ', 'JJS')), ('work', ('NN', 'NN')), (',', (',', ',')), ('I', ('PRP', 'PRP')), ('am', ('VBP', 'VBP')), ('really', ('RB', 'RB')), ('interested', ('JJ', 'JJ')), ('in', ('IN', 'IN')), ('this', ('DT', 'DT')), ('activity', ('NN', 'NN')), ('.', ('.', '.'))]
========================End of OOV Sampling======================
==========================Confusion Matrix========================
| P |
| N I R D R V J |
| N N P T B B J . , |
----+----------------------------------------------------------------+
NN | <11.6%> . . . 0.0% 0.1% 0.1% . . |
IN | . <10.2%> . 0.0% 0.1% 0.0% . . . |
PRP | 0.0% . <9.5%> . . . . . . |
DT | 0.0% 0.1% . <7.6%> . . . . . |
RB | 0.0% 0.1% . 0.1% <5.6%> 0.0% 0.1% . . |
VB | 0.1% 0.0% . . 0.0% <5.2%> 0.1% . . |
JJ | 0.2% 0.0% . 0.0% 0.1% 0.0% <5.1%> . . |
. | . . . . . . . <5.0%> 0.0% |
, | . . . . . . . . <4.3%>|
----+----------------------------------------------------------------+
(row = reference; col = test)
========================End of Confusion Matrix======================
+-------+--------------------+---------------------+--------------------+
| label | precision | recall | f-score |
+-------+--------------------+---------------------+--------------------+
| '' | 0.8 | 0.9411764705882353 | 0.8648648648648648 |
| , | 0.9975961538461539 | 1.0 | 0.9987966305655837 |
| -LRB- | 1.0 | 1.0 | 1.0 |
| -RRB- | 1.0 | 1.0 | 1.0 |
| . | 1.0 | 0.9979296066252588 | 0.9989637305699481 |
| : | 1.0 | 1.0 | 1.0 |
| CC | 1.0 | 1.0 | 1.0 |
| CD | 0.927536231884058 | 0.9411764705882353 | 0.9343065693430658 |
| DT | 0.9784366576819407 | 0.979757085020243 | 0.9790964261631827 |
| EX | 0.9411764705882353 | 1.0 | 0.9696969696969697 |
| FW | 1.0 | 0.3333333333333333 | 0.5 |
| GW | 0.75 | 0.6 | 0.6666666666666665 |
| HYPH | 1.0 | 1.0 | 1.0 |
| IN | 0.9682539682539683 | 0.9769769769769769 | 0.9725959142999502 |
| JJ | 0.8998144712430427 | 0.9065420560747663 | 0.9031657355679702 |
| JJR | 0.7142857142857143 | 0.75 | 0.7317073170731706 |
| JJS | 0.7692307692307693 | 0.8333333333333334 | 0.8 |
| MD | 0.9757281553398058 | 0.9901477832512315 | 0.9828850855745721 |
| NN | 0.9397794741306191 | 0.9551724137931035 | 0.9474134245404019 |
| NNP | 0.9488372093023256 | 0.9315068493150684 | 0.9400921658986175 |
| NNPS | None | None | 0 |
| NNS | 0.9329896907216495 | 0.9863760217983651 | 0.9589403973509935 |
| PDT | 0.7333333333333333 | 0.6470588235294118 | 0.6875 |
| POS | 0.9333333333333333 | 1.0 | 0.9655172413793104 |
| PRP | 0.9989023051591658 | 0.9956236323851203 | 0.9972602739726028 |
| PRP$ | 0.9819004524886877 | 0.9908675799086758 | 0.9863636363636363 |
| RB | 0.9342560553633218 | 0.9183673469387755 | 0.9262435677530018 |
| RBR | 0.9230769230769231 | 0.6666666666666666 | 0.7741935483870968 |
| RBS | 0.8 | 0.8 | 0.8000000000000002 |
| RP | 0.6428571428571429 | 0.47368421052631576 | 0.5454545454545454 |
| SYM | 1.0 | 1.0 | 1.0 |
| TO | 0.967479674796748 | 0.99581589958159 | 0.9814432989690722 |
| UH | 0.875 | 0.5384615384615384 | 0.6666666666666667 |
| VB | 0.9395085066162571 | 0.9152854511970534 | 0.9272388059701493 |
| VBD | 0.9411764705882353 | 0.9484536082474226 | 0.9448010269576379 |
| VBG | 0.9315068493150684 | 0.8831168831168831 | 0.9066666666666665 |
| VBN | 0.8907563025210085 | 0.7969924812030075 | 0.8412698412698413 |
| VBP | 0.8994082840236687 | 0.9325153374233128 | 0.9156626506024096 |
| VBZ | 0.9751552795031055 | 0.9075144508670521 | 0.940119760479042 |
| WDT | 0.9411764705882353 | 0.7868852459016393 | 0.8571428571428571 |
| WP | 0.8536585365853658 | 0.9722222222222222 | 0.9090909090909091 |
| WRB | 0.9411764705882353 | 1.0 | 0.9696969696969697 |
| `` | 0.9333333333333333 | 0.7777777777777778 | 0.8484848484848485 |
+-------+--------------------+---------------------+--------------------+
Occurences in Predicted (tagged result)= [('NN', 1179), ('IN', 1008), ('PRP', 911), ('DT', 742), ('RB', 578), ('JJ', 539), ('VB', 529), ('.', 482), (',', 416), ('VBD', 391), ('NNS', 388), ('VBP', 338), ('CC', 333), ('TO', 246), ('PRP$', 221), ('NNP', 215), ('MD', 206), ('VBZ', 161), ('VBG', 146), ('VBN', 119), ('CD', 69), ('WRB', 68), ('WDT', 51), ('WP', 41), ('POS', 30), ('JJS', 26), ('JJR', 21), ("''", 20), ('EX', 17), ('``', 15), ('PDT', 15), ('RP', 14), ('RBR', 13), ('UH', 8), ('-LRB-', 6), ('-RRB-', 6), (':', 6), ('HYPH', 6), ('RBS', 5), ('GW', 4), ('FW', 1), ('SYM', 1)]
Occurences in Test (Gold standard) = [('NN', 1160), ('IN', 999), ('PRP', 914), ('DT', 741), ('RB', 588), ('VB', 543), ('JJ', 535), ('.', 483), (',', 415), ('VBD', 388), ('NNS', 367), ('CC', 333), ('VBP', 326), ('TO', 239), ('NNP', 219), ('PRP$', 219), ('MD', 203), ('VBZ', 173), ('VBG', 154), ('VBN', 133), ('CD', 68), ('WRB', 64), ('WDT', 61), ('WP', 36), ('POS', 28), ('JJS', 24), ('JJR', 20), ('RP', 19), ('RBR', 18), ('``', 18), ('PDT', 17), ("''", 17), ('EX', 16), ('UH', 13), ('-LRB-', 6), ('-RRB-', 6), (':', 6), ('HYPH', 6), ('RBS', 5), ('GW', 5), ('FW', 3), ('NNPS', 2), ('SYM', 1)]
Dictionary of mislabelled tags = [(('VBP', 'VB'), 26), (('VBD', 'VBN'), 16), (('VB', 'VBP'), 16), (('NN', 'JJ'), 16), (('JJ', 'RB'), 14), (('NNS', 'NN'), 13), (('RB', 'IN'), 13), (('JJ', 'NN'), 13), (('NN', 'VBG'), 13), (('IN', 'RB'), 12), (('RB', 'JJ'), 10), (('NN', 'VB'), 8), (('TO', 'IN'), 8), (('NN', 'NNP'), 8), (('VBN', 'VBD'), 7), (('IN', 'WDT'), 7), (('RB', 'RP'), 7), (('NNS', 'VBZ'), 6), (('WP', 'WDT'), 6), (('VB', 'NN'), 6), (('JJ', 'VBN'), 6), (('VBP', 'VBZ'), 6), (('JJ', 'VB'), 5), (('VBG', 'JJ'), 5), (('DT', 'PDT'), 5), (('IN', 'DT'), 5), (('JJ', 'VBD'), 5), (('JJS', 'JJ'), 5), (('DT', 'RB'), 5), (('CD', 'NN'), 5), (('VBG', 'NN'), 5), (('RP', 'RB'), 5), (('JJR', 'RBR'), 4), (("''", '``'), 4), (('NN', 'UH'), 4), (('NNP', 'NN'), 4), (('NN', 'NNS'), 4), (('RB', 'NN'), 3), (('NN', 'CD'), 3), (('DT', 'JJ'), 3), (('NN', 'VBN'), 3), (('JJ', 'JJR'), 3), (('IN', 'RP'), 3), (('VBD', 'VB'), 3), (('PDT', 'DT'), 3), (('WDT', 'DT'), 3), (('VBN', 'JJ'), 3), (('VB', 'VBN'), 2), (('PRP$', 'PRP'), 2), (('VBZ', 'VBP'), 2), (('JJ', 'JJS'), 2), (('RB', 'NNP'), 2), (('VB', 'RB'), 2), (('JJ', 'NNP'), 2), (('NNP', 'NNPS'), 2), (('POS', 'VBZ'), 2), (('MD', 'VBP'), 2), (('WRB', 'RB'), 2), (('VBN', 'VB'), 2), (('NNP', 'JJ'), 2), (('NNS', 'NNP'), 2), (('VBD', 'VBZ'), 2), (('MD', 'VBD'), 2), (('NN', 'VBD'), 2), (('NNS', 'JJ'), 2), (('PRP$', 'DT'), 2), (('VB', 'VBG'), 2), (('NN', 'PRP'), 2), (('JJ', 'VBG'), 2), (('VB', 'VBD'), 1), (('NNS', 'CD'), 1), (('NN', 'RB'), 1), (('NNS', 'RB'), 1), (('DT', 'GW'), 1), (('VB', 'MD'), 1), (('DT', 'IN'), 1), (('JJS', 'NN'), 1), ((',', '.'), 1), (('NN', 'JJR'), 1), (('NN', 'GW'), 1), (('RB', 'RBS'), 1), (('JJ', 'VBP'), 1), (('IN', 'VB'), 1), (('EX', 'RB'), 1), (('DT', 'UH'), 1), (('NN', 'FW'), 1), (('UH', 'RB'), 1), (('IN', 'TO'), 1), (('IN', 'PDT'), 1), (('NNP', 'DT'), 1), (('NNP', 'NNS'), 1), (('IN', 'NNP'), 1), (('JJR', 'JJ'), 1), (('PDT', 'RB'), 1), (('WRB', 'NN'), 1), (('NNP', 'UH'), 1), (('PRP', 'PRP$'), 1), (('RBS', 'RB'), 1), (('VB', 'JJ'), 1), (('JJR', 'NN'), 1), (('NN', 'VBP'), 1), (('VBD', 'RB'), 1), (('JJ', 'RBR'), 1), (('NN', 'JJS'), 1), (('RB', 'VB'), 1), (('VBZ', 'PRP$'), 1), (('VB', 'IN'), 1), (('VBD', 'JJ'), 1), (('VBP', 'MD'), 1), (('IN', 'JJ'), 1), (('NN', 'DT'), 1), (('NNS', 'JJS'), 1), (('WRB', 'VBD'), 1), (('RB', 'RBR'), 1), (('GW', 'FW'), 1), (('RBR', 'JJR'), 1), (('VBP', 'VBD'), 1), (('VBN', 'RB'), 1), (('``', "''"), 1), (('MD', 'VBG'), 1), (('VBZ', 'VBD'), 1), (('NN', 'WP'), 1)]