-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmaxdiff_to_scale.pl
executable file
·140 lines (140 loc) · 3.23 KB
/
maxdiff_to_scale.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#!/usr/bin/perl
#
#
# maxdiff_to_scale.pl
#
# - convert MaxDiff answers to a list of word pairs rated on a scale
#
#
#
#
# Peter Turney
# December 19, 2011
#
#
#
#
#
#
# check command line arguments
#
# Exactly two arguments are required: the MaxDiff input file and the
# output file for the rated pairs.  On a usage error the help text goes
# to STDERR and the exit status is non-zero, so that a calling shell
# script or makefile can tell that nothing was processed.
if (@ARGV != 2) {
    print STDERR "\n\nUsage:\n\n";
    print STDERR "maxdiff_to_scale.pl <input file of MaxDiff answers> <output file of rated pairs>\n\n";
    exit 1;
}
#
# unpack the two positional arguments in one step:
#
#   $max_file  - input file of MaxDiff answers
#   $out_file  - output file of rated pairs
#
($max_file, $out_file) = @ARGV[0, 1];
#
#
#
#
#
#
#
# read the MaxDiff answers
#
%pair2least = ();   # $pair2least{$pair} = <number of times $pair is chosen as least illustrative>
%pair2most  = ();   # $pair2most{$pair}  = <number of times $pair is chosen as most illustrative>
%pair2num   = ();   # $pair2num{$pair}   = <number of MaxDiff questions including $pair>
$num_pairs  = 0;    # number of unique pairs
$num_max    = 0;    # number of MaxDiff questions
#
print "reading file of MaxDiff answers $max_file ...\n";
#
# Three-argument open keeps the file name from being interpreted as an
# open mode, and the explicit check stops the run when the input cannot
# be read instead of silently producing an empty result.
#
open(my $inf, '<', $max_file)
    or die "maxdiff_to_scale.pl: cannot open MaxDiff file '$max_file' for reading: $!\n";
#
while (my $line = <$inf>) {
    #
    # typical $line:
    #
    # # relation1 relation2 relation3 relation4 least_illustrative most_illustrative user_selected_relation
    # "school:fish" "library:book" "flock:sheep" "flock:bird" "flock:bird" "flock:sheep" "X is made from a collection of Y"
    # "school:fish" "library:book" "flock:sheep" "flock:bird" "flock:sheep" "library:book" "X is made from a collection of Y"
    #
    next if $line =~ /^\s*\#/;      # skip comments
    next if $line !~ /\S/;          # skip blank lines (they are not questions)
    #
    # split(' ', ...) discards leading whitespace, so an indented line
    # does not gain an empty first field that would shift every column.
    #
    my @fields = split(' ', $line);
    #
    # Fields 0-3 are the four pairs shown, field 4 is the least
    # illustrative choice and field 5 the most illustrative choice.
    # A shorter line cannot be a valid answer; counting it would
    # register an empty pair and inflate the question total.
    #
    if (@fields < 6) {
        warn "maxdiff_to_scale.pl: skipping malformed line " . $. . " of $max_file\n";
        next;
    }
    foreach my $pair (@fields[0 .. 3]) {
        if (! defined($pair2num{$pair})) {      # if new $pair, then initialize hash tables
            $pair2least{$pair} = 0;
            $pair2most{$pair}  = 0;
            $pair2num{$pair}   = 0;
            $num_pairs++;
        }
        $pair2num{$pair}++;
    }
    my ($least, $most) = @fields[4, 5];
    $pair2least{$least}++;
    $pair2most{$most}++;
    $num_max++;
}
#
close($inf);
#
print "... read $num_max MaxDiff answers ...\n";
print "... read $num_pairs unique pairs ...\n";
print "... done.\n";
#
#
#
#
#
# score the pairs
#
print "processing pairs and writing to $out_file ...\n";
#
# For every pair seen in the input, build one "score pair" string where
#
#   score = (percentage of its questions in which it was chosen most)
#         - (percentage of its questions in which it was chosen least)
#
@pairs  = keys %pair2num;
@scores = map {
    my $asked       = $pair2num{$_};
    my $share_least = 100 * $pair2least{$_} / $asked;
    my $share_most  = 100 * $pair2most{$_} / $asked;
    sprintf("%5.1f %s", $share_most - $share_least, $_);
} @pairs;
#
# sort
#
# Each element of @scores is a "score pair" string.  Pull the leading
# number out once per element and sort on it in descending order.
# Elements with equal scores are ordered by their text, so the output
# does not depend on the (per-run randomized) order in which the pairs
# came out of the hash.
@sorted = map  { $_->[1] }
          sort { $b->[0] <=> $a->[0] or $a->[1] cmp $b->[1] }
          map  { [ (split(' ', $_))[0], $_ ] } @scores;
#
# output
#
# Three-argument open keeps the file name from being interpreted as an
# open mode; stop with a clear message if the file cannot be created.
open(my $outf, '>', $out_file)
    or die "maxdiff_to_scale.pl: cannot open output file '$out_file' for writing: $!\n";
#
# header: where the data came from and how to read the score column
#
print $outf "#\n";
print $outf "# Generated by: maxdiff_to_scale.pl\n";
print $outf "# MaxDiff File: $max_file\n";
print $outf "# Scaled File: $out_file\n";
print $outf "# Number of MaxDiff Answers: $num_max\n";
print $outf "# Number of Unique Pairs: $num_pairs\n";
print $outf "#\n";
print $outf "# Score = Most - Least\n";
print $outf "# Most = Percentage of times given pair was chosen as Most Illustrative\n";
print $outf "# Least = Percentage of times given pair was chosen as Least Illustrative\n";
print $outf "#\n";
#
# body: one "score pair" line per pair, in the order held by @sorted
#
foreach my $item (@sorted) {
    print $outf "$item\n";
}
#
# Buffered write errors (for example a full disk) only surface when the
# handle is closed, so the result of close has to be checked as well.
#
close($outf)
    or die "maxdiff_to_scale.pl: error while writing '$out_file': $!\n";
#
print "... done.\n";
#
#
#
#
#
#