forked from sudhof/politeness
-
Notifications
You must be signed in to change notification settings - Fork 0
/
model.py
118 lines (88 loc) · 3.17 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import sys
import os
import cPickle
"""
This file provides an interface to
a pre-trained politeness SVM.
"""
#####
# Ensure the proper python dependencies exist.
# Each package is imported in its own try block so that the message names
# exactly which one is missing; a failure exits with status 2 instead of
# surfacing a raw traceback.
# Only ImportError is handled: a bare `except:` would also intercept
# KeyboardInterrupt/SystemExit and would report unrelated failures raised
# while importing (e.g. a broken installation) as "Package not found".
try:
    import numpy as np
except ImportError:
    sys.stderr.write("Package not found: Politeness model requires python package numpy\n")
    sys.exit(2)

try:
    import scipy
    from scipy.sparse import csr_matrix
except ImportError:
    sys.stderr.write("Package not found: Politeness model requires python package scipy\n")
    sys.exit(2)

try:
    import sklearn
except ImportError:
    sys.stderr.write("Package not found: Politeness model requires python package scikit-learn\n")
    sys.exit(2)

try:
    import nltk
except ImportError:
    sys.stderr.write("Package not found: Politeness model requires python package nltk\n")
    sys.exit(2)
####
# Check versions for sklearn, scipy, numpy, nltk
# Don't error out, just notify


def _version_tuple(version):
    """Return the leading numeric components of a version string as ints.

    '0.15.1' -> (0, 15, 1); '3.0.0b1' -> (3, 0, 0); '1.9.dev0' -> (1, 9).
    Parsing stops at the first component that is not purely numeric, so a
    pre-release suffix is ignored rather than compared.
    """
    numbers = []
    for component in str(version).split("."):
        digits = ""
        for char in component:
            if not char.isdigit():
                break
            digits += char
        if not digits:
            break
        numbers.append(int(digits))
        if len(digits) != len(component):
            # A suffix such as 'b1' or 'rc2' ends the numeric release part.
            break
    return tuple(numbers)


def _older_than(detected, expected):
    """Return True when version string *detected* is numerically below *expected*."""
    found = _version_tuple(detected)
    wanted = _version_tuple(expected)
    # Pad to equal length so that '1.9' and '1.9.0' compare as equal.
    width = max(len(found), len(wanted))
    found += (0,) * (width - len(found))
    wanted += (0,) * (width - len(wanted))
    return found < wanted


packages2versions = [
    ("scikit-learn", sklearn, "0.15.1"),
    ("numpy", np, "1.9.0"),
    ("nltk", nltk, "3.0.0"),
    ("scipy", scipy, "0.12.0"),
]
for name, package, expected_v in packages2versions:
    # Compare numerically: as plain strings "0.9.0" sorts after "0.15.1" and
    # "1.10.0" sorts before "1.9.0", which gives missed and spurious warnings.
    if _older_than(package.__version__, expected_v):
        sys.stderr.write("Warning: package '%s', expected version >= %s, detected %s. Code functionality not guaranteed.\n" % (name, expected_v, package.__version__))
####
from features.vectorizer import PolitenessFeatureVectorizer
####
# Serialized model filename; the pickle is expected to sit in the same
# directory as this module.
MODEL_FILENAME = os.path.join(os.path.split(__file__)[0], 'politeness-svm.p')


def _load_model(path):
    """Unpickle and return the classifier stored at *path*.

    The file is opened in binary mode (pickle data is a byte stream; text
    mode can corrupt it on platforms that translate line endings) and is
    closed as soon as loading finishes instead of being left open.

    NOTE(security): unpickling can execute arbitrary code. Only point this
    at a model file obtained from a trusted source.
    """
    with open(path, 'rb') as model_file:
        return cPickle.load(model_file)


####
# Load model, initialize vectorizer
clf = _load_model(MODEL_FILENAME)
vectorizer = PolitenessFeatureVectorizer()
def score(request):
    """
    Compute politeness class probabilities for a single request document.

    :param request - The request document to score
    :type request - dict with 'sentences' and 'parses' field

    sample (taken from test_documents.py)--
    {
        'sentences': [
            "Have you found the answer for your question?",
            "If yes would you please share it?"
        ],
        'parses': [
            ["csubj(found-3, Have-1)", "dobj(Have-1, you-2)", "root(ROOT-0, found-3)", "det(answer-5, the-4)", "dobj(found-3, answer-5)", "poss(question-8, your-7)", "prep_for(found-3, question-8)"],
            ["prep_if(would-3, yes-2)", "root(ROOT-0, would-3)", "nsubj(would-3, you-4)", "ccomp(would-3, please-5)", "nsubj(it-7, share-6)", "xcomp(please-5, it-7)"]
        ]
    }

    returns class probabilities as a dict
    {
        'polite': float,
        'impolite': float
    }
    """
    # The vectorizer hands back a {feature-name: value} dict. Reading the
    # values in sorted feature-name order gives a fixed column layout
    # (presumably the layout the pickled model was trained with -- confirm).
    feature_map = vectorizer.features(request)
    row = [feature_map[feature_name] for feature_name in sorted(feature_map)]

    # The classifier is fed a sparse matrix holding just this one row.
    matrix = csr_matrix(np.asarray([row]))
    class_probs = clf.predict_proba(matrix)[0]

    # Column 1 is reported as "polite", column 0 as "impolite".
    return {"polite": class_probs[1], "impolite": class_probs[0]}
if __name__ == "__main__":
    # Sample classification of requests: score every bundled test document
    # and print its text followed by both class probabilities.
    from test_documents import TEST_DOCUMENTS

    for document in TEST_DOCUMENTS:
        result = score(document)
        print("====================")
        # The print statement `print a, b` separates its items with a single
        # space; the format string reproduces that exact output.
        print("%s %s" % ("Text: ", document['text']))
        print("\tP(polite) = %.3f" % result['polite'])
        print("\tP(impolite) = %.3f" % result['impolite'])
        print("\n")