-
Notifications
You must be signed in to change notification settings - Fork 1
/
anon.py
218 lines (195 loc) · 6.6 KB
/
anon.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
#---------------1st Function------------------------
#Strips every variable label so that there is no extra white space on either end.
#Also blanks the label of the first variable whose name starts with "SC0" and of every variable after it.
import spss, re
def removeScoLabels(filename):
    """Trim all variable labels and blank those from the first "SC0" variable on.

    filename -- text handed unchanged to spss.Submit before the data step is
                opened, so despite its name it must be SPSS command syntax
                (presumably a GET FILE command -- confirm against the caller).

    Returns None. The labels of the active dataset are changed in place:
    every non-empty label loses its leading/trailing white space, and the
    first variable whose name starts with "SC0" plus every variable after it
    gets an empty label.
    """
    spss.Submit(filename)
    spss.StartDataStep()
    try:
        dataset = spss.Dataset()
        blanking = False
        for var in dataset.varlist:
            # Once the first SC0 variable is seen the flag stays set, so all
            # later variables are blanked as well.
            if not blanking and var.name.startswith('SC0'):
                blanking = True
            if blanking:
                var.label = ''
            elif var.label:
                # Strip on the label's own type: a str() round trip raises
                # UnicodeEncodeError for non-ASCII labels under Python 2.
                var.label = var.label.strip()
    finally:
        # Always close the data step, otherwise a failure above leaves it
        # open and blocks later Submit/StartDataStep calls.
        spss.EndDataStep()
#-----------------2nd Function---------------------------
#Removes all digits from the "Please rate the" variable labels
#so that the order of the questions doesn't matter.
#Also removes the word "items" or "issues" from those labels because
#some questions had it and some didn't.
#Lastly, it prefixes the question labels and the variable names.
import spss, re
# One entry per header label: (text the label starts with,
# prefix put in front of the question labels that follow,
# prefix put in front of variable names).
_SECTION_HEADERS = (
    ('Example ', 'ex ', 'EE'),
    ('Quality of', 'qh ', 'EE'),
    ('Soap Box', 'sb ', 'EE'),
    ('International Aid', 'ai ', 'EE'),
    ('Flood-Control', 'fc ', 'EE'),
    ('Flood Control', 'fc ', 'EE'),
    ('Nurse Schedule', 'ns ', 'EE'),
    ('Water Quality', 'wq ', 'EE'),
    ('Grant Proposal', 'gp ', 'EE'),
    ('Pedestrian Bridge', 'pb ', 'EE'),
    ('Famine', 'fam ', 'DD'),
    ('Reporter', 'rep ', 'DD'),
    ('School Board', 'sch ', 'DD'),
    ('Cancer', 'can ', 'DD'),
    ('Demonstration', 'dem ', 'DD'),
)

# Labels opening with one of these receive the current label prefix.
_QUESTION_OPENINGS = ('Consider', 'Please rate', 'Rate the')

# Variable names that still need the current name prefix: "Q...", "a_".."d_",
# or "a"/"b" followed by any one character.
_UNPREFIXED_NAME = re.compile(r'(?:Q|[a-d]_|[a-b].)')


def renameDoopLabels(filename):
    """Normalise question labels and prefix labels and variable names.

    filename -- text handed unchanged to spss.Submit before the data step is
                opened, so it must be SPSS command syntax (presumably a
                GET FILE command -- confirm against the caller).

    For every variable of the active dataset, in file order:
      * its name is printed;
      * a variable without a label only prints 'caught';
      * a label starting with "Please rate the" loses all digits, then the
        first "items " / "issues " of a "Please rate the following
        items/issues" label is dropped;
      * a label starting with one of the _SECTION_HEADERS openings sets the
        current label prefix and name prefix and prefixes that variable's name;
      * a label starting with "Consider", "Please rate" or "Rate the" gets
        the current label prefix;
      * a name matching _UNPREFIXED_NAME gets the current name prefix.

    Returns None; the dataset dictionary is edited in place.
    """
    spss.Submit(filename)
    spss.StartDataStep()
    try:
        dataset = spss.Dataset()
        prefix = ''   # label prefix of the most recent header
        pren = 'EE'   # name prefix of the most recent header
        for var in dataset.varlist:
            print(var.name)
            original_label = var.label
            if not original_label:
                print('caught')
                continue

            label = original_label
            if label.startswith('Please rate the'):
                label = ''.join(ch for ch in label if not ch.isdigit())
            if label.startswith('Please rate the following items'):
                label = label.replace('items ', '', 1)
            if label.startswith('Please rate the following issues'):
                label = label.replace('issues ', '', 1)

            # The openings are mutually exclusive, so the first hit is the
            # only hit.
            for opening, label_prefix, name_prefix in _SECTION_HEADERS:
                if label.startswith(opening):
                    prefix = label_prefix
                    pren = name_prefix
                    var.name = name_prefix + var.name
                    break

            if label.startswith(_QUESTION_OPENINGS):
                label = prefix + label
            if label != original_label:
                var.label = label

            # A name already carrying EE/DD starts with an upper-case letter
            # other than Q and therefore never matches again.
            if _UNPREFIXED_NAME.match(var.name):
                var.name = pren + var.name
    finally:
        # Always close the data step, even when a rename fails part-way.
        spss.EndDataStep()
#-----------------------3rd Function-----------------------------
#Keeps a dictionary that maps each variable label to a variable name.
#For each file, every labelled variable is renamed to the name stored for
#its label; a new entry is added when the label is not in the dictionary yet.
import spss, re
# Label -> variable-name table shared by every rename() call.
# NOTE: the name shadows the built-in dict; it is kept because code outside
# this section may refer to it.
dict = {}


def rename(filename, dictionary):
    """Rename every labelled variable to the name registered for its label.

    filename   -- text handed unchanged to spss.Submit before the data step
                  is opened, so it must be SPSS command syntax (presumably a
                  GET FILE command -- confirm against the caller).
    dictionary -- NOTE(review): never read; the module-level ``dict`` is used
                  instead. Left unchanged so existing callers behave exactly
                  as before -- confirm whether the argument was meant to
                  replace the global.

    A label already in ``dict`` is printed with its stored name and the
    variable takes that name. A new label is registered as a two-letter
    prefix (EE, DD, TT for names starting with "Team", otherwise GG) followed
    by the new size of the table. Variables without a label only print
    'caught'. Returns None.
    """
    spss.Submit(filename)
    spss.StartDataStep()
    try:
        dataset = spss.Dataset()
        for var in dataset.varlist:
            label = var.label
            if not label:
                print('caught')
                continue
            if label in dict:
                print(label + ' ' + dict[label])
            else:
                current = var.name
                if current.startswith('EE'):
                    prefix = 'EE'
                elif current.startswith('DD'):
                    prefix = 'DD'
                elif current.startswith('Team'):
                    prefix = 'TT'
                else:
                    prefix = 'GG'
                # The running size of the table keeps generated names unique.
                dict[label] = prefix + str(len(dict) + 1)
            var.name = dict[label]
    finally:
        # Always close the data step, even when a rename fails part-way.
        spss.EndDataStep()
#--------------------4th function------------------------
#Runs through each .sav file in the folder and adds a
#new variable called source_file so that, once the files are
#merged, the original file of every case is known.
#Each file is saved in place (overwritten) after the variable is added.
import os, spss
# Folder containing the .sav files. Raw string: the backslashes must never be
# read as escape sequences ('\U' is a syntax error on Python 3).
dir = r'C:\Users\Sean\Downloads\Spss'
fils = sorted(fil for fil in os.listdir(dir) if fil.endswith('.sav'))
# Body of the VALUE LABELS command: 1"first.sav" 2"second.sav" ...
vallabs = ' '.join('%d"%s"' % (num, fil) for num, fil in enumerate(fils, 1))
# Files are numbered from 1 in sorted order; the number becomes source_file.
for num, fil in enumerate(fils, 1):
    # Open the file, tag every case with the file number, label the numbers
    # with the file names and overwrite the file.
    spss.Submit('''
get fil '%(dir)s/%(fil)s'.
compute source_file=%(num)d.
val lab source_file %(vallabs)s.
sav out '%(dir)s/%(fil)s'.
''' % {'dir': dir, 'fil': fil, 'num': num, 'vallabs': vallabs})
# Close the last file so that no dataset stays open.
spss.Submit('new fil.')
#------------------5th function----------------
#Merges each file in the folder
import os, spss
# Folder containing the .sav files. Raw string: the backslashes must never be
# read as escape sequences ('\U' is a syntax error on Python 3).
rdir = r'C:\Users\Sean\Downloads\Spss'
fils = sorted(fil for fil in os.listdir(rdir) if fil.endswith('.sav'))
if not fils:
    raise IndexError('no .sav files found in %s' % rdir)
# The first file becomes the active dataset; the others are appended to it.
spss.Submit('get file "%s/%s".' % (rdir, fils.pop(0)))
# Each ADD FILES command takes the active dataset (*) plus at most 49 files.
# Stepping by the batch size never yields an empty batch, so no
# "add files file=*/." command is sent when the count is a multiple of 49
# or when the folder holds a single file.
batch_size = 49
for start in range(0, len(fils), batch_size):
    batch = fils[start:start + batch_size]
    spss.Submit('add files file=*/%s.' % '/'.join(
        'file="%s"' % os.path.join(rdir, fil) for fil in batch))
    # Run the pending merge before the next batch is appended.
    spss.Submit('exe.')
#----------Test function-----------------------------
#Prints every variable label that starts with "qh" together with its length
import spss, re
def stringLen(filename):
    """Print each variable label starting with "qh" followed by its length.

    filename -- text handed unchanged to spss.Submit before the data step is
                opened, so it must be SPSS command syntax (presumably a
                GET FILE command -- confirm against the caller).

    Returns None; the dataset is only read.
    """
    spss.Submit(filename)
    spss.StartDataStep()
    try:
        dataset = spss.Dataset()
        for var in dataset.varlist:
            label = var.label
            if label and label.startswith('qh'):
                print(label + ' ' + str(len(label)))
    finally:
        # Always close the data step so later SPSS calls are not blocked.
        spss.EndDataStep()