-
Notifications
You must be signed in to change notification settings - Fork 1
/
dumpIntoFile.py
executable file
·98 lines (83 loc) · 2.84 KB
/
dumpIntoFile.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/python
# -*- coding: utf-8 -*-
import MySQLdb as mdb
import sys, json
def _mail_from_row(row):
    """Convert one joined message/recipientinfo row (a dict) into a mail dict."""
    return {
        'mid': row['mid'],
        'recipient': row['recipient'],
        # The query aliases r.rtype as "type"; it is exposed here as "rtype".
        'rtype': row['type'],
        # Drop sub-second precision and render as "YYYY-MM-DD HH:MM:SS" --
        # the same text str()/unicode() yields for a datetime, but without
        # relying on the Python-2-only ``unicode`` builtin.
        'date': row['date'].replace(microsecond=0).isoformat(' '),
        'subject': row['subject'],
        'body': row['body'],
    }


def dumpIntoDir(senders=None):
    """Collect selected mails from the ``enron`` database, grouped by sender.

    For every sender, the query returns message/recipient rows where:

    * the message was sent by that sender,
    * the message has exactly one 'TO' recipient row (counted over messages
      whose sender is listed in the ``topSenders`` table), and
    * the recipient value is one that this sender addressed as 'TO' more
      than 10 times.

    Args:
        senders: optional iterable of sender addresses to query. When
            omitted, the five addresses hard-coded below are used.

    Returns:
        A dict of the form::

            {'sender 1': [mail 1, mail 2, ...],
             'sender 2': ...,
             ...}

        where every mail is::

            {'mid': ..., 'recipient': ..., 'rtype': ...,
             'date': ..., 'subject': ..., 'body': ...}

        Senders for which the query yields no rows do not appear as keys.
    """
    if senders is None:
        senders = ('jeff.dasovich@enron.com',
                   'kay.mann@enron.com',
                   'sara.shackleton@enron.com',
                   'tana.jones@enron.com',
                   'vince.kaminski@enron.com')

    # The two %s placeholders are bound by the driver in cur.execute(), so
    # they are deliberately NOT wrapped in quotes here.
    query = """
        SELECT m.mid as mid, m.sender as sender, m.date as date,
               m.subject as subject, m.body as body, r.rtype as type,
               r.rvalue as recipient
        FROM message m
        inner join recipientinfo r
        on m.mid = r.mid
        WHERE m.sender = %s and
              m.mid in (
                  select mid
                  from (
                      SELECT m.mid as mid, r.rvalue as recipient, count(*) as count
                      FROM message m
                      inner join recipientinfo r
                      on m.mid = r.mid
                      where m.sender in
                            (select * from topSenders) and
                            r.rtype = 'TO'
                      GROUP by mid) as T
                  WHERE T.count = 1
              ) and
              r.rvalue in (
                  select recipient from (
                      select m.mid as mid, m.sender as sender, r.rvalue as recipient,
                             count(*) as sentcount
                      from message m
                      inner join recipientinfo r
                      on m.mid = r.mid
                      where m.sender = %s and
                            r.rtype = 'TO'
                      group by r.rvalue
                      order by sentcount asc) as S
                  where S.sentcount > 10
              )
        """

    doc = {}
    # NOTE(review): database credentials are hard-coded in source; consider
    # moving them to configuration or the environment.
    con = mdb.connect('localhost', 'root', 'Fgla4Zp0', 'enron')
    try:
        cur = con.cursor(mdb.cursors.DictCursor)
        try:
            for sender in senders:
                cur.execute(query, (sender, sender))
                # fetchone() returns None once the result set is exhausted.
                for row in iter(cur.fetchone, None):
                    doc.setdefault(sender, []).append(_mail_from_row(row))
        finally:
            cur.close()
    finally:
        # Close explicitly: not every MySQLdb version closes the connection
        # when it is used as a context manager.
        con.close()
    return doc
if __name__ == "__main__":
    # Script entry point: serialise the collected mails to JSON and emit
    # them on stdout as a single newline-terminated line.
    serialized = json.dumps(dumpIntoDir())
    sys.stdout.write(serialized + "\n")