-
Notifications
You must be signed in to change notification settings - Fork 1
/
clus_top.py
51 lines (43 loc) · 1.5 KB
/
clus_top.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from datetime import datetime
from config import *
from common import *
def get_top_cluster_tb_tm(tb, tm, conn):
    """
    Build the top-level clusters for one table at one collection time.

    Feature records may be spread over several tables. This function selects
    the rows of table ``tb`` whose ``time`` column equals ``tm`` and whose
    ``ip`` appears in ``ip_reg``, then groups them by their cluster attributes.

    :param tb: the table where the feature records from time "tm" are stored.
    :param tm: the time at which the target feature records were collected.
    :param conn: passed through unchanged to run_sql_with_return together
                 with the query text.
    Returns: a dict of top-level clusters, keyed by the trailing columns of
    each row and holding one <IP,TIME,CHASH> entry per matching row:
    <GID,CTITLE,CTMPL,CKWS,HSERVER>: [<IP_1,TIME,CHASH_1>,... ,<IP_N,TIME,CHASH_N>]
    """
    clusters = {}
    # ip_reg holds the target IP addresses; only those rows are selected.
    query_template = "SELECT ip, chash, gid, ctitle, ctmpl, ckws, hserver FROM %s WHERE time = '%s' AND ip IN (SELECT ip FROM ip_reg) "
    rows = run_sql_with_return(query_template % (tb, tm), conn)
    for row in rows:
        # Columns 0 and 1 (ip, chash) identify the member; tm is the query time.
        member = (row[0], tm, row[1])
        # Columns 2.. (gid, ctitle, ctmpl, ckws, hserver) form the cluster key.
        cluster_key = row[2:]
        clusters.setdefault(cluster_key, []).append(member)
    return clusters
def merge_top_clusters(target_clus,res_clus):
    """
    Merge target_clus into res_clus.

    For every cluster key in target_clus, its members are appended to the list
    stored under the same key in res_clus; keys that res_clus does not have yet
    are added. res_clus is modified in place (existing member lists are
    extended in place). target_clus and its lists are never modified or shared:
    members of a new key are copied, so later changes to the merged result
    cannot leak back into target_clus.

    :param target_clus: dict of top-level clusters to merge in (key -> list of
                        members).
    :param res_clus: dict accumulating the merged clusters; may be empty.
    Returns: res_clus (the same dict object). After merging all top-level
    clusters the dict looks like:
    <GID,CTITLE,CTMPL,CKWS,HSERVER>: [<IP_1,TIME_1,CHASH_1>,... ,<IP_N,TIME_N,CHASH_N>]
    """
    for key, members in target_clus.items():
        if key in res_clus:
            # Extend in place rather than rebuilding the accumulated list with
            # "+", which would re-copy every earlier member on each merge.
            res_clus[key].extend(members)
        else:
            # Store a copy: keeping the caller's own list here would alias
            # target_clus, and any later in-place edit of the result would
            # silently change the source clusters too.
            res_clus[key] = list(members)
    return res_clus
# Entry-point guard: running this file as a script currently does nothing
# (the body is a bare ``pass``); the functions above are meant to be imported.
if __name__ == '__main__':
    pass
"""
Usage example (conn is the database connection passed on to run_sql_with_return):
    res_clus = {}
    for tb, tm in [("test0", "2012-12-21"), ("test1", "2012-12-22")]:
        clus = get_top_cluster_tb_tm(tb, tm, conn)
        res_clus = merge_top_clusters(clus, res_clus)
    export_res(res_clus)
"""