From 9db61ce65da65c2a7dd557417b3fef2138fd9bc9 Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Tue, 16 Apr 2024 12:03:17 -0700 Subject: [PATCH 01/35] Compute CFD of traffic --- imrs/imrs_frequency.py | 78 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 imrs/imrs_frequency.py diff --git a/imrs/imrs_frequency.py b/imrs/imrs_frequency.py new file mode 100644 index 0000000..3acc9c9 --- /dev/null +++ b/imrs/imrs_frequency.py @@ -0,0 +1,78 @@ +# +# This script will try to build a sample of the input file. +# The purpose of the sample is, get a realistic test file +# that is small enough for iterative development, measures, +# etc., yet big enough to obtain statistically significant +# results. +# +# Usage: imrs_sample.py +# + +import sys +import traceback +import random +import time +import concurrent.futures +import math +import os +from os import listdir +from os.path import isfile, isdir, join + +def parse_imrs(line): + ok = False + ip = "" + count = 0 + try: + parts = line.split(",") + ip = parts[0].strip() + count = int(parts[1].strip()) + ok = True + except Exception as e: + traceback.print_exc() + print("Cannot parse IMRS Record:\n" + line.strip() + "\nException: " + str(e)) + return ok, ip, count + + +# main + +if len(sys.argv) < 3 or len(sys.argv) > 4: + print("Usage: imrs_frequency.py [load_step%]") + exit(1) +imrs_file = sys.argv[1] +output_file = sys.argv[2] +load_step = 0 +if len(sys.argv) == 4: + s_load_step = sys.argv[3] + if not s_load_step.endswidth("%s"): + print("Load step should be %, e.g. 1%, 0.1%, not " + s_load_step) + exit(1) + else: + load_step = float(s_load_step[:-1])/100.0 + +load_vec = [] + +total_load = 0 +for line in open(imrs_file,"r"): + ok, ip, use_count = parse_imrs(line) + if ok: + load_vec.append(use_count) + total_load += use_count + +load_vec.sort(reverse=True) + +with open(output_file, "w") as F: + cumulative_use = 0 + cumulative_count = 0 + delta_threshold = int(total_load*load_step) + threshold = 0 + last_written = 0 + F.write("Count, Queries, frequency,\n") + for use_count in load_vec: + cumulative_count += 1 + cumulative_use += use_count + if cumulative_use >= threshold: + F.write(str(cumulative_count) + "," + str(cumulative_use) + "," + str(cumulative_use/total_load) + ",\n") + threshold += delta_threshold + last_written = cumulative_count + if last_written < cumulative_count: + F.write(str(cumulative_count) + "," + str(cumulative_use) + "," + str(cumulative_use/total_load)) \ No newline at end of file From e141e43782642b899b44dddf7797db9bc0438ad8 Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Tue, 16 Apr 2024 12:07:46 -0700 Subject: [PATCH 02/35] Fix cfd computation --- imrs/imrs_frequency.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imrs/imrs_frequency.py b/imrs/imrs_frequency.py index 3acc9c9..623ff7c 100644 --- a/imrs/imrs_frequency.py +++ b/imrs/imrs_frequency.py @@ -43,7 +43,7 @@ def parse_imrs(line): load_step = 0 if len(sys.argv) == 4: s_load_step = sys.argv[3] - if not s_load_step.endswidth("%s"): + if not s_load_step.endswith("%"): print("Load step should be %, e.g. 1%, 0.1%, not " + s_load_step) exit(1) else: From 4c5ee256f930d05a4553dd44f15a74c0b7474e82 Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Tue, 16 Apr 2024 13:19:34 -0700 Subject: [PATCH 03/35] Add linal new line. --- imrs/imrs_frequency.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imrs/imrs_frequency.py b/imrs/imrs_frequency.py index 623ff7c..97dca88 100644 --- a/imrs/imrs_frequency.py +++ b/imrs/imrs_frequency.py @@ -75,4 +75,4 @@ def parse_imrs(line): threshold += delta_threshold last_written = cumulative_count if last_written < cumulative_count: - F.write(str(cumulative_count) + "," + str(cumulative_use) + "," + str(cumulative_use/total_load)) \ No newline at end of file + F.write(str(cumulative_count) + "," + str(cumulative_use) + "," + str(cumulative_use/total_load) + ",\n") \ No newline at end of file From 5569a15f531ba01e5f5189983bbf7bcd807c4180 Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Fri, 19 Apr 2024 11:47:30 -0700 Subject: [PATCH 04/35] Add list extraction script --- imrs/imrs_apnic_list.py | 107 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 imrs/imrs_apnic_list.py diff --git a/imrs/imrs_apnic_list.py b/imrs/imrs_apnic_list.py new file mode 100644 index 0000000..2e34a5c --- /dev/null +++ b/imrs/imrs_apnic_list.py @@ -0,0 +1,107 @@ +# +# This script will try to build a sample of the input file. +# The purpose of the sample is, get a realistic test file +# that is small enough for iterative development, measures, +# etc., yet big enough to obtain statistically significant +# results. +# +# Usage: imrs_sample.py +# + +import sys +import traceback +import random +import time +import concurrent.futures +import math +import os +from os import listdir +from os.path import isfile, isdir, join + +class imrs_apnic_item: + def __init__(self, ip, apnic_use, imrs_use): + self.ip = ip + self.apnic_use = apnic_use + self.imrs_use = imrs_use + + def head(): + s = "IP, apnic_use, imrs_use," + return s + + + def text(self): + s = ip + "," + str(apnic_use) + "," + str(imrs_use) + "," + return s + +class apnic_record: + def __init__(self): + self.ip = "" + self.use_count = 0 + self.seen_in_imrs = False + self.imrs_count = 0 + + def parse(self, line): + parts = line.split(",") + nb_parts = len(parts) + if nb_parts >= 4: + try: + self.ip = parts[0].strip() + self.use_count = int(parts[3].strip()) + except Exception as e: + traceback.print_exc() + print("Cannot parse APNIC Record:\n" + line.strip() + "\nException: " + str(e)) + return False + return True + +def parse_imrs(line): + ok = False + ip = "" + count = 0 + try: + parts = line.split(",") + ip = parts[0].strip() + count = int(parts[1].strip()) + ok = True + except Exception as e: + traceback.print_exc() + print("Cannot parse IMRS Record:\n" + line.strip() + "\nException: " + str(e)) + return ok, ip, count + + +# main + +if len(sys.argv) != 4: + print("Usage: imrs_frequency.py ") + exit(1) +imrs_file = sys.argv[1] +apnic_file = sys.argv[2] +output_file = sys.argv[2] + +if len(sys.argv) == 4: + s_load_step = sys.argv[3] + if not s_load_step.endswith("%"): + print("Load step should be %, e.g. 1%, 0.1%, not " + s_load_step) + exit(1) + else: + load_step = float(s_load_step[:-1])/100.0 + +apnic_dict = dict() + +for line in open(apnic_file,"r"): + apnic = apnic_record() + if apnic.parse(line): + apnic_dict[apnic.ip] = apnic + +for line in open(imrs_file,"r"): + ok, ip, count = parse_imrs(line) + if ok: + if ip in apnic_dict: + apnic_dict[ip].seen_in_imrs = True + apnic_dict[ip].imrs_count = count + +with open(output_file, "w") as F: + F.write("IP, apnic_use, imrs_use,\n") + for ip in apnic_dict: + apnic_entry = apnic_dict[ip] + if apnic_entry.seen_in_imrs: + F.write(apnic_entry.ip + "," + str(apnic_entry.use_count) + "," + str(apnic_entry.imrs_count) + "\n") From 1ce2dce7dc46cdbb8e5116177a50c71d71bca69f Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Fri, 19 Apr 2024 11:53:39 -0700 Subject: [PATCH 05/35] Fix order --- imrs/imrs_apnic_list.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/imrs/imrs_apnic_list.py b/imrs/imrs_apnic_list.py index 2e34a5c..35aecf6 100644 --- a/imrs/imrs_apnic_list.py +++ b/imrs/imrs_apnic_list.py @@ -73,17 +73,9 @@ def parse_imrs(line): if len(sys.argv) != 4: print("Usage: imrs_frequency.py ") exit(1) -imrs_file = sys.argv[1] -apnic_file = sys.argv[2] -output_file = sys.argv[2] - -if len(sys.argv) == 4: - s_load_step = sys.argv[3] - if not s_load_step.endswith("%"): - print("Load step should be %, e.g. 1%, 0.1%, not " + s_load_step) - exit(1) - else: - load_step = float(s_load_step[:-1])/100.0 +apnic_file = sys.argv[1] +imrs_file = sys.argv[2] +output_file = sys.argv[3] apnic_dict = dict() From 36218bc929e7167f0725cc1ca703d6009ad2d06f Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Fri, 19 Apr 2024 11:55:32 -0700 Subject: [PATCH 06/35] Fix usage --- imrs/imrs_apnic_list.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/imrs/imrs_apnic_list.py b/imrs/imrs_apnic_list.py index 35aecf6..4981377 100644 --- a/imrs/imrs_apnic_list.py +++ b/imrs/imrs_apnic_list.py @@ -71,10 +71,10 @@ def parse_imrs(line): # main if len(sys.argv) != 4: - print("Usage: imrs_frequency.py ") + print("Usage: imrs_apnic_list.py ") exit(1) -apnic_file = sys.argv[1] -imrs_file = sys.argv[2] +imrs_file = sys.argv[1] +apnic_file = sys.argv[2] output_file = sys.argv[3] apnic_dict = dict() From 15aada5bdb48777e632636ac1e457ea59a6490a0 Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Sat, 20 Apr 2024 15:33:18 -0700 Subject: [PATCH 07/35] Compute monthly instances summary --- imrs/imrs_record.py | 90 ++++++++++++++++++++++++++++++++++++++++ src/imrs_instances.py | 95 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 185 insertions(+) create mode 100644 imrs/imrs_record.py create mode 100644 src/imrs_instances.py diff --git a/imrs/imrs_record.py b/imrs/imrs_record.py new file mode 100644 index 0000000..9a6984c --- /dev/null +++ b/imrs/imrs_record.py @@ -0,0 +1,90 @@ +# +# This script will try to build a sample of the input file. +# The purpose of the sample is, get a realistic test file +# that is small enough for iterative development, measures, +# etc., yet big enough to obtain statistically significant +# results. +# +# Usage: imrs_sample.py +# + +import sys +import traceback +import random +import time +import concurrent.futures +import math +import os +from os import listdir +from os.path import isfile, isdir, join + + +def imrs_parse_one_number(parts, parsed): + v = 0 + p = parts[parsed].strip() + v = int(parts[parsed]) + parsed += 1 + return v, parsed + +def imrs_parse_one_vector(parts, parsed, v): + for i in range(0, len(v)): + v[i],parsed = imrs_parse_one_number(parts, parsed) + return parsed + +class imrs_hyperloglog: + def __init__(self): + self.E = 0.0 + self.hllv=[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0] + pass; + def parse(self, parts, parsed): + self.E = float(parts[parsed].strip()) + parsed += 1 + for i in range(0, len(self.hllv)): + self.hllv[i], parsed = imrs_parse_one_number(parts,parsed) + return parsed + +class imrs_record: + def __init__(self): + self.ip = "" + self.query_volume = 0 + self.hourly_volume = [ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] + self.daily_volume = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] + self.arpa_count = 0 + self.no_such_domain_queries = 0 + self.no_such_domain_reserved = 0 + self.no_such_domain_frequent = 0 + self.no_such_domain_chromioids = 0 + self.tld_counts = [0,0,0,0,0,0,0,0] + self.tld_hyperlog = imrs_hyperloglog() + self.sld_counts = [0,0,0,0,0,0,0,0] + self.sld_hyperlog = imrs_hyperloglog() + self.name_parts = [0,0,0,0,0,0,0,0] + self.rr_types = [0,0,0,0,0,0,0,0] + self.locales = [0,0,0,0,0,0,0,0] + + def parse_imrs(self, line): + ok = False + try: + parts = line.split(",") + self.ip = parts[0].strip() + parsed = 1 + self.query_volume, parsed = imrs_parse_one_number(parts, parsed) + parsed = imrs_parse_one_vector(parts, parsed, self.hourly_volume) + parsed = imrs_parse_one_vector(parts, parsed, self.daily_volume) + self.arpa_count, parsed = imrs_parse_one_number(parts, parsed) + self.no_such_domain_queries, parsed = imrs_parse_one_number(parts, parsed) + self.no_such_domain_reserved, parsed = imrs_parse_one_number(parts, parsed) + self.no_such_domain_frequent, parsed = imrs_parse_one_number(parts, parsed) + self.no_such_domain_chromioids, parsed = imrs_parse_one_number(parts, parsed) + parsed = imrs_parse_one_vector(parts, parsed, self.tld_counts) + parsed = self.tld_hyperlog = imrs_hyperloglog(parts, parsed) + parsed = imrs_parse_one_vector(parts, parsed, self.sld_counts) + parsed = self.sld_hyperlog = imrs_hyperloglog(parts, parsed) + parsed = imrs_parse_one_vector(parts, parsed, self.name_parts) + parsed = imrs_parse_one_vector(parts, parsed, self.rr_types) + parsed = imrs_parse_one_vector(parts, parsed, self.locales) + ok = True + except Exception as e: + traceback.print_exc() + print("Cannot parse IMRS Record after " + str(parsed) + " parts:\n" + line.strip() + "\nException: " + str(e)) + return ok diff --git a/src/imrs_instances.py b/src/imrs_instances.py new file mode 100644 index 0000000..99c89d1 --- /dev/null +++ b/src/imrs_instances.py @@ -0,0 +1,95 @@ +#!/usr/bin/python +# coding=utf-8 +# +# This computes the montly totals for all the instances found +# in the east and west folders. The raw data is organized as: +# - ipstats / west / one folder per instance / files per date +# / east / one folder per instance / files per date +# The first processing step is to collect the list of file names +# for each instance: one file per date, possibly more if the +# same instance is present in east and west. +# +# + +import sys +import traceback +import random +import time +import concurrent.futures +import os +from os import listdir +from os.path import isfile, isdir, join + +def prepare_instance_list(ipstats_folder, month): + instances = dict() + for pole in [ "east", "west" ]: + pole_dir = join(ipstats_folder, pole) + folder_pole = listdir(pole_dir) + for instance_id in folder_pole: + instance_path = join(folder_pole, instance_id) + if is_dir(instance_path): + if not instance_id in instances: + instances[instance_id] = [] + file_list = listdir(instance_folder) + for file_name in file_list: + instances[instance_id].append(join(instance_folder, file_name)) + return instances + +def check_or_create_dir(dir_path): + if not isdir(dir_path): + try: + os.mkdir(dir_path) + except Exception as e: + traceback.print_exc() + print("Cannot create <" + dir_path + ">\nException: " + str(e)) + return False + return True + +def process_instance(instance_id, month, result_folder, tmp_folder, ithitool, instances, do_debug): + result_file = instance-id + "_" + month + "-ipstats.csv" + result_path = join(result_folder, result_file) + tmp_file = instance-id + "_" + month + "-file-list.txt" + tmp_path = join(tmp_folder, tmp_file) + with open(tmp_path,"wt") as F: + for file_name in instances[instance_id]: + F.write(file_name + "\n") + merge_cmd = ithitool + ' -I ' + result_path + " " + result_path + cmd_ret = os.system(merge_cmd) + if cmd_ret == 0: + if do_debug: + print(result_file + ": computed.") + else: + print(result_file + ": computation failed, error:" + str(cmd_ret)) + return False + return True + +# main +if len(sys.argv) < 4 or len(sys.argv) > 5 or \ + (len(sys.argv) == 5 and sys.argv[5] != "debug"): + print("Usage: imrs_instances [debug]") + print("There are just " + str(len(sys.argv)) + " arguments.") + exit (1) +ipstats_folder = sys.argv[1] +month = sys.argv[2] +ithitool = sys.argv[3] +do_debug = len(sys.argv) == 5 + +print("Writing instance monthly files for: " + ipstats_folder) +try: + instances = prepare_instances_list(ipstats_folder, month) + result_folder = join(ipstats_folder, "instances") + tmp_folder = join(ipstats_folder, "tmp") + if check_or_create_dir(result_folder) and \ + check_or_create_dir(tmp_folder): + for instance_id in instances: + if len(instances[instance_id]) > 0: + if not process_instance(instance_id, month, result_folder, tmp_folder, ithitool, instances, do_debug): + exit(1) +except Exception as exc: + traceback.print_exc() + print('\nCode generated an exception: %s' % (exc)) + + + + + From 452f2c295e79cccc2048701ae41d63da0ec0750e Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Sat, 20 Apr 2024 15:37:09 -0700 Subject: [PATCH 08/35] fix typo --- src/imrs_instances.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/imrs_instances.py b/src/imrs_instances.py index 99c89d1..eb23d70 100644 --- a/src/imrs_instances.py +++ b/src/imrs_instances.py @@ -65,7 +65,7 @@ def process_instance(instance_id, month, result_folder, tmp_folder, ithitool, in # main if len(sys.argv) < 4 or len(sys.argv) > 5 or \ - (len(sys.argv) == 5 and sys.argv[5] != "debug"): + (len(sys.argv) == 5 and sys.argv[4] != "debug"): print("Usage: imrs_instances [debug]") print("There are just " + str(len(sys.argv)) + " arguments.") exit (1) From d89b1b71122464d4bda36317d770222c1b94278e Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Sat, 20 Apr 2024 15:38:26 -0700 Subject: [PATCH 09/35] One more typo --- src/imrs_instances.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/imrs_instances.py b/src/imrs_instances.py index eb23d70..cb13e2d 100644 --- a/src/imrs_instances.py +++ b/src/imrs_instances.py @@ -20,7 +20,7 @@ from os import listdir from os.path import isfile, isdir, join -def prepare_instance_list(ipstats_folder, month): +def prepare_instances_list(ipstats_folder, month): instances = dict() for pole in [ "east", "west" ]: pole_dir = join(ipstats_folder, pole) From f5f7fc03dec374d02ae0678b2f3bbf00c4b35f13 Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Sat, 20 Apr 2024 15:41:33 -0700 Subject: [PATCH 10/35] typo in prepare_instances_list --- src/imrs_instances.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/imrs_instances.py b/src/imrs_instances.py index cb13e2d..443e7ab 100644 --- a/src/imrs_instances.py +++ b/src/imrs_instances.py @@ -26,7 +26,7 @@ def prepare_instances_list(ipstats_folder, month): pole_dir = join(ipstats_folder, pole) folder_pole = listdir(pole_dir) for instance_id in folder_pole: - instance_path = join(folder_pole, instance_id) + instance_path = join(pole_dir, instance_id) if is_dir(instance_path): if not instance_id in instances: instances[instance_id] = [] From f1a234f0a69c4fbf7d1a19767d968b16282202af Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Sat, 20 Apr 2024 15:42:15 -0700 Subject: [PATCH 11/35] Slightly better list of instances --- src/imrs_instances.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/imrs_instances.py b/src/imrs_instances.py index 443e7ab..b07aaec 100644 --- a/src/imrs_instances.py +++ b/src/imrs_instances.py @@ -30,9 +30,9 @@ def prepare_instances_list(ipstats_folder, month): if is_dir(instance_path): if not instance_id in instances: instances[instance_id] = [] - file_list = listdir(instance_folder) - for file_name in file_list: - instances[instance_id].append(join(instance_folder, file_name)) + file_list = listdir(instance_folder) + for file_name in file_list: + instances[instance_id].append(join(instance_folder, file_name)) return instances def check_or_create_dir(dir_path): From be281b9bed60375b9b1ca81db3f6251bf79f7969 Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Sat, 20 Apr 2024 15:43:11 -0700 Subject: [PATCH 12/35] fix typo is_dir --- src/imrs_instances.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/imrs_instances.py b/src/imrs_instances.py index b07aaec..9472e87 100644 --- a/src/imrs_instances.py +++ b/src/imrs_instances.py @@ -27,7 +27,7 @@ def prepare_instances_list(ipstats_folder, month): folder_pole = listdir(pole_dir) for instance_id in folder_pole: instance_path = join(pole_dir, instance_id) - if is_dir(instance_path): + if isdir(instance_path): if not instance_id in instances: instances[instance_id] = [] file_list = listdir(instance_folder) From ae96f9ae83d19700ec1ffe2e48232f9b421abc8b Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Sat, 20 Apr 2024 15:44:24 -0700 Subject: [PATCH 13/35] folder variable to instance_folder --- src/imrs_instances.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/imrs_instances.py b/src/imrs_instances.py index 9472e87..4001803 100644 --- a/src/imrs_instances.py +++ b/src/imrs_instances.py @@ -26,8 +26,8 @@ def prepare_instances_list(ipstats_folder, month): pole_dir = join(ipstats_folder, pole) folder_pole = listdir(pole_dir) for instance_id in folder_pole: - instance_path = join(pole_dir, instance_id) - if isdir(instance_path): + instance_folder = join(pole_dir, instance_id) + if isdir(instance_folder): if not instance_id in instances: instances[instance_id] = [] file_list = listdir(instance_folder) From 87e69e1b7bf335c336a8d40b086156f2e39296a6 Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Sat, 20 Apr 2024 15:45:22 -0700 Subject: [PATCH 14/35] fix instance-id typo --- src/imrs_instances.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/imrs_instances.py b/src/imrs_instances.py index 4001803..6563a9e 100644 --- a/src/imrs_instances.py +++ b/src/imrs_instances.py @@ -48,7 +48,7 @@ def check_or_create_dir(dir_path): def process_instance(instance_id, month, result_folder, tmp_folder, ithitool, instances, do_debug): result_file = instance-id + "_" + month + "-ipstats.csv" result_path = join(result_folder, result_file) - tmp_file = instance-id + "_" + month + "-file-list.txt" + tmp_file = instance_id + "_" + month + "-file-list.txt" tmp_path = join(tmp_folder, tmp_file) with open(tmp_path,"wt") as F: for file_name in instances[instance_id]: From 1d8e9db46f8f6f97c495bd97791f6f6ebabbc01d Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Sat, 20 Apr 2024 15:46:40 -0700 Subject: [PATCH 15/35] another fix --- src/imrs_instances.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/imrs_instances.py b/src/imrs_instances.py index 6563a9e..23c76ba 100644 --- a/src/imrs_instances.py +++ b/src/imrs_instances.py @@ -46,7 +46,7 @@ def check_or_create_dir(dir_path): return True def process_instance(instance_id, month, result_folder, tmp_folder, ithitool, instances, do_debug): - result_file = instance-id + "_" + month + "-ipstats.csv" + result_file = instance_id + "_" + month + "-ipstats.csv" result_path = join(result_folder, result_file) tmp_file = instance_id + "_" + month + "-file-list.txt" tmp_path = join(tmp_folder, tmp_file) From 32aa60af6008cfacb6e04edc3f74d4b46fcf185c Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Sat, 20 Apr 2024 15:48:34 -0700 Subject: [PATCH 16/35] fix tmp_path --- src/imrs_instances.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/imrs_instances.py b/src/imrs_instances.py index 23c76ba..6051d40 100644 --- a/src/imrs_instances.py +++ b/src/imrs_instances.py @@ -53,7 +53,7 @@ def process_instance(instance_id, month, result_folder, tmp_folder, ithitool, in with open(tmp_path,"wt") as F: for file_name in instances[instance_id]: F.write(file_name + "\n") - merge_cmd = ithitool + ' -I ' + result_path + " " + result_path + merge_cmd = ithitool + ' -I ' + result_path + " " + tmp_path cmd_ret = os.system(merge_cmd) if cmd_ret == 0: if do_debug: From 181a7522fd036fa11d194e8b3e5b2b4076dc9d56 Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Sat, 20 Apr 2024 18:08:07 -0700 Subject: [PATCH 17/35] Add monthly IP count script --- src/imrs_monthly_ip.py | 91 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 src/imrs_monthly_ip.py diff --git a/src/imrs_monthly_ip.py b/src/imrs_monthly_ip.py new file mode 100644 index 0000000..668bd8f --- /dev/null +++ b/src/imrs_monthly_ip.py @@ -0,0 +1,91 @@ +#!/usr/bin/python +# coding=utf-8 +# +# This processes a list of monthly files (instances or clusters) +# and extracts a table, with entries: +# instance, nb_ip, nb_queries +# or +# cluster, instances, nb_ip, nb_queries +# where "nb_ip" is the number of IP addresses seens by this cluster, +# and "nb_queries" is the total number of queries +# +# usage: imrs_montly_ip input_folder output_file + +import sys +import traceback +import random +import time +import concurrent.futures +import os +from os import listdir +from os.path import isfile, isdir, join + +def parse_imrs(line): + ok = False + ip = "" + count = 0 + try: + parts = line.split(",") + ip = parts[0].strip() + count = int(parts[1].strip()) + ok = True + except Exception as e: + traceback.print_exc() + print("Cannot parse IMRS Record:\n" + line.strip() + "\nException: " + str(e)) + return ok, ip, count + +# main +if len(sys.argv) != 3: + print("usage: imrs_montly_ip input_folder output_file") + exit(1) +input_folder = argv[2] +output_file = argv[3] +sep = '_' +column_1 = "instances" +is_instances = True +if input_folder[:-1].endswith("monthly"): + sep = '.' + column_1 = clusters + is_instances = False +clusters = dict() +if is_instances: + file_list = listdir(input_folder) + for file_name in file_list: + parts = file_name.split("_") + first_parts = parts[0].split('-') + cluster_id = parts[1] + "-" + parts[2] + if not cluster_id in clusters: + clusters[cluster_id] = [] + clusters[cluster_id].append(file_name) +else: + file_list = listdir(input_folder) + for file_name in file_list: + parts = file_name.split(".") + cluster_id = parts[0] + if not cluster_id in clusters: + clusters[cluster_id] = [] + clusters[cluster_id].append(file_name) + +id_list = sorted(list(clusters.keys())) + +with open(output_file) as F: + F.write("Cluster, Instance, nb_IP, nb_queries,\n") + for cluster_id in id_list: + total_ip = 0 + total_queries = 0 + for file_name in clusters[cluster_id]: + file_path = join(input_folder, file_name) + nb_ip = 0 + nb_queries = 0 + for line in file_path: + ok,ip,count = parse_imrs(line) + if ok: + nb_ip += 1 + nb_queries += count + if is_instances: + file_parts = file_name.split("_") + instance_id = parts[0] + F.write(cluster_id + "," + instance_id + "," + str(nb_ip) + "," + str(nb_queries)) + total_ip += nb_ip + total_queries += nb_queries + F.write(cluster_id + "," + total + "," + str(total_ip) + "," + str(total_queries) \ No newline at end of file From 72539c56f551c212107f28f05c4f6e3960de350f Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Sat, 20 Apr 2024 18:10:06 -0700 Subject: [PATCH 18/35] Close parenthesis --- src/imrs_monthly_ip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/imrs_monthly_ip.py b/src/imrs_monthly_ip.py index 668bd8f..4b691e4 100644 --- a/src/imrs_monthly_ip.py +++ b/src/imrs_monthly_ip.py @@ -88,4 +88,4 @@ def parse_imrs(line): F.write(cluster_id + "," + instance_id + "," + str(nb_ip) + "," + str(nb_queries)) total_ip += nb_ip total_queries += nb_queries - F.write(cluster_id + "," + total + "," + str(total_ip) + "," + str(total_queries) \ No newline at end of file + F.write(cluster_id + "," + total + "," + str(total_ip) + "," + str(total_queries)) From 9b71d23314621462d73ccac0443b0844a8b8aeb1 Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Sat, 20 Apr 2024 18:13:24 -0700 Subject: [PATCH 19/35] Revise parameters parsing --- src/imrs_monthly_ip.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/imrs_monthly_ip.py b/src/imrs_monthly_ip.py index 4b691e4..606aca5 100644 --- a/src/imrs_monthly_ip.py +++ b/src/imrs_monthly_ip.py @@ -38,15 +38,13 @@ def parse_imrs(line): if len(sys.argv) != 3: print("usage: imrs_montly_ip input_folder output_file") exit(1) -input_folder = argv[2] -output_file = argv[3] -sep = '_' -column_1 = "instances" -is_instances = True -if input_folder[:-1].endswith("monthly"): - sep = '.' - column_1 = clusters - is_instances = False +input_folder = sys.argv[2] +output_file = sys.argv[3] +is_instances = not input_folder[:-1].endswith("monthly") +if is_instances: + print("From cluster monthly, " + input_folder + " compute " + output_file) +else: + print("From alliances monthly, " + input_folder + " compute " + output_file) clusters = dict() if is_instances: file_list = listdir(input_folder) From 8139e33aa30751de73c3ac685dafe188b51d3a78 Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Sat, 20 Apr 2024 18:13:50 -0700 Subject: [PATCH 20/35] Fix typo --- src/imrs_monthly_ip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/imrs_monthly_ip.py b/src/imrs_monthly_ip.py index 606aca5..0674ab0 100644 --- a/src/imrs_monthly_ip.py +++ b/src/imrs_monthly_ip.py @@ -44,7 +44,7 @@ def parse_imrs(line): if is_instances: print("From cluster monthly, " + input_folder + " compute " + output_file) else: - print("From alliances monthly, " + input_folder + " compute " + output_file) + print("From instances monthly, " + input_folder + " compute " + output_file) clusters = dict() if is_instances: file_list = listdir(input_folder) From 401ea44a3fb95e55e8fc2256261338133366ce1f Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Sat, 20 Apr 2024 18:15:10 -0700 Subject: [PATCH 21/35] Fix arg index --- src/imrs_monthly_ip.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/imrs_monthly_ip.py b/src/imrs_monthly_ip.py index 0674ab0..9e34bbb 100644 --- a/src/imrs_monthly_ip.py +++ b/src/imrs_monthly_ip.py @@ -38,8 +38,8 @@ def parse_imrs(line): if len(sys.argv) != 3: print("usage: imrs_montly_ip input_folder output_file") exit(1) -input_folder = sys.argv[2] -output_file = sys.argv[3] +input_folder = sys.argv[1] +output_file = sys.argv[2] is_instances = not input_folder[:-1].endswith("monthly") if is_instances: print("From cluster monthly, " + input_folder + " compute " + output_file) From 41a1f890c1ce5d5d97fb1582b179223807b482a7 Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Sat, 20 Apr 2024 18:16:36 -0700 Subject: [PATCH 22/35] fix open call --- src/imrs_monthly_ip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/imrs_monthly_ip.py b/src/imrs_monthly_ip.py index 9e34bbb..1c9cf79 100644 --- a/src/imrs_monthly_ip.py +++ b/src/imrs_monthly_ip.py @@ -66,7 +66,7 @@ def parse_imrs(line): id_list = sorted(list(clusters.keys())) -with open(output_file) as F: +with open(output_file, "w") as F: F.write("Cluster, Instance, nb_IP, nb_queries,\n") for cluster_id in id_list: total_ip = 0 From 77f2dca2378e38279e99e376f75fa14423870eaa Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Sat, 20 Apr 2024 18:22:35 -0700 Subject: [PATCH 23/35] Add test of input line --- src/imrs_monthly_ip.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/imrs_monthly_ip.py b/src/imrs_monthly_ip.py index 1c9cf79..086925b 100644 --- a/src/imrs_monthly_ip.py +++ b/src/imrs_monthly_ip.py @@ -26,9 +26,12 @@ def parse_imrs(line): count = 0 try: parts = line.split(",") - ip = parts[0].strip() - count = int(parts[1].strip()) - ok = True + if len(parts) >= 2: + ip = parts[0].strip() + count = int(parts[1].strip()) + ok = True + else: + print("Line <" + line.strip() + " has only " + str(len(parts)) + " parts.") except Exception as e: traceback.print_exc() print("Cannot parse IMRS Record:\n" + line.strip() + "\nException: " + str(e)) @@ -42,9 +45,9 @@ def parse_imrs(line): output_file = sys.argv[2] is_instances = not input_folder[:-1].endswith("monthly") if is_instances: - print("From cluster monthly, " + input_folder + " compute " + output_file) -else: print("From instances monthly, " + input_folder + " compute " + output_file) +else: + print("From cluster monthly, " + input_folder + " compute " + output_file) clusters = dict() if is_instances: file_list = listdir(input_folder) From 5af35da757ed7f8ec73ba8921002984fc4197089 Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Sat, 20 Apr 2024 18:23:19 -0700 Subject: [PATCH 24/35] Fix last output line --- src/imrs_monthly_ip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/imrs_monthly_ip.py b/src/imrs_monthly_ip.py index 086925b..d6557bd 100644 --- a/src/imrs_monthly_ip.py +++ b/src/imrs_monthly_ip.py @@ -89,4 +89,4 @@ def parse_imrs(line): F.write(cluster_id + "," + instance_id + "," + str(nb_ip) + "," + str(nb_queries)) total_ip += nb_ip total_queries += nb_queries - F.write(cluster_id + "," + total + "," + str(total_ip) + "," + str(total_queries)) + F.write(cluster_id + ", total ," + str(total_ip) + "," + str(total_queries)) From 3d794b5025672b6e0f06b69b42df22b746c81a0d Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Sat, 20 Apr 2024 18:26:24 -0700 Subject: [PATCH 25/35] propertly open data files --- src/imrs_monthly_ip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/imrs_monthly_ip.py b/src/imrs_monthly_ip.py index d6557bd..27a8c37 100644 --- a/src/imrs_monthly_ip.py +++ b/src/imrs_monthly_ip.py @@ -78,7 +78,7 @@ def parse_imrs(line): file_path = join(input_folder, file_name) nb_ip = 0 nb_queries = 0 - for line in file_path: + for line in open(file_path, "r"): ok,ip,count = parse_imrs(line) if ok: nb_ip += 1 From 195616b846bbc640a05dd1975d4016b6bc96a587 Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Sat, 20 Apr 2024 18:37:15 -0700 Subject: [PATCH 26/35] Add progress reports --- src/imrs_monthly_ip.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/imrs_monthly_ip.py b/src/imrs_monthly_ip.py index 27a8c37..0ac8c0c 100644 --- a/src/imrs_monthly_ip.py +++ b/src/imrs_monthly_ip.py @@ -49,6 +49,7 @@ def parse_imrs(line): else: print("From cluster monthly, " + input_folder + " compute " + output_file) clusters = dict() +nb_files = 0 if is_instances: file_list = listdir(input_folder) for file_name in file_list: @@ -58,6 +59,7 @@ def parse_imrs(line): if not cluster_id in clusters: clusters[cluster_id] = [] clusters[cluster_id].append(file_name) + nb_files += 1 else: file_list = listdir(input_folder) for file_name in file_list: @@ -66,15 +68,21 @@ def parse_imrs(line): if not cluster_id in clusters: clusters[cluster_id] = [] clusters[cluster_id].append(file_name) + nb_files +=1 + +print("Found " + str(len(clusters)) + " clusters, " + str(nb_files) + " files.") id_list = sorted(list(clusters.keys())) with open(output_file, "w") as F: F.write("Cluster, Instance, nb_IP, nb_queries,\n") for cluster_id in id_list: + sys.stdout.write(cluster_id) total_ip = 0 total_queries = 0 for file_name in clusters[cluster_id]: + sys.stdout.write(".") + sys.stdout.flush() file_path = join(input_folder, file_name) nb_ip = 0 nb_queries = 0 @@ -86,7 +94,7 @@ def parse_imrs(line): if is_instances: file_parts = file_name.split("_") instance_id = parts[0] - F.write(cluster_id + "," + instance_id + "," + str(nb_ip) + "," + str(nb_queries)) + F.write(cluster_id + "," + instance_id + "," + str(nb_ip) + "," + str(nb_queries) + ",\n") total_ip += nb_ip total_queries += nb_queries - F.write(cluster_id + ", total ," + str(total_ip) + "," + str(total_queries)) + F.write(cluster_id + ", total ," + str(total_ip) + "," + str(total_queries) + ",\n") From e8c4dbd002022a15bf25942b4f0337fbd9649851 Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Sat, 20 Apr 2024 18:39:45 -0700 Subject: [PATCH 27/35] Better logs --- src/imrs_monthly_ip.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/imrs_monthly_ip.py b/src/imrs_monthly_ip.py index 0ac8c0c..6dae7fc 100644 --- a/src/imrs_monthly_ip.py +++ b/src/imrs_monthly_ip.py @@ -98,3 +98,5 @@ def parse_imrs(line): total_ip += nb_ip total_queries += nb_queries F.write(cluster_id + ", total ," + str(total_ip) + "," + str(total_queries) + ",\n") + sys.stdout.write("\n") + print("All done.") From 6ddda9a9ff37fb3d00797a6ec04570ea8cd75ab6 Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Sat, 20 Apr 2024 18:53:08 -0700 Subject: [PATCH 28/35] compute aggregated ip count. --- src/imrs_monthly_ip.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/imrs_monthly_ip.py b/src/imrs_monthly_ip.py index 6dae7fc..ffbaea5 100644 --- a/src/imrs_monthly_ip.py +++ b/src/imrs_monthly_ip.py @@ -84,19 +84,20 @@ def parse_imrs(line): sys.stdout.write(".") sys.stdout.flush() file_path = join(input_folder, file_name) - nb_ip = 0 + ip_list = set() nb_queries = 0 for line in open(file_path, "r"): ok,ip,count = parse_imrs(line) if ok: nb_ip += 1 + if not ip in ip_list: + ip_list.add(ip) nb_queries += count if is_instances: file_parts = file_name.split("_") instance_id = parts[0] F.write(cluster_id + "," + instance_id + "," + str(nb_ip) + "," + str(nb_queries) + ",\n") - total_ip += nb_ip total_queries += nb_queries - F.write(cluster_id + ", total ," + str(total_ip) + "," + str(total_queries) + ",\n") - sys.stdout.write("\n") - print("All done.") + total_ip = len(ip_list) + F.write(cluster_id + ",total," + str(total_ip) + "," + str(total_queries) + ",\n") + print("\nAll done.") From 7fab24f4acde87c0ae2a0f7dcd2a19828e22a7b0 Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Sat, 20 Apr 2024 18:57:58 -0700 Subject: [PATCH 29/35] Check extraction of cluster ID --- src/imrs_monthly_ip.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/imrs_monthly_ip.py b/src/imrs_monthly_ip.py index ffbaea5..a942736 100644 --- a/src/imrs_monthly_ip.py +++ b/src/imrs_monthly_ip.py @@ -55,11 +55,17 @@ def parse_imrs(line): for file_name in file_list: parts = file_name.split("_") first_parts = parts[0].split('-') - cluster_id = parts[1] + "-" + parts[2] - if not cluster_id in clusters: - clusters[cluster_id] = [] - clusters[cluster_id].append(file_name) - nb_files += 1 + if len(first_parts) != 3 or \ + len(parts[0]) != 4 or \ + len(parts[1]) != 2 or \ + len(parts[2]) != 3: + print("Cannot get cluster ID from: " + file_name) + else: + cluster_id = parts[1] + "-" + parts[2] + if not cluster_id in clusters: + clusters[cluster_id] = [] + clusters[cluster_id].append(file_name) + nb_files += 1 else: file_list = listdir(input_folder) for file_name in file_list: From 7fffdeefd7e1a959ff6459c57f1430933a8e4a3a Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Sat, 20 Apr 2024 19:00:58 -0700 Subject: [PATCH 30/35] debug instance parsing --- src/imrs_monthly_ip.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/imrs_monthly_ip.py b/src/imrs_monthly_ip.py index a942736..ae59e13 100644 --- a/src/imrs_monthly_ip.py +++ b/src/imrs_monthly_ip.py @@ -54,11 +54,12 @@ def parse_imrs(line): file_list = listdir(input_folder) for file_name in file_list: parts = file_name.split("_") + print(file_name + " -> " + parts[0]) first_parts = parts[0].split('-') if len(first_parts) != 3 or \ - len(parts[0]) != 4 or \ - len(parts[1]) != 2 or \ - len(parts[2]) != 3: + len(first_parts[0]) != 4 or \ + len(first_parts[1]) != 2 or \ + len(first_parts[2]) != 3: print("Cannot get cluster ID from: " + file_name) else: cluster_id = parts[1] + "-" + parts[2] From f71cbf60ddeae249655d3234c35826ca042b4b33 Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Sat, 20 Apr 2024 19:02:19 -0700 Subject: [PATCH 31/35] More cluster parsing fixes. --- src/imrs_monthly_ip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/imrs_monthly_ip.py b/src/imrs_monthly_ip.py index ae59e13..e7d28b1 100644 --- a/src/imrs_monthly_ip.py +++ b/src/imrs_monthly_ip.py @@ -62,7 +62,7 @@ def parse_imrs(line): len(first_parts[2]) != 3: print("Cannot get cluster ID from: " + file_name) else: - cluster_id = parts[1] + "-" + parts[2] + cluster_id = first_parts[1] + "-" + first_parts[2] if not cluster_id in clusters: clusters[cluster_id] = [] clusters[cluster_id].append(file_name) From b46628fedf6015a9f06d615c92b0c3e4d1f2a1b9 Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Sat, 20 Apr 2024 19:04:28 -0700 Subject: [PATCH 32/35] fix ip aggregate counting --- src/imrs_monthly_ip.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/imrs_monthly_ip.py b/src/imrs_monthly_ip.py index e7d28b1..08be0e9 100644 --- a/src/imrs_monthly_ip.py +++ b/src/imrs_monthly_ip.py @@ -87,12 +87,13 @@ def parse_imrs(line): sys.stdout.write(cluster_id) total_ip = 0 total_queries = 0 + ip_list = set() for file_name in clusters[cluster_id]: sys.stdout.write(".") sys.stdout.flush() file_path = join(input_folder, file_name) - ip_list = set() nb_queries = 0 + nb_ip = 0 for line in open(file_path, "r"): ok,ip,count = parse_imrs(line) if ok: From 08c1c2cd29d6b5cd0bb2c667018a8738adad57f5 Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Sat, 20 Apr 2024 19:19:22 -0700 Subject: [PATCH 33/35] Debugging file names --- src/imrs_monthly_ip.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/imrs_monthly_ip.py b/src/imrs_monthly_ip.py index 08be0e9..0727aa6 100644 --- a/src/imrs_monthly_ip.py +++ b/src/imrs_monthly_ip.py @@ -54,7 +54,6 @@ def parse_imrs(line): file_list = listdir(input_folder) for file_name in file_list: parts = file_name.split("_") - print(file_name + " -> " + parts[0]) first_parts = parts[0].split('-') if len(first_parts) != 3 or \ len(first_parts[0]) != 4 or \ @@ -79,7 +78,10 @@ def parse_imrs(line): print("Found " + str(len(clusters)) + " clusters, " + str(nb_files) + " files.") -id_list = sorted(list(clusters.keys())) +for cluster_id in clusters: + for file_name in clusters[cluster_id]: + print(cluster + ", " + file-name) +id_list = sorted(list(clusters.keys()) with open(output_file, "w") as F: F.write("Cluster, Instance, nb_IP, nb_queries,\n") From e69524c8c268abfe5c74321acc0ae56ea18d52a2 Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Sat, 20 Apr 2024 19:23:43 -0700 Subject: [PATCH 34/35] Missing parenthesis --- src/imrs_monthly_ip.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/imrs_monthly_ip.py b/src/imrs_monthly_ip.py index 0727aa6..c3c897e 100644 --- a/src/imrs_monthly_ip.py +++ b/src/imrs_monthly_ip.py @@ -80,8 +80,10 @@ def parse_imrs(line): for cluster_id in clusters: for file_name in clusters[cluster_id]: - print(cluster + ", " + file-name) -id_list = sorted(list(clusters.keys()) + print(cluster_id + ", " + file_name) + pass + +id_list = sorted(list(clusters.keys())) with open(output_file, "w") as F: F.write("Cluster, Instance, nb_IP, nb_queries,\n") From 354634e56789eb16574b842a639a567d52c231a0 Mon Sep 17 00:00:00 2001 From: Christian Huitema Date: Sat, 20 Apr 2024 19:25:30 -0700 Subject: [PATCH 35/35] remove debugging print --- src/imrs_monthly_ip.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/imrs_monthly_ip.py b/src/imrs_monthly_ip.py index c3c897e..7b365c1 100644 --- a/src/imrs_monthly_ip.py +++ b/src/imrs_monthly_ip.py @@ -78,11 +78,6 @@ def parse_imrs(line): print("Found " + str(len(clusters)) + " clusters, " + str(nb_files) + " files.") -for cluster_id in clusters: - for file_name in clusters[cluster_id]: - print(cluster_id + ", " + file_name) - pass - id_list = sorted(list(clusters.keys())) with open(output_file, "w") as F: @@ -107,7 +102,7 @@ def parse_imrs(line): nb_queries += count if is_instances: file_parts = file_name.split("_") - instance_id = parts[0] + instance_id = file_parts[0] F.write(cluster_id + "," + instance_id + "," + str(nb_ip) + "," + str(nb_queries) + ",\n") total_queries += nb_queries total_ip = len(ip_list)