post_processing_kde_predicted_outliers.py
import argparse

from pyspark.sql import functions as F, SparkSession

from utils import visualize_outliers


def post_processing(dataset_path):
    # Local Spark session for post-processing the KDE outlier predictions.
    spark = SparkSession.builder.master("local[*]"). \
        appName("post_processing"). \
        getOrCreate()

    dataset_with_predicted_outliers = spark.read.json(dataset_path)
    dataset_with_predicted_outliers.persist()

    # Confusion matrix: "outlier" is the ground-truth label, "prediction" is the model output.
    tp = dataset_with_predicted_outliers.filter((F.col("prediction") == 1.0) & (F.col("outlier") == 1.0)).count()
    fp = dataset_with_predicted_outliers.filter((F.col("prediction") == 1.0) & (F.col("outlier") == 0.0)).count()
    tn = dataset_with_predicted_outliers.filter((F.col("prediction") == 0.0) & (F.col("outlier") == 0.0)).count()
    fn = dataset_with_predicted_outliers.filter((F.col("prediction") == 0.0) & (F.col("outlier") == 1.0)).count()

    visualize_outliers(dataset_with_predicted_outliers, dataset_path.split("/")[-1], prediction=True)
    dataset_with_predicted_outliers.unpersist()

    # Macro-averaged metrics: average the per-class scores of the two classes.
    accuracy = (tp + tn) / (tp + tn + fp + fn)
    recall_macro = (tp / (tp + fn) + tn / (tn + fp)) / 2
    precision_macro = (tp / (tp + fp) + tn / (tn + fn)) / 2
    f1_macro = (2 * recall_macro * precision_macro) / (precision_macro + recall_macro)

    print("Accuracy: ", accuracy)
    print("Recall (macro): ", recall_macro)
    print("Precision (macro): ", precision_macro)
    print("F1 (macro): ", f1_macro)

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dataset_path",
        "-d",
        help="path of the dataset",
        default="dataset_with_predicted_outliers_kde_0.5"
    )
    args = parser.parse_args()
    post_processing(args.dataset_path)
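
A note on the confusion-matrix computation: the four filter/count calls above each launch a separate Spark job over the persisted DataFrame. A minimal alternative sketch, not part of the original script and assuming the same "prediction" and "outlier" columns, collects all four counts in a single groupBy pass:

    # Sketch: one aggregation instead of four filtered counts (same tp/fp/tn/fn values).
    counts = {
        (row["prediction"], row["outlier"]): row["count"]
        for row in dataset_with_predicted_outliers.groupBy("prediction", "outlier").count().collect()
    }
    tp = counts.get((1.0, 1.0), 0)
    fp = counts.get((1.0, 0.0), 0)
    tn = counts.get((0.0, 0.0), 0)
    fn = counts.get((0.0, 1.0), 0)

Either approach yields the same metrics; the single pass just avoids scanning the dataset four times.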