-
Notifications
You must be signed in to change notification settings - Fork 0
/
titanic_workflow.bs
44 lines (34 loc) · 2.2 KB
/
titanic_workflow.bs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import brane_compute;
import brane_visualize;
// Step 1 - environment setup.
// mount() makes the train.csv and test.csv files shipped in this brane package
// available in the /data folder. Its return value is printed.
print("### mounting the train.csv and test.csv file on /data ###");
print(mount());
// create_img() creates the /img folder that holds the visualization graphs.
// Its return value is printed.
print("### creating /img folder for visualization graphs ###");
print(create_img());
// Step 2 - sanity check of the input data.
// shape(path) reads the data from the given file and returns the shape of the
// resulting dataframe; it is called once for the train and once for the test file.
print("### Printing shape on train and test dataset ###");
print(shape("/data/train.csv"));
print(shape("/data/test.csv"));
// Step 3 - exploratory data analysis (EDA) on the training data.
// visualize_EDA(path) first fetches the dataframe and then calls its internal
// methods to plot the graphs used for understanding the data (histograms and
// bar plots). The printed return value is a status code: 0 means success, any
// non-zero code means an error.
// NOTE(review): the plots are presumably written to the /img folder - confirm.
print("### Running visualization function for EDA. 0 -> success, non-zero code -> error ###");
print(visualize_EDA("/data/train.csv"));
// Step 4 - preprocessing of both datasets (train and test).
// preprocess(path, flag):
//   1) processes the name feature to extract the title and creates a separate
//      feature out of it,
//   2) imputes a logical value for null/NA values in a feature,
//   3) converts categorical data to numerical data,
//   4) handles missing ages for instances.
// The second argument is 1 for the training dataset and 0 for the test dataset.
// The return values of both calls are discarded (not printed).
// NOTE(review): these calls presumably write /data/prep_data1.csv and
// /data/prep_data0.csv, which the later modelling steps read - confirm.
print("### Preprocessing the dataset ###");
preprocess("/data/train.csv", 1);
preprocess("/data/test.csv", 0);
// Step 5 - modelling and accuracy, repeated for three classifiers.
// Each section calls model(<prep_data1 path>, <prep_data0 path>, <classifier key>)
// and discards its return value, then prints accuracy(<prep_data1 path>, <classifier key>).
// NOTE(review): prep_data1.csv / prep_data0.csv are presumably the preprocessed
// train / test files produced by the earlier preprocess(..., 1) / preprocess(..., 0)
// calls - confirm. If so, the accuracy is computed from the training-flag file;
// verify whether the function holds out a validation split internally.

// Modelling and accuracy for the random forest classifier (key "rfc")
print("### Modelling and accuracy for random forest classifier ###");
model("/data/prep_data1.csv", "/data/prep_data0.csv", "rfc");
print(accuracy("/data/prep_data1.csv", "rfc"));
// Modelling and accuracy for the decision tree classifier (key "dtc")
print("### Modelling and accuracy for decision tree classifier ###");
model("/data/prep_data1.csv", "/data/prep_data0.csv", "dtc");
print(accuracy("/data/prep_data1.csv", "dtc"));
// Modelling and accuracy for the Naive Bayes classifier (key "bnb")
print("### Modelling and accuracy for Naive Bayes classifier ###");
model("/data/prep_data1.csv", "/data/prep_data0.csv", "bnb");
print(accuracy("/data/prep_data1.csv", "bnb"));
// Step 6 - visualizing results for the random forest classifier only
// (the "dtc" and "bnb" models are not visualized in this workflow).
// The printed return value is a status code: 0 means success, any non-zero
// code means an error.
print("### Running visualization function for results. 0 -> success, non-zero code -> error ###");
print(visualize_results("/data/prep_data1.csv", "rfc"));