# Makefile for the wine quality analysis pipeline (default goal: the rendered report).
# If a recipe fails part-way, delete the target it was writing so a truncated
# file is never mistaken for an up-to-date one on the next run.
.DELETE_ON_ERROR:

# `all` names a command, not a file; declaring it phony keeps a stray file
# called `all` from making the build look finished.
.PHONY : all

# Default goal: build the rendered analysis report.
all : report/wine_quality_analysis.html
# Download step: runs scripts/download_and_extract.py against the UCI
# wine+quality.zip URL with data/raw as the output directory. The three files
# named in the header are the outputs this rule claims to produce.
# NOTE(review): several targets before a single ":" are treated as independent
# rules sharing one recipe, so `make -j` may launch the recipe once per
# requested file -- confirm the script tolerates concurrent runs.
data/raw/winequality-red.csv data/raw/winequality-white.csv data/raw/winequality.names : \
    scripts/download_and_extract.py
	python $< \
	    --url "https://archive.ics.uci.edu/static/public/186/wine+quality.zip" \
	    --output_dir $(@D)
# Cleaning step: feeds the raw red-wine CSV to scripts/clean_data.py and asks
# it to write into data/processed (the directory of the target).
#   $<            -> scripts/clean_data.py (first prerequisite)
#   $(word 2,$^)  -> data/raw/winequality-red.csv
#   $(@D)         -> data/processed
data/processed/cleaned_data.csv : scripts/clean_data.py data/raw/winequality-red.csv
	python $< \
	    --input $(word 2,$^) \
	    --output_dir $(@D)
# Split step: runs scripts/train_test_split.py on the cleaned data with
# data/processed as the output directory. PYTHONPATH=. puts the directory make
# is run from on Python's import path for this command only.
#   $<            -> scripts/train_test_split.py
#   $(word 2,$^)  -> data/processed/cleaned_data.csv
#   $(@D)         -> data/processed (same for both targets)
# NOTE(review): two targets on one ":" are independent rules; under `make -j`
# the recipe may be started once for each file -- confirm that is harmless.
data/processed/train_data.csv data/processed/test_data.csv : \
    scripts/train_test_split.py data/processed/cleaned_data.csv
	PYTHONPATH=. python $< \
	    --input-data $(word 2,$^) \
	    --output-dir $(@D)
# EDA step: runs scripts/eda.py with the raw red-wine CSV and the training
# split as inputs, and `results` as the --plot-to destination.
# The raw CSV is passed to the script via --raw-data, so it is listed as a
# prerequisite here: every file the recipe reads should be a declared input,
# not one that only happens to exist because an earlier step needed it.
# NOTE(review): the three targets share this recipe as independent rules;
# under `make -j` it may be started once per requested file.
results/data_describe.csv \
results/example.csv \
results/eda_plot.png : scripts/eda.py \
    data/processed/train_data.csv \
    data/raw/winequality-red.csv
	python $< \
	    --raw-data=data/raw/winequality-red.csv \
	    --training-data=data/processed/train_data.csv \
	    --plot-to=results
# Fit-and-evaluate step: runs scripts/fit_and_evaluate.py with the training
# and test splits, directing its output to `results`.
#   $<            -> scripts/fit_and_evaluate.py
#   $(word 2,$^)  -> data/processed/train_data.csv
#   $(word 3,$^)  -> data/processed/test_data.csv
#   $(@D)         -> results (same for every target listed)
# NOTE(review): the five targets are independent rules sharing this recipe;
# under `make -j` it may be started once per requested file -- confirm.
results/results.png results/confmatrix.png results/feature_importance.csv \
results/model_score_dataframe.csv results/conf_matrix_df.csv : \
    scripts/fit_and_evaluate.py data/processed/train_data.csv data/processed/test_data.csv
	python $< \
	    --training-data=$(word 2,$^) \
	    --test-data=$(word 3,$^) \
	    --plot-to=$(@D)
# Report step: renders the Quarto document once every table and figure under
# results/ is up to date, then places a copy of the rendered HTML at
# ./index.html (relative to the directory make is run from).
#   $<  -> report/wine_quality_analysis.qmd (first prerequisite)
#   $@  -> report/wine_quality_analysis.html
# The copy is made straight to its final name, so no temporary
# ./wine_quality_analysis.html is created (or overwritten) along the way.
report/wine_quality_analysis.html : report/wine_quality_analysis.qmd \
    results/data_describe.csv \
    results/example.csv \
    results/eda_plot.png \
    results/results.png \
    results/confmatrix.png \
    results/feature_importance.csv \
    results/model_score_dataframe.csv \
    results/conf_matrix_df.csv
	quarto render $<
	cp $@ index.html
# Remove all intermediate and output files: everything directly under
# results/ and data/processed/, the downloaded raw data files, the rendered
# report, and the index.html copy.
# `clean` is a command, not a file, so it is declared phony; otherwise a file
# named `clean` in this directory would make `make clean` do nothing.
.PHONY : clean
clean :
	rm -f results/* \
	    data/raw/wine_quality.zip \
	    data/raw/winequality-red.csv \
	    data/raw/winequality-white.csv \
	    data/raw/winequality.names \
	    data/processed/* \
	    report/wine_quality_analysis.html \
	    index.html