-
Notifications
You must be signed in to change notification settings - Fork 627
/
Copy pathget_started_monitoring.py
163 lines (143 loc) · 5.58 KB
/
get_started_monitoring.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import datetime
from sklearn import datasets
from evidently.metrics import ColumnDriftMetric
from evidently.metrics import ColumnSummaryMetric
from evidently.metrics import DatasetDriftMetric
from evidently.metrics import DatasetMissingValuesMetric
from evidently.report import Report
from evidently.test_preset import DataDriftTestPreset
from evidently.test_suite import TestSuite
from evidently.ui.dashboards import CounterAgg
from evidently.ui.dashboards import DashboardPanelCounter
from evidently.ui.dashboards import DashboardPanelPlot
from evidently.ui.dashboards import PanelValue
from evidently.ui.dashboards import PlotType
from evidently.ui.dashboards import ReportFilter
from evidently.ui.remote import RemoteWorkspace
from evidently.ui.workspace import Workspace
from evidently.ui.workspace import WorkspaceBase
# Fetch the OpenML "adult" dataset (version 2) and keep its frame for the demo.
adult_data = datasets.fetch_openml(name="adult", version=2, as_frame="auto")
adult = adult_data.frame

# Split the rows by education level: rows whose education is one of the listed
# values become the "current" data, every other row becomes the "reference" data.
_in_current_split = adult.education.isin(["Some-college", "HS-grad", "Bachelors"])
adult_ref = adult[~_in_current_split]
adult_cur = adult[_in_current_split]

# Workspace argument handed to Workspace.create() by the script entry point,
# plus the name and description given to the project that gets created.
WORKSPACE = "workspace"
YOUR_PROJECT_NAME = "New Project"
YOUR_PROJECT_DESCRIPTION = "Test project using Adult dataset."
def create_report(i: int):
    """Build and run the drift report for batch ``i``.

    The report holds dataset-level drift and missing-values metrics plus
    Wasserstein drift and summary metrics for the ``age`` and
    ``education-num`` columns. It is run with ``adult_ref`` as reference data
    and rows ``100 * i`` up to (excluding) ``100 * (i + 1)`` of ``adult_cur``
    as current data.

    Args:
        i: Batch index; selects the 100-row slice and shifts the report
            timestamp ``i`` days past the current local time.

    Returns:
        The ``Report`` instance after ``run`` has been called on it.
    """
    metrics = [
        DatasetDriftMetric(),
        DatasetMissingValuesMetric(),
        ColumnDriftMetric(column_name="age", stattest="wasserstein"),
        ColumnSummaryMetric(column_name="age"),
        ColumnDriftMetric(column_name="education-num", stattest="wasserstein"),
        ColumnSummaryMetric(column_name="education-num"),
    ]
    # Each batch is stamped one day later than the previous one.
    report_time = datetime.datetime.now() + datetime.timedelta(days=i)
    report = Report(metrics=metrics, timestamp=report_time)

    batch_start = 100 * i
    batch = adult_cur.iloc[batch_start : batch_start + 100, :]
    report.run(reference_data=adult_ref, current_data=batch)
    return report
def create_test_suite(i: int):
    """Build and run the data drift test suite for batch ``i``.

    The suite contains a single ``DataDriftTestPreset`` and is run with
    ``adult_ref`` as reference data and rows ``100 * i`` up to (excluding)
    ``100 * (i + 1)`` of ``adult_cur`` as current data.

    Args:
        i: Batch index; selects the 100-row slice and shifts the suite
            timestamp ``i`` days past the current local time.

    Returns:
        The ``TestSuite`` instance after ``run`` has been called on it.
    """
    tests = [DataDriftTestPreset()]
    # Each batch is stamped one day later than the previous one.
    suite_time = datetime.datetime.now() + datetime.timedelta(days=i)
    suite = TestSuite(tests=tests, timestamp=suite_time)

    batch_start = 100 * i
    batch = adult_cur.iloc[batch_start : batch_start + 100, :]
    suite.run(reference_data=adult_ref, current_data=batch)
    return suite
def create_project(workspace: WorkspaceBase):
    """Create the demo project in ``workspace`` and set up its dashboard.

    Six panels are added, in this order:

    1. a counter carrying only the dataset title (no value, ``CounterAgg.NONE``);
    2. "Model Calls": summed ``current.number_of_rows`` of ``DatasetMissingValuesMetric``;
    3. "Share of Drifted Features": last ``share_of_drifted_columns`` of ``DatasetDriftMetric``;
    4. "Dataset Quality": line plot of drift share and missing-values share;
    5. bar plot of the ``age`` column drift score;
    6. bar plot of the ``education-num`` column drift score.

    Args:
        workspace: Workspace in which the project is created.

    Returns:
        The project object, after ``save()`` has been called on it.
    """

    def no_filter():
        # A fresh, empty filter per panel, exactly as each panel had originally.
        return ReportFilter(metadata_values={}, tag_values=[])

    def value_counter(title, value, text, agg):
        # Size-1 counter panel that displays a single aggregated metric value.
        return DashboardPanelCounter(
            title=title,
            filter=no_filter(),
            value=value,
            text=text,
            agg=agg,
            size=1,
        )

    def drift_score_bars(title, column):
        # Size-1 bar plot of the ColumnDriftMetric drift score for one column.
        score = PanelValue(
            metric_id="ColumnDriftMetric",
            metric_args={"column_name.name": column},
            field_path=ColumnDriftMetric.fields.drift_score,
            legend="Drift Score",
        )
        return DashboardPanelPlot(
            title=title,
            filter=no_filter(),
            values=[score],
            plot_type=PlotType.BAR,
            size=1,
        )

    project = workspace.create_project(YOUR_PROJECT_NAME)
    project.description = YOUR_PROJECT_DESCRIPTION

    row_count = PanelValue(
        metric_id="DatasetMissingValuesMetric",
        field_path=DatasetMissingValuesMetric.fields.current.number_of_rows,
        legend="count",
    )
    drifted_share = PanelValue(
        metric_id="DatasetDriftMetric",
        field_path="share_of_drifted_columns",
        legend="share",
    )
    quality_series = [
        PanelValue(metric_id="DatasetDriftMetric", field_path="share_of_drifted_columns", legend="Drift Share"),
        PanelValue(
            metric_id="DatasetMissingValuesMetric",
            field_path=DatasetMissingValuesMetric.fields.current.share_of_missing_values,
            legend="Missing Values Share",
        ),
    ]

    panels = [
        DashboardPanelCounter(
            filter=no_filter(),
            agg=CounterAgg.NONE,
            title="Census Income Dataset (Adult)",
        ),
        value_counter("Model Calls", row_count, "count", CounterAgg.SUM),
        value_counter("Share of Drifted Features", drifted_share, "share", CounterAgg.LAST),
        DashboardPanelPlot(
            title="Dataset Quality",
            filter=no_filter(),
            values=quality_series,
            plot_type=PlotType.LINE,
        ),
        drift_score_bars("Age: Wasserstein drift distance", "age"),
        drift_score_bars("Education-num: Wasserstein drift distance", "education-num"),
    ]
    # Panels are added in list order, matching the original sequence of add_panel calls.
    for panel in panels:
        project.dashboard.add_panel(panel)

    project.save()
    return project
def create_demo_project(workspace: str):
    """Create the workspace and demo project, then fill it with five batches.

    For each batch index 0 through 4 a report and then a test suite are
    generated and added to the newly created project.

    Args:
        workspace: Value passed to ``Workspace.create``.
    """
    ws = Workspace.create(workspace)
    demo_project = create_project(ws)

    for batch_index in range(5):
        # Report first, then test suite, for every batch.
        ws.add_report(demo_project.id, create_report(i=batch_index))
        ws.add_test_suite(demo_project.id, create_test_suite(i=batch_index))
# Script entry point: build the demo project only when run directly, not on import.
if __name__ == "__main__":
    create_demo_project(workspace=WORKSPACE)