-
Notifications
You must be signed in to change notification settings - Fork 3.9k
/
Copy pathprediction.py
48 lines (32 loc) · 1.29 KB
/
prediction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import dask.array as da
from distributed import Client, LocalCluster
from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error
import lightgbm as lgb
if __name__ == "__main__":
    # End-to-end example: fit a LightGBM regressor on a local Dask cluster,
    # predict on the training data, and report the mean squared error.
    print("loading data")

    X, y = make_regression(n_samples=1000, n_features=50)

    print("initializing a Dask cluster")

    # Context managers make sure the local workers and the client connection
    # are shut down when the block exits, including when training or
    # prediction raises partway through.
    #
    # NOTE(review): `client` is never passed to the estimator explicitly;
    # presumably lightgbm's Dask estimators pick up the active client --
    # confirm against the LightGBM Dask documentation.
    with LocalCluster(n_workers=2) as cluster, Client(cluster) as client:
        print("created a Dask LocalCluster")

        print("distributing training data on the Dask cluster")

        # Split features and target into row-wise chunks of 100 rows each so
        # that the partitions of dX and dy line up.
        dX = da.from_array(X, chunks=(100, 50))
        dy = da.from_array(y, chunks=(100,))

        print("beginning training")

        dask_model = lgb.DaskLGBMRegressor(n_estimators=10)
        dask_model.fit(dX, dy)
        assert dask_model.fitted_

        print("done training")

        print("predicting on the training data")

        preds = dask_model.predict(dX)

        # the code below uses sklearn.metrics, but this requires pulling all of the
        # predictions and target values back from workers to the client
        #
        # for larger datasets, consider the metrics from dask-ml instead
        # https://ml.dask.org/modules/api.html#dask-ml-metrics-metrics
        print("computing MSE")

        # .compute() must run while the cluster is still alive, so it stays
        # inside the `with` block.
        preds_local = preds.compute()
        actuals_local = dy.compute()
        mse = mean_squared_error(actuals_local, preds_local)

        print(f"MSE: {mse}")