diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/reference/learn/reference.po b/docs/source/locale/zh_CN/LC_MESSAGES/reference/learn/reference.po
index e5c8e9f471..e0b0a0ae99 100644
--- a/docs/source/locale/zh_CN/LC_MESSAGES/reference/learn/reference.po
+++ b/docs/source/locale/zh_CN/LC_MESSAGES/reference/learn/reference.po
@@ -8,7 +8,7 @@ msgid ""
 msgstr ""
 "Project-Id-Version: mars 0.5.0a2\n"
 "Report-Msgid-Bugs-To: \n"
-"POT-Creation-Date: 2021-08-23 16:36+0800\n"
+"POT-Creation-Date: 2021-09-02 18:08+0800\n"
 "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
 "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
 "Language-Team: LANGUAGE <LL@li.org>\n"
@@ -101,483 +101,567 @@ msgstr ""
 msgid "Matrix Decomposition"
 msgstr "矩阵分解"
 
-#: ../../source/reference/learn/reference.rst:79:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:78:<autosummary>:1
 msgid ""
 ":obj:`decomposition.PCA <mars.learn.decomposition.PCA>`\\ "
 "\\(\\[n\\_components\\, copy\\, ...\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:79:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:78:<autosummary>:1
 msgid "Principal component analysis (PCA)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:79:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:78:<autosummary>:1
 msgid ""
 ":obj:`decomposition.TruncatedSVD "
 "<mars.learn.decomposition.TruncatedSVD>`\\ \\(\\[n\\_components\\, "
 "...\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:79:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:78:<autosummary>:1
 msgid "Dimensionality reduction using truncated SVD (aka LSA)."
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:83
+#: ../../source/reference/learn/reference.rst:82
+msgid "Ensemble Methods"
+msgstr "集成方法"
+
+#: ../../source/reference/learn/reference.rst:95:<autosummary>:1
+msgid ""
+":obj:`ensemble.BlockwiseVotingClassifier "
+"<mars.learn.ensemble.BlockwiseVotingClassifier>`\\ \\(estimator\\)"
+msgstr ""
+
+#: ../../source/reference/learn/reference.rst:95:<autosummary>:1
+msgid "Blockwise training and ensemble voting classifier."
+msgstr ""
+
+#: ../../source/reference/learn/reference.rst:95:<autosummary>:1
+msgid ""
+":obj:`ensemble.BlockwiseVotingRegressor "
+"<mars.learn.ensemble.BlockwiseVotingRegressor>`\\ \\(estimator\\)"
+msgstr ""
+
+#: ../../source/reference/learn/reference.rst:95:<autosummary>:1
+msgid "Blockwise training and ensemble voting regressor."
+msgstr ""
+
+#: ../../source/reference/learn/reference.rst:99
+msgid "Linear Models"
+msgstr "线性模型"
+
+#: ../../source/reference/learn/reference.rst:102
+msgid "Classical linear regressors"
+msgstr ""
+
+#: ../../source/reference/learn/reference.rst:110:<autosummary>:1
+msgid ""
+":obj:`linear_model.LinearRegression "
+"<mars.learn.linear_model.LinearRegression>`\\ \\(\\*\\[\\, ...\\]\\)"
+msgstr ""
+
+#: ../../source/reference/learn/reference.rst:110:<autosummary>:1
+msgid "Ordinary least squares Linear Regression."
+msgstr ""
+
+#: ../../source/reference/learn/reference.rst:114
 msgid "Metrics"
 msgstr "评估"
 
-#: ../../source/reference/learn/reference.rst:92
+#: ../../source/reference/learn/reference.rst:123
 msgid "Classification metrics"
 msgstr "分类评估"
 
-#: ../../source/reference/learn/reference.rst:100:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:131:<autosummary>:1
 msgid ""
 ":obj:`metrics.accuracy_score <mars.learn.metrics.accuracy_score>`\\ "
 "\\(y\\_true\\, y\\_pred\\[\\, ...\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:100:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:131:<autosummary>:1
 msgid "Accuracy classification score."
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:100:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:131:<autosummary>:1
 msgid ""
 ":obj:`metrics.auc <mars.learn.metrics.auc>`\\ \\(x\\, y\\[\\, session\\, "
 "run\\_kwargs\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:100:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:131:<autosummary>:1
 msgid "Compute Area Under the Curve (AUC) using the trapezoidal rule"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:100:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:131:<autosummary>:1
 msgid ""
 ":obj:`metrics.roc_curve <mars.learn.metrics.roc_curve>`\\ \\(y\\_true\\, "
 "y\\_score\\[\\, ...\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:100:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:131:<autosummary>:1
 msgid "Compute Receiver operating characteristic (ROC)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:102
+#: ../../source/reference/learn/reference.rst:133
+msgid "Regression metrics"
+msgstr "回归评估"
+
+#: ../../source/reference/learn/reference.rst:140:<autosummary>:1
+msgid ""
+":obj:`metrics.r2_score <mars.learn.metrics.r2_score>`\\ \\(y\\_true\\, "
+"y\\_pred\\, \\*\\[\\, ...\\]\\)"
+msgstr ""
+
+#: ../../source/reference/learn/reference.rst:140:<autosummary>:1
+msgid ":math:`R^2` (coefficient of determination) regression score function."
+msgstr ""
+
+#: ../../source/reference/learn/reference.rst:142
 msgid "Pairwise metrics"
 msgstr "Pairwise 评估"
 
-#: ../../source/reference/learn/reference.rst:120:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:160:<autosummary>:1
 msgid ""
 ":obj:`metrics.pairwise.cosine_similarity "
 "<mars.learn.metrics.pairwise.cosine_similarity>`\\ \\(X\\[\\, Y\\, "
 "...\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:120:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:160:<autosummary>:1
 msgid "Compute cosine similarity between samples in X and Y."
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:120:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:160:<autosummary>:1
 msgid ""
 ":obj:`metrics.pairwise.cosine_distances "
 "<mars.learn.metrics.pairwise.cosine_distances>`\\ \\(X\\[\\, Y\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:120:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:160:<autosummary>:1
 msgid "Compute cosine distance between samples in X and Y."
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:120:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:160:<autosummary>:1
 msgid ""
 ":obj:`metrics.pairwise.euclidean_distances "
 "<mars.learn.metrics.pairwise.euclidean_distances>`\\ \\(X\\[\\, Y\\, "
 "...\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:120:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:160:<autosummary>:1
 msgid ""
 "Considering the rows of X (and Y=X) as vectors, compute the distance "
 "matrix between each pair of vectors."
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:120:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:160:<autosummary>:1
 msgid ""
 ":obj:`metrics.pairwise.haversine_distances "
 "<mars.learn.metrics.pairwise.haversine_distances>`\\ \\(X\\[\\, Y\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:120:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:160:<autosummary>:1
 msgid "Compute the Haversine distance between samples in X and Y"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:120:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:160:<autosummary>:1
 msgid ""
 ":obj:`metrics.pairwise.manhattan_distances "
 "<mars.learn.metrics.pairwise.manhattan_distances>`\\ \\(X\\[\\, Y\\, "
 "...\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:120:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:160:<autosummary>:1
 msgid "Compute the L1 distances between the vectors in X and Y."
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:120:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:160:<autosummary>:1
 msgid ""
 ":obj:`metrics.pairwise.rbf_kernel "
 "<mars.learn.metrics.pairwise.rbf_kernel>`\\ \\(X\\[\\, Y\\, gamma\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:120:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:160:<autosummary>:1
 msgid "Compute the rbf (gaussian) kernel between X and Y."
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:120:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:160:<autosummary>:1
 msgid ""
 ":obj:`metrics.pairwise_distances "
 "<mars.learn.metrics.pairwise_distances>`\\ \\(X\\[\\, Y\\, metric\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:124
+#: ../../source/reference/learn/reference.rst:164
 msgid "Model Selection"
 msgstr "模型选择"
 
-#: ../../source/reference/learn/reference.rst:127
+#: ../../source/reference/learn/reference.rst:167
 msgid "Splitter Classes"
 msgstr "划分类"
 
-#: ../../source/reference/learn/reference.rst:135:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:175:<autosummary>:1
 msgid ""
 ":obj:`model_selection.KFold <mars.learn.model_selection.KFold>`\\ "
 "\\(\\[n\\_splits\\, shuffle\\, ...\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:135:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:175:<autosummary>:1
 msgid "K-Folds cross-validator"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:137
+#: ../../source/reference/learn/reference.rst:177
 msgid "Splitter Functions"
 msgstr "划分函数"
 
-#: ../../source/reference/learn/reference.rst:145:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:185:<autosummary>:1
 msgid ""
 ":obj:`model_selection.train_test_split "
 "<mars.learn.model_selection.train_test_split>`\\ \\(\\*arrays\\, ...\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:145:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:185:<autosummary>:1
 msgid "Split arrays or matrices into random train and test subsets"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:149
+#: ../../source/reference/learn/reference.rst:189
 msgid "Nearest Neighbors"
 msgstr "最邻近"
 
-#: ../../source/reference/learn/reference.rst:161:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:201:<autosummary>:1
 msgid ""
 ":obj:`neighbors.NearestNeighbors "
 "<mars.learn.neighbors.NearestNeighbors>`\\ \\(\\[n\\_neighbors\\, "
 "...\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:165
+#: ../../source/reference/learn/reference.rst:205
 msgid "Preprocessing and Normalization"
 msgstr "预处理和标准化"
 
-#: ../../source/reference/learn/reference.rst:179:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:221:<autosummary>:1
+msgid ""
+":obj:`preprocessing.LabelBinarizer "
+"<mars.learn.preprocessing.LabelBinarizer>`\\ \\(\\*\\[\\, neg\\_label\\, "
+"...\\]\\)"
+msgstr ""
+
+#: ../../source/reference/learn/reference.rst:221:<autosummary>:1
+msgid "Binarize labels in a one-vs-all fashion."
+msgstr ""
+
+#: ../../source/reference/learn/reference.rst:221:<autosummary>:1
 msgid ""
 ":obj:`preprocessing.MinMaxScaler "
 "<mars.learn.preprocessing.MinMaxScaler>`\\ \\(\\[feature\\_range\\, "
 "...\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:179:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:221:<autosummary>:1
 msgid "Transform features by scaling each feature to a given range."
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:179:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:221:<autosummary>:1
 msgid ""
 ":obj:`preprocessing.minmax_scale "
 "<mars.learn.preprocessing.minmax_scale>`\\ \\(X\\[\\, ...\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:179:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:221:<autosummary>:1
+msgid ""
+":obj:`preprocessing.label_binarize "
+"<mars.learn.preprocessing.label_binarize>`\\ \\(y\\, \\*\\, classes\\)"
+msgstr ""
+
+#: ../../source/reference/learn/reference.rst:221:<autosummary>:1
 msgid ""
 ":obj:`preprocessing.normalize <mars.learn.preprocessing.normalize>`\\ "
 "\\(X\\[\\, norm\\, axis\\, ...\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:179:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:221:<autosummary>:1
 msgid "Scale input vectors individually to unit norm (vector length)."
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:183
+#: ../../source/reference/learn/reference.rst:225
 msgid "Semi-Supervised Learning"
 msgstr "半监督学习"
 
-#: ../../source/reference/learn/reference.rst:195:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:237:<autosummary>:1
 msgid ""
 ":obj:`semi_supervised.LabelPropagation "
 "<mars.learn.semi_supervised.LabelPropagation>`\\ \\(\\[kernel\\, "
 "...\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:195:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:237:<autosummary>:1
 msgid "Label Propagation classifier"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:199
+#: ../../source/reference/learn/reference.rst:241
 msgid "Utilities"
 msgstr "工具"
 
-#: ../../source/reference/learn/reference.rst:219:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:261:<autosummary>:1
 msgid ""
 ":obj:`utils.assert_all_finite <mars.learn.utils.assert_all_finite>`\\ "
 "\\(X\\[\\, allow\\_nan\\, ...\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:219:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:261:<autosummary>:1
 msgid ""
 ":obj:`utils.check_X_y <mars.learn.utils.check_X_y>`\\ \\(X\\, y\\[\\, "
 "accept\\_sparse\\, ...\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:219:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:261:<autosummary>:1
 msgid "Input validation for standard estimators."
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:219:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:261:<autosummary>:1
 msgid ""
 ":obj:`utils.check_array <mars.learn.utils.check_array>`\\ \\(array\\[\\, "
 "accept\\_sparse\\, ...\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:219:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:261:<autosummary>:1
 msgid "Input validation on a tensor, list, sparse matrix or similar."
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:219:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:261:<autosummary>:1
 msgid ""
 ":obj:`utils.check_consistent_length "
 "<mars.learn.utils.check_consistent_length>`\\ \\(\\*arrays\\[\\, "
 "...\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:219:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:261:<autosummary>:1
 msgid "Check that all arrays have consistent first dimensions."
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:219:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:261:<autosummary>:1
 msgid ""
 ":obj:`utils.multiclass.type_of_target "
 "<mars.learn.utils.multiclass.type_of_target>`\\ \\(y\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:219:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:261:<autosummary>:1
 msgid "Determine the type of data indicated by the target."
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:219:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:261:<autosummary>:1
 msgid ""
 ":obj:`utils.multiclass.is_multilabel "
 "<mars.learn.utils.multiclass.is_multilabel>`\\ \\(y\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:219:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:261:<autosummary>:1
 msgid "Check if ``y`` is in a multilabel format."
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:219:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:261:<autosummary>:1
 msgid ""
 ":obj:`utils.shuffle <mars.learn.utils.shuffle>`\\ \\(\\*arrays\\, "
 "\\*\\*options\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:219:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:261:<autosummary>:1
 msgid ""
 ":obj:`utils.validation.check_is_fitted "
 "<mars.learn.utils.validation.check_is_fitted>`\\ \\(estimator\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:219:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:261:<autosummary>:1
 msgid "Perform is_fitted validation for estimator."
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:219:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:261:<autosummary>:1
 msgid ""
 ":obj:`utils.validation.column_or_1d "
 "<mars.learn.utils.validation.column_or_1d>`\\ \\(y\\[\\, warn\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:219:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:261:<autosummary>:1
 msgid "Ravel column or 1d numpy array, else raises an error"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:223
+#: ../../source/reference/learn/reference.rst:265
 msgid "LightGBM Integration"
 msgstr "LightGBM 集成"
 
-#: ../../source/reference/learn/reference.rst:237:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:279:<autosummary>:1
 msgid ""
 ":obj:`contrib.lightgbm.LGBMClassifier "
 "<mars.learn.contrib.lightgbm.LGBMClassifier>`\\ \\(\\*args\\, "
 "\\*\\*kwargs\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:237:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:279:<autosummary>:1
 msgid ""
 ":obj:`contrib.lightgbm.LGBMRegressor "
 "<mars.learn.contrib.lightgbm.LGBMRegressor>`\\ \\(\\*args\\, "
 "\\*\\*kwargs\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:237:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:279:<autosummary>:1
 msgid ""
 ":obj:`contrib.lightgbm.LGBMRanker "
 "<mars.learn.contrib.lightgbm.LGBMRanker>`\\ \\(\\*args\\, \\*\\*kwargs\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:241
+#: ../../source/reference/learn/reference.rst:283
 msgid "PyTorch Integration"
 msgstr "PyTorch 集成"
 
-#: ../../source/reference/learn/reference.rst:258:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:300:<autosummary>:1
 msgid ""
 ":obj:`contrib.pytorch.run_pytorch_script "
 "<mars.learn.contrib.pytorch.run_pytorch_script>`\\ \\(script\\, ...\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:258:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:300:<autosummary>:1
 msgid "Run PyTorch script in Mars cluster."
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:258:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:300:<autosummary>:1
 msgid ""
 ":obj:`contrib.pytorch.MarsDataset "
 "<mars.learn.contrib.pytorch.MarsDataset>`\\ \\(\\*tileables\\[\\, "
 "...\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:258:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:300:<autosummary>:1
 msgid "MarsDataset that inherit from torch.utils.data.Dataset."
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:258:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:300:<autosummary>:1
 msgid ""
 ":obj:`contrib.pytorch.SequentialSampler "
 "<mars.learn.contrib.pytorch.SequentialSampler>`\\ \\(data\\_source\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:258:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:300:<autosummary>:1
 msgid "\"Samples elements sequentially, always in the same order."
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:258:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:300:<autosummary>:1
 msgid ""
 ":obj:`contrib.pytorch.RandomSampler "
 "<mars.learn.contrib.pytorch.RandomSampler>`\\ \\(data\\_source\\[\\, "
 "...\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:258:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:300:<autosummary>:1
 msgid "\""
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:258:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:300:<autosummary>:1
 msgid ""
 ":obj:`contrib.pytorch.SubsetRandomSampler "
 "<mars.learn.contrib.pytorch.SubsetRandomSampler>`\\ \\(indices\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:258:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:300:<autosummary>:1
 msgid ""
 "Samples elements randomly from a given list of indices, without "
 "replacement."
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:258:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:300:<autosummary>:1
 msgid ""
 ":obj:`contrib.pytorch.DistributedSampler "
 "<mars.learn.contrib.pytorch.DistributedSampler>`\\ \\(dataset\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:258:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:300:<autosummary>:1
 msgid "Sampler that restricts data loading to a subset of the dataset."
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:262
+#: ../../source/reference/learn/reference.rst:304
 msgid "StatsModels Integration"
 msgstr "StatsModels 集成"
 
-#: ../../source/reference/learn/reference.rst:275:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:317:<autosummary>:1
 msgid ""
 ":obj:`contrib.statsmodels.MarsDistributedModel "
 "<mars.learn.contrib.statsmodels.MarsDistributedModel>`\\ \\(\\[...\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:275:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:317:<autosummary>:1
 msgid ""
 ":obj:`contrib.statsmodels.MarsResults "
 "<mars.learn.contrib.statsmodels.MarsResults>`\\ \\(model\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:279
+#: ../../source/reference/learn/reference.rst:321
 msgid "TensorFlow Integration"
 msgstr "TensorFlow 集成"
 
-#: ../../source/reference/learn/reference.rst:291:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:334:<autosummary>:1
 msgid ""
 ":obj:`contrib.tensorflow.run_tensorflow_script "
 "<mars.learn.contrib.tensorflow.run_tensorflow_script>`\\ \\(...\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:291:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:334:<autosummary>:1
 msgid "Run TensorFlow script in Mars cluster."
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:295
+#: ../../source/reference/learn/reference.rst:334:<autosummary>:1
+msgid ""
+":obj:`contrib.tensorflow.gen_tensorflow_dataset "
+"<mars.learn.contrib.tensorflow.gen_tensorflow_dataset>`\\ \\(tensors\\)"
+msgstr ""
+
+#: ../../source/reference/learn/reference.rst:334:<autosummary>:1
+msgid "convert mars data type to tf.data.Dataset."
+msgstr ""
+
+#: ../../source/reference/learn/reference.rst:338
 msgid "XGBoost Integration"
 msgstr "XGBoost 集成"
 
-#: ../../source/reference/learn/reference.rst:310:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:353:<autosummary>:1
 msgid ""
 ":obj:`contrib.xgboost.MarsDMatrix "
 "<mars.learn.contrib.xgboost.MarsDMatrix>`\\ \\(data\\[\\, label\\, "
 "...\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:310:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:353:<autosummary>:1
 msgid ""
 ":obj:`contrib.xgboost.train <mars.learn.contrib.xgboost.train>`\\ "
 "\\(params\\, dtrain\\[\\, evals\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:310:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:353:<autosummary>:1
 msgid "Train XGBoost model in Mars manner."
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:310:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:353:<autosummary>:1
 msgid ""
 ":obj:`contrib.xgboost.predict <mars.learn.contrib.xgboost.predict>`\\ "
 "\\(model\\, data\\[\\, ...\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:310:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:353:<autosummary>:1
 msgid ""
 ":obj:`contrib.xgboost.XGBClassifier "
 "<mars.learn.contrib.xgboost.XGBClassifier>`\\ \\(\\[max\\_depth\\, "
 "...\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:310:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:353:<autosummary>:1
 msgid "Implementation of the scikit-learn API for XGBoost classification."
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:310:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:353:<autosummary>:1
 msgid ""
 ":obj:`contrib.xgboost.XGBRegressor "
 "<mars.learn.contrib.xgboost.XGBRegressor>`\\ \\(\\[max\\_depth\\, "
 "...\\]\\)"
 msgstr ""
 
-#: ../../source/reference/learn/reference.rst:310:<autosummary>:1
+#: ../../source/reference/learn/reference.rst:353:<autosummary>:1
 msgid "Implementation of the scikit-learn API for XGBoost regressor."
 msgstr ""
+
diff --git a/docs/source/reference/learn/reference.rst b/docs/source/reference/learn/reference.rst
index cda324cf71..d7091278d9 100644
--- a/docs/source/reference/learn/reference.rst
+++ b/docs/source/reference/learn/reference.rst
@@ -93,6 +93,21 @@ Ensemble Methods
    ensemble.BlockwiseVotingClassifier
    ensemble.BlockwiseVotingRegressor
 
+.. _linear_model_ref:
+
+Linear Models
+=============
+
+Classical linear regressors
+---------------------------
+
+.. currentmodule:: mars.learn
+
+.. autosummary::
+   :toctree: generated/
+
+   linear_model.LinearRegression
+
 .. _metrics_ref:
 
 Metrics
@@ -198,8 +213,10 @@ Preprocessing and Normalization
 .. autosummary::
    :toctree: generated/
 
+   preprocessing.LabelBinarizer
    preprocessing.MinMaxScaler
    preprocessing.minmax_scale
+   preprocessing.label_binarize
    preprocessing.normalize
 
 .. _semi_supervised_ref:
diff --git a/mars/learn/contrib/tensorflow/dataset.py b/mars/learn/contrib/tensorflow/dataset.py
index 26e95052eb..fed327c380 100644
--- a/mars/learn/contrib/tensorflow/dataset.py
+++ b/mars/learn/contrib/tensorflow/dataset.py
@@ -35,8 +35,8 @@
 @require_not_none(tf)
 class MarsDataset:
     def __init__(self, tensors,
-                 output_shapes=None,
-                 output_types=None,
+                 output_shapes: Tuple[int, ...]=None,
+                 output_types: Tuple[np.dtype, ...]=None,
                  fetch_kwargs=None):
 
         self._context = get_context()
@@ -123,8 +123,8 @@ def make_generator():   # pragma: no cover
 
 
 def gen_tensorflow_dataset(tensors,
-                           output_shapes=None,
-                           output_types=None,
+                           output_shapes: Tuple[int, ...]=None,
+                           output_types: Tuple[np.dtype, ...]=None,
                            fetch_kwargs=None):
     """
     convert mars data type to tf.data.Dataset. Note this is based tensorflow 2.0
diff --git a/mars/learn/preprocessing/__init__.py b/mars/learn/preprocessing/__init__.py
index 37887ec535..2a9dd4502a 100644
--- a/mars/learn/preprocessing/__init__.py
+++ b/mars/learn/preprocessing/__init__.py
@@ -12,10 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-try:
-    from ._data import MinMaxScaler
-    from ._data import minmax_scale
-except ImportError:  # pragma: no cover
-    # sklearn not installed
-    pass
+from ._data import MinMaxScaler
+from ._data import minmax_scale
+from ._label import LabelBinarizer, label_binarize
 from .normalize import normalize
diff --git a/mars/learn/preprocessing/_label.py b/mars/learn/preprocessing/_label.py
new file mode 100644
index 0000000000..57f51af8b2
--- /dev/null
+++ b/mars/learn/preprocessing/_label.py
@@ -0,0 +1,661 @@
+# Copyright 1999-2021 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Union
+
+import numpy as np
+import scipy.sparse as sp
+from sklearn.base import BaseEstimator, TransformerMixin
+from sklearn.utils.sparsefuncs import min_max_axis
+
+from ... import execute, fetch
+from ... import opcodes
+from ... import tensor as mt
+from ...core import ENTITY_TYPE, OutputType, recursive_tile
+from ...core.context import get_context, Context
+from ...lib.sparse import SparseNDArray
+from ...serialization.serializables import AnyField, BoolField, \
+    Int32Field, StringField
+from ...tensor.core import TensorOrder
+from ...typing import TileableType
+from ...utils import has_unknown_shape
+from ..operands import LearnOperand, LearnOperandMixin
+from ..utils import column_or_1d
+from ..utils.multiclass import unique_labels, type_of_target
+from ..utils.validation import _num_samples, check_is_fitted, check_array
+
+
+class LabelBinarizer(TransformerMixin, BaseEstimator):
+    """Binarize labels in a one-vs-all fashion.
+
+    Several regression and binary classification algorithms are
+    available in scikit-learn. A simple way to extend these algorithms
+    to the multi-class classification case is to use the so-called
+    one-vs-all scheme.
+
+    At learning time, this simply consists in learning one regressor
+    or binary classifier per class. In doing so, one needs to convert
+    multi-class labels to binary labels (belong or does not belong
+    to the class). LabelBinarizer makes this process easy with the
+    transform method.
+
+    At prediction time, one assigns the class for which the corresponding
+    model gave the greatest confidence. LabelBinarizer makes this easy
+    with the inverse_transform method.
+
+    Read more in the :ref:`User Guide <preprocessing_targets>`.
+
+    Parameters
+    ----------
+
+    neg_label : int, default=0
+        Value with which negative labels must be encoded.
+
+    pos_label : int, default=1
+        Value with which positive labels must be encoded.
+
+    sparse_output : bool, default=False
+        True if the returned array from transform is desired to be in sparse
+        CSR format.
+
+    Attributes
+    ----------
+
+    classes_ : ndarray of shape (n_classes,)
+        Holds the label for each class.
+
+    y_type_ : str
+        Represents the type of the target data as evaluated by
+        utils.multiclass.type_of_target. Possible types are 'continuous',
+        'continuous-multioutput', 'binary', 'multiclass',
+        'multiclass-multioutput', 'multilabel-indicator', and 'unknown'.
+
+    sparse_input_ : bool
+        True if the input data to transform is given as a sparse matrix, False
+        otherwise.
+
+    Examples
+    --------
+    >>> from mars.learn import preprocessing
+    >>> lb = preprocessing.LabelBinarizer()
+    >>> lb.fit([1, 2, 6, 4, 2])
+    LabelBinarizer()
+    >>> lb.classes_
+    array([1, 2, 4, 6])
+    >>> lb.transform([1, 6])
+    array([[1, 0, 0, 0],
+           [0, 0, 0, 1]])
+
+    Binary targets transform to a column vector
+
+    >>> lb = preprocessing.LabelBinarizer()
+    >>> lb.fit_transform(['yes', 'no', 'no', 'yes'])
+    array([[1],
+           [0],
+           [0],
+           [1]])
+
+    Passing a 2D matrix for multilabel classification
+
+    >>> import numpy as np
+    >>> lb.fit(np.array([[0, 1, 1], [1, 0, 0]]))
+    LabelBinarizer()
+    >>> lb.classes_
+    array([0, 1, 2])
+    >>> lb.transform([0, 1, 2, 1])
+    array([[1, 0, 0],
+           [0, 1, 0],
+           [0, 0, 1],
+           [0, 1, 0]])
+
+    See Also
+    --------
+    label_binarize : Function to perform the transform operation of
+        LabelBinarizer with fixed classes.
+    OneHotEncoder : Encode categorical features using a one-hot aka one-of-K
+        scheme.
+    """
+
+    def __init__(self, *, neg_label=0, pos_label=1, sparse_output=False):
+        if neg_label >= pos_label:
+            raise ValueError("neg_label={0} must be strictly less than "
+                             "pos_label={1}.".format(neg_label, pos_label))
+
+        if sparse_output and (pos_label == 0 or neg_label != 0):
+            raise ValueError("Sparse binarization is only supported with non "
+                             "zero pos_label and zero neg_label, got "
+                             "pos_label={0} and neg_label={1}"
+                             "".format(pos_label, neg_label))
+
+        self.neg_label = neg_label
+        self.pos_label = pos_label
+        self.sparse_output = sparse_output
+
+    def fit(self, y, session=None, run_kwargs=None):
+        """Fit label binarizer.
+
+        Parameters
+        ----------
+        y : ndarray of shape (n_samples,) or (n_samples, n_classes)
+            Target values. The 2-d matrix should only contain 0 and 1,
+            represents multilabel classification.
+
+        Returns
+        -------
+        self : returns an instance of self.
+        """
+        self.y_type_ = fetch(execute(
+            type_of_target(y), session=session, **(run_kwargs or dict())))
+        if 'multioutput' in self.y_type_:
+            raise ValueError("Multioutput target data is not supported with "
+                             "label binarization")
+        if _num_samples(y) == 0:  # pragma: no cover
+            raise ValueError('y has 0 samples: %r' % y)
+
+        self.sparse_input_ = mt.tensor(y).issparse()
+        self.classes_ = unique_labels(y).execute(
+            session=session, **(run_kwargs or dict()))
+        return self
+
+    def fit_transform(self, y, session=None, run_kwargs=None):
+        """Fit label binarizer and transform multi-class labels to binary
+        labels.
+
+        The output of transform is sometimes referred to as
+        the 1-of-K coding scheme.
+
+        Parameters
+        ----------
+        y : {ndarray, sparse matrix} of shape (n_samples,) or \
+                (n_samples, n_classes)
+            Target values. The 2-d matrix should only contain 0 and 1,
+            represents multilabel classification. Sparse matrix can be
+            CSR, CSC, COO, DOK, or LIL.
+
+        Returns
+        -------
+        Y : {ndarray, sparse matrix} of shape (n_samples, n_classes)
+            Shape will be (n_samples, 1) for binary problems. Sparse matrix
+            will be of CSR format.
+        """
+        return self.fit(y, session=session, run_kwargs=run_kwargs)\
+            .transform(y, session=session, run_kwargs=run_kwargs)
+
+    def transform(self, y, session=None, run_kwargs=None):
+        """Transform multi-class labels to binary labels.
+
+        The output of transform is sometimes referred to by some authors as
+        the 1-of-K coding scheme.
+
+        Parameters
+        ----------
+        y : {array, sparse matrix} of shape (n_samples,) or \
+                (n_samples, n_classes)
+            Target values. The 2-d matrix should only contain 0 and 1,
+            represents multilabel classification. Sparse matrix can be
+            CSR, CSC, COO, DOK, or LIL.
+        session, run_kwargs : optional
+            Session and execute kwargs; also applied to the returned result.
+
+        Returns
+        -------
+        Y : {ndarray, sparse matrix} of shape (n_samples, n_classes)
+            Shape will be (n_samples, 1) for binary problems. Sparse matrix
+            will be of CSR format.
+        """
+        check_is_fitted(self)
+        run_kwargs = run_kwargs or dict()
+        target = fetch(execute(type_of_target(y), session=session, **run_kwargs))
+        if target.startswith('multilabel') and \
+                not self.y_type_.startswith('multilabel'):
+            raise ValueError("The object was not fitted with multilabel input.")
+        # build lazily so the caller's session/run_kwargs drive the execution
+        return label_binarize(
+            y, classes=self.classes_, pos_label=self.pos_label,
+            neg_label=self.neg_label, sparse_output=self.sparse_output,
+            execute=False).execute(session=session, **run_kwargs)
+
+    def inverse_transform(self, Y, threshold=None):
+        """Transform binary labels back to multi-class labels.
+
+        Parameters
+        ----------
+        Y : {ndarray, sparse matrix} of shape (n_samples, n_classes)
+            Target values. All sparse matrices are converted to CSR before
+            inverse transformation.
+
+        threshold : float, default=None
+            Threshold used in the binary and multi-label cases.
+
+            Use 0 when ``Y`` contains the output of decision_function
+            (classifier).
+            Use 0.5 when ``Y`` contains the output of predict_proba.
+
+            If None, the threshold is assumed to be half way between
+            neg_label and pos_label.
+
+        Returns
+        -------
+        y : {ndarray, sparse matrix} of shape (n_samples,)
+            Target values. Sparse matrix will be of CSR format.
+
+        Notes
+        -----
+        In the case when the binary labels are fractional
+        (probabilistic), inverse_transform chooses the class with the
+        greatest value. Typically, this allows to use the output of a
+        linear model's decision_function method directly as the input
+        of inverse_transform.
+        """
+        check_is_fitted(self)
+
+        if threshold is None:
+            threshold = (self.pos_label + self.neg_label) / 2.
+
+        Y = mt.asarray(Y)
+        if self.y_type_ == "multiclass":
+            y_inv = Y.map_chunk(_inverse_binarize_multiclass,
+                                args=(self.classes_,), dtype=self.classes_.dtype,
+                                shape=(Y.shape[0],))
+        else:
+            shape = (Y.shape[0],) if self.y_type_ != 'multilabel-indicator' else Y.shape
+            y_inv = Y.map_chunk(_inverse_binarize_thresholding,
+                                args=(self.y_type_, self.classes_, threshold),
+                                dtype=self.classes_.dtype,
+                                shape=shape)
+
+        if self.sparse_input_:
+            y_inv = y_inv.tosparse()
+        elif y_inv.issparse():
+            y_inv = y_inv.todense()
+
+        return y_inv
+
+    def _more_tags(self):  # pragma: no cover  # noqa: R0201  # pylint: disable=no-self-use
+        return {'X_types': ['1dlabels']}
+
+
+class LabelBinarize(LearnOperand, LearnOperandMixin):
+    _op_type_ = opcodes.LABEL_BINARIZE
+
+    y = AnyField('y')
+    classes = AnyField('classes')
+    neg_label = Int32Field('neg_label')
+    pos_label = Int32Field('pos_label')
+    sparse_output = BoolField('sparse_output')
+    # for chunk
+    y_type = StringField('y_type')
+    pos_switch = BoolField('pos_switch')
+
+    def __call__(self, y: TileableType, classes: TileableType):
+        inputs = []
+        if isinstance(y, ENTITY_TYPE):
+            inputs.append(y)
+        if isinstance(classes, ENTITY_TYPE):
+            inputs.append(classes)
+        self.sparse = self.sparse_output
+        self.output_types = [OutputType.tensor]
+        return self.new_tileable(inputs, shape=(np.nan,),
+                                 dtype=np.dtype(int),
+                                 order=TensorOrder.C_ORDER)
+
+    def _set_inputs(self, inputs):
+        super()._set_inputs(inputs)
+        if isinstance(self.y, ENTITY_TYPE):
+            self.y = self._inputs[0]
+        if isinstance(self.classes, ENTITY_TYPE):
+            self.classes = self._inputs[-1]
+
+    @classmethod
+    def tile(cls, op: "LabelBinarize"):
+        y = op.y
+        classes = op.classes
+        neg_label = op.neg_label
+        pos_label = op.pos_label
+        sparse_output = op.sparse_output
+        out = op.outputs[0]
+        ctx = get_context()
+
+        if (isinstance(y, ENTITY_TYPE) and has_unknown_shape(y)) or (
+                isinstance(classes, ENTITY_TYPE) and has_unknown_shape(classes)):  # pragma: no cover
+            yield
+        if isinstance(classes, ENTITY_TYPE) and len(classes.chunks) > 1:  # pragma: no cover
+            classes = yield from recursive_tile(
+                classes.rechunk(classes.shape))
+
+        if not isinstance(y, list):
+            # XXX Workaround that will be removed when list of list format is
+            # dropped
+            y = check_array(y, accept_sparse=True, ensure_2d=False, dtype=None)
+        else:
+            if _num_samples(y) == 0:
+                raise ValueError('y has 0 samples: %r' % y)
+
+        y = yield from recursive_tile(mt.tensor(y))
+
+        if neg_label >= pos_label:
+            raise ValueError("neg_label={0} must be strictly less than "
+                             "pos_label={1}.".format(neg_label, pos_label))
+
+        if (sparse_output and (pos_label == 0 or neg_label != 0)):
+            raise ValueError("Sparse binarization is only supported with non "
+                             "zero pos_label and zero neg_label, got "
+                             "pos_label={0} and neg_label={1}"
+                             "".format(pos_label, neg_label))
+
+        # To account for pos_label == 0 in the dense case
+        pos_switch = pos_label == 0
+        if pos_switch:
+            pos_label = -neg_label
+
+        y_type = yield from recursive_tile(type_of_target(y))
+        yield y_type.chunks
+        y_type = ctx.get_chunks_result([y_type.chunks[0].key])[0]
+        y_type = y_type.item() if hasattr(y_type, 'item') else y_type
+        if 'multioutput' in y_type:
+            raise ValueError("Multioutput target data is not supported with label "
+                             "binarization")
+        if y_type == 'unknown':
+            raise ValueError("The type of target data is not known")
+
+        n_samples = mt.tensor(y).shape[0]
+        n_classes = len(classes)
+
+        if y_type == "binary":
+            if n_classes == 1:
+                if sparse_output:
+                    return (yield from recursive_tile(
+                        mt.zeros((n_samples, 1), dtype=int, sparse=True)))
+                else:
+                    Y = mt.zeros((len(y), 1), dtype=int)
+                    Y += neg_label
+                    return (yield from recursive_tile(Y))
+            elif len(classes) >= 3:
+                y_type = "multiclass"
+
+        if y_type == "multilabel-indicator":
+            y_n_classes = y.shape[1] if hasattr(y, 'shape') else len(y[0])
+            if mt.tensor(classes).size != y_n_classes:
+                raise ValueError("classes {0} mismatch with the labels {1}"
+                                 " found in the data"
+                                 .format(classes, unique_labels(y)))
+
+        if y_type in ("binary", "multiclass"):
+            y = yield from recursive_tile(column_or_1d(y))
+            if y_type == 'binary':
+                out_shape = (n_samples, 1)
+            else:
+                out_shape = (n_samples, n_classes)
+        elif y_type == 'multilabel-indicator':
+            out_shape = y.shape
+        else:
+            raise ValueError("%s target data is not supported with label "
+                             "binarization" % y_type)
+
+        out_chunks = []
+        for y_chunk in y.chunks:
+            chunk_inputs = [y_chunk]
+            classes_chunk = classes
+            if isinstance(classes, ENTITY_TYPE):
+                chunk_inputs.append(classes.chunks[0])
+                classes_chunk = classes.chunks[0]
+            chunk_op = LabelBinarize(
+                y=y_chunk, classes=classes_chunk, neg_label=neg_label,
+                pos_label=pos_label, sparse_output=sparse_output,
+                y_type=y_type, pos_switch=pos_switch,
+                _output_types=op.output_types)
+            if len(out_shape) == 2:
+                chunk_shape = (y_chunk.shape[0], out_shape[1])
+                chunk_index = (y_chunk.index[0], 0)
+            else:  # pragma: no cover
+                chunk_shape = (y_chunk.shape[0],)
+                chunk_index = (y_chunk.index[0],)
+            out_chunk = chunk_op.new_chunk(chunk_inputs, shape=chunk_shape,
+                                           dtype=out.dtype, order=out.order,
+                                           index=chunk_index)
+            out_chunks.append(out_chunk)
+
+        params = out.params.copy()
+        params['chunks'] = out_chunks
+        params['shape'] = out_shape
+        if len(out_shape) == 2:
+            nsplits = (y.nsplits[0], (out_shape[1],))
+        else:  # pragma: no cover
+            nsplits = (y.nsplits[0],)
+        params['nsplits'] = nsplits
+        return op.copy().new_tileables(op.inputs, kws=[params])
+
+    @classmethod
+    def execute(cls, ctx: Union[dict, Context], op: "LabelBinarize"):
+        """Binarize the chunk ``op.y`` read from ``ctx`` and store the result
+        under the key of ``op.outputs[0]``."""
+        y = ctx[op.y.key]
+        if hasattr(y, 'raw'):
+            # SparseNDArray
+            y = y.raw
+        if isinstance(op.classes, ENTITY_TYPE):
+            classes = ctx[op.classes.key]
+        else:
+            classes = op.classes
+        y_type = op.y_type
+        sparse_output = op.sparse_output
+        pos_label = op.pos_label
+        neg_label = op.neg_label
+        pos_switch = op.pos_switch
+
+        n_samples = y.shape[0] if sp.issparse(y) else len(y)
+        n_classes = len(classes)
+        sorted_class = np.sort(classes)
+
+        if y_type in ('binary', 'multiclass'):
+            # pick out the known labels from y
+            y_in_classes = np.isin(y, classes)
+            y_seen = y[y_in_classes]
+            indices = np.searchsorted(sorted_class, y_seen)
+            indptr = np.hstack((0, np.cumsum(y_in_classes)))
+
+            data = np.empty_like(indices)
+            data.fill(pos_label)
+            Y = sp.csr_matrix((data, indices, indptr),
+                              shape=(n_samples, n_classes))
+        elif y_type == "multilabel-indicator":
+            Y = sp.csr_matrix(y)
+            if pos_label != 1:
+                data = np.empty_like(Y.data)
+                data.fill(pos_label)
+                Y.data = data
+        else:  # pragma: no cover
+            raise ValueError("%s target data is not supported with label "
+                             "binarization" % y_type)
+
+        if not sparse_output:
+            Y = Y.toarray()
+            Y = Y.astype(int, copy=False)
+
+            if neg_label != 0:
+                Y[Y == 0] = neg_label
+
+            if pos_switch:
+                Y[Y == pos_label] = 0
+        else:
+            Y.data = Y.data.astype(int, copy=False)
+
+        # preserve label ordering
+        if np.any(classes != sorted_class):
+            indices = np.searchsorted(sorted_class, classes)
+            Y = Y[:, indices]
+
+        if y_type == "binary":
+            if sparse_output:
+                Y = Y.getcol(-1)
+            else:
+                Y = Y[:, -1].reshape((-1, 1))
+
+        if sp.issparse(Y):
+            Y = SparseNDArray(Y)
+        ctx[op.outputs[0].key] = Y
+
+
+def label_binarize(y, *, classes, neg_label=0, pos_label=1,
+                   sparse_output=False, execute=True):
+    """Binarize labels in a one-vs-all fashion.
+
+    Several regression and binary classification algorithms are
+    available in scikit-learn. A simple way to extend these algorithms
+    to the multi-class classification case is to use the so-called
+    one-vs-all scheme.
+
+    This function makes it possible to compute this transformation for a
+    fixed set of class labels known ahead of time.
+
+    Parameters
+    ----------
+    y : array-like
+        Sequence of integer labels or multilabel data to encode.
+
+    classes : array-like of shape (n_classes,)
+        Uniquely holds the label for each class.
+
+    neg_label : int, default=0
+        Value with which negative labels must be encoded.
+
+    pos_label : int, default=1
+        Value with which positive labels must be encoded.
+
+    sparse_output : bool, default=False
+        Set to true if output binary array is desired in CSR sparse format.
+
+    Returns
+    -------
+    Y : {tensor, sparse tensor} of shape (n_samples, n_classes)
+        Shape will be (n_samples, 1) for binary problems.
+
+    Examples
+    --------
+    >>> from mars.learn.preprocessing import label_binarize
+    >>> label_binarize([1, 6], classes=[1, 2, 4, 6])
+    array([[1, 0, 0, 0],
+           [0, 0, 0, 1]])
+
+    The class ordering is preserved:
+
+    >>> label_binarize([1, 6], classes=[1, 6, 4, 2])
+    array([[1, 0, 0, 0],
+           [0, 1, 0, 0]])
+
+    Binary targets transform to a column vector
+
+    >>> label_binarize(['yes', 'no', 'no', 'yes'], classes=['no', 'yes'])
+    array([[1],
+           [0],
+           [0],
+           [1]])
+
+    See Also
+    --------
+    LabelBinarizer : Class used to wrap the functionality of label_binarize and
+        allow for fitting to classes independently of the transform operation.
+    """
+    op = LabelBinarize(y=y, classes=classes, neg_label=neg_label,
+                       pos_label=pos_label, sparse_output=sparse_output)
+    result = op(y, classes)
+    return result.execute() if execute else result
+
+
+def _inverse_binarize_multiclass(y, classes):  # pragma: no cover
+    """Inverse label binarization transformation for multiclass.
+
+    Multiclass uses the maximal score instead of a threshold.
+    """
+    classes = np.asarray(classes)
+
+    if sp.issparse(y):
+        # Find the argmax for each row in y where y is a CSR matrix
+
+        y = y.tocsr()
+        n_samples, n_outputs = y.shape
+        outputs = np.arange(n_outputs)
+        row_max = min_max_axis(y, 1)[1]
+        row_nnz = np.diff(y.indptr)
+
+        y_data_repeated_max = np.repeat(row_max, row_nnz)
+        # picks out all indices obtaining the maximum per row
+        y_i_all_argmax = np.flatnonzero(y_data_repeated_max == y.data)
+
+        # For corner case where last row has a max of 0
+        if row_max[-1] == 0:
+            y_i_all_argmax = np.append(y_i_all_argmax, [len(y.data)])
+
+        # Gets the index of the first argmax in each row from y_i_all_argmax
+        index_first_argmax = np.searchsorted(y_i_all_argmax, y.indptr[:-1])
+        # first argmax of each row
+        y_ind_ext = np.append(y.indices, [0])
+        y_i_argmax = y_ind_ext[y_i_all_argmax[index_first_argmax]]
+        # Handle rows of all 0
+        y_i_argmax[np.where(row_nnz == 0)[0]] = 0
+
+        # Handles rows with max of 0 that contain negative numbers
+        samples = np.arange(n_samples)[(row_nnz > 0) &
+                                       (row_max.ravel() == 0)]
+        for i in samples:
+            ind = y.indices[y.indptr[i]:y.indptr[i + 1]]
+            y_i_argmax[i] = classes[np.setdiff1d(outputs, ind)][0]
+
+        return classes[y_i_argmax]
+    else:
+        return classes.take(y.argmax(axis=1), mode="clip")
+
+
+def _inverse_binarize_thresholding(y, output_type, classes, threshold):  # pragma: no cover
+    """Inverse label binarization transformation using thresholding."""
+
+    if output_type == "binary" and y.ndim == 2 and y.shape[1] > 2:
+        raise ValueError("output_type='binary', but y.shape = {0}".
+                         format(y.shape))
+
+    if output_type != "binary" and y.shape[1] != len(classes):
+        raise ValueError("The number of class is not equal to the number of "
+                         "dimension of y.")
+
+    classes = np.asarray(classes)
+
+    # Perform thresholding
+    if sp.issparse(y):
+        if threshold > 0:
+            if y.format not in ('csr', 'csc'):
+                y = y.tocsr()
+            y.data = np.array(y.data > threshold, dtype=int)
+            y.eliminate_zeros()
+        else:
+            y = np.array(y.toarray() > threshold, dtype=int)
+    else:
+        y = np.array(y > threshold, dtype=int)
+
+    # Inverse transform data
+    if output_type == "binary":
+        if sp.issparse(y):
+            y = y.toarray()
+        if y.ndim == 2 and y.shape[1] == 2:
+            return classes[y[:, 1]]
+        else:
+            if len(classes) == 1:
+                return np.repeat(classes[0], len(y))
+            else:
+                return classes[y.ravel()]
+
+    elif output_type == "multilabel-indicator":
+        return y
+
+    else:
+        raise ValueError("{0} format is not supported".format(output_type))
diff --git a/mars/learn/preprocessing/tests/test_label.py b/mars/learn/preprocessing/tests/test_label.py
new file mode 100644
index 0000000000..4a640c5704
--- /dev/null
+++ b/mars/learn/preprocessing/tests/test_label.py
@@ -0,0 +1,258 @@
+# Copyright 1999-2021 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import pytest
+import scipy.sparse as sp
+from sklearn.preprocessing._label import _inverse_binarize_thresholding
+from sklearn.preprocessing._label import _inverse_binarize_multiclass
+from sklearn.utils._testing import assert_array_equal, ignore_warnings
+from sklearn.utils.multiclass import type_of_target
+
+from .... import tensor as mt
+from .. import LabelBinarizer, label_binarize
+
+
+def test_label_binarizer(setup):
+    # one-class case defaults to negative label
+    # For dense case:
+    inp = ["pos", "pos", "pos", "pos"]
+    lb = LabelBinarizer(sparse_output=False)
+    expected = np.array([[0, 0, 0, 0]]).T
+    got = lb.fit_transform(inp)
+    assert_array_equal(lb.classes_, ["pos"])
+    assert_array_equal(expected, got)
+    assert_array_equal(lb.inverse_transform(got), inp)
+
+    # For sparse case:
+    lb = LabelBinarizer(sparse_output=True)
+    got = lb.fit_transform(inp)
+    assert got.issparse()
+    assert_array_equal(lb.classes_, ["pos"])
+    assert_array_equal(expected, got.fetch().toarray())
+    assert_array_equal(lb.inverse_transform(got.todense()), inp)
+
+    lb = LabelBinarizer(sparse_output=False)
+    # two-class case
+    inp = ["neg", "pos", "pos", "neg"]
+    expected = np.array([[0, 1, 1, 0]]).T
+    got = lb.fit_transform(inp)
+    assert_array_equal(lb.classes_, ["neg", "pos"])
+    assert_array_equal(expected, got)
+
+    to_invert = np.array([[1, 0],
+                          [0, 1],
+                          [0, 1],
+                          [1, 0]])
+    assert_array_equal(lb.inverse_transform(to_invert), inp)
+
+    # multi-class case
+    inp = ["spam", "ham", "eggs", "ham", "0"]
+    expected = np.array([[0, 0, 0, 1],
+                         [0, 0, 1, 0],
+                         [0, 1, 0, 0],
+                         [0, 0, 1, 0],
+                         [1, 0, 0, 0]])
+    got = lb.fit_transform(inp)
+    assert_array_equal(lb.classes_, ['0', 'eggs', 'ham', 'spam'])
+    assert_array_equal(expected, got)
+    assert_array_equal(lb.inverse_transform(got), inp)
+
+
+def test_label_binarizer_set_label_encoding(setup):
+    lb = LabelBinarizer(neg_label=-2, pos_label=0)
+
+    # two-class case with pos_label=0
+    inp = np.array([0, 1, 1, 0])
+    expected = np.array([[-2, 0, 0, -2]]).T
+    got = lb.fit_transform(mt.tensor(inp))
+    assert_array_equal(expected, got)
+    assert_array_equal(lb.inverse_transform(got), inp)
+
+    lb = LabelBinarizer(neg_label=-2, pos_label=2)
+
+    # multi-class case
+    inp = np.array([3, 2, 1, 2, 0])
+    expected = np.array([[-2, -2, -2, +2],
+                         [-2, -2, +2, -2],
+                         [-2, +2, -2, -2],
+                         [-2, -2, +2, -2],
+                         [+2, -2, -2, -2]])
+    got = lb.fit_transform(inp)
+    assert_array_equal(expected, got)
+    assert_array_equal(lb.inverse_transform(got), inp)
+
+
+@ignore_warnings
+def test_label_binarizer_errors(setup):
+    # Check that invalid arguments yield ValueError
+    one_class = np.array([0, 0, 0, 0])
+    lb = LabelBinarizer().fit(one_class)
+
+    multi_label = [(2, 3), (0,), (0, 2)]
+    with pytest.raises(ValueError):
+        lb.transform(multi_label)
+
+    lb = LabelBinarizer()
+    with pytest.raises(ValueError):
+        lb.transform([])
+    with pytest.raises(ValueError):
+        lb.inverse_transform([])
+
+    with pytest.raises(ValueError):
+        LabelBinarizer(neg_label=2, pos_label=1)
+    with pytest.raises(ValueError):
+        LabelBinarizer(neg_label=2, pos_label=2)
+
+    with pytest.raises(ValueError):
+        LabelBinarizer(neg_label=1, pos_label=2, sparse_output=True)
+
+    # Sequence of seq type should raise ValueError
+    y_seq_of_seqs = [[], [1, 2], [3], [0, 1, 3], [2]]
+    with pytest.raises(ValueError):
+        LabelBinarizer().fit_transform(y_seq_of_seqs)
+
+    # Fail on multioutput data
+    with pytest.raises(ValueError):
+        LabelBinarizer().fit(np.array([[1, 3], [2, 1]]))
+    with pytest.raises(ValueError):
+        label_binarize(np.array([[1, 3], [2, 1]]), classes=[1, 2, 3])
+
+
+def test_label_binarize_with_class_order(setup):
+    out = label_binarize([1, 6], classes=[1, 2, 4, 6])
+    expected = np.array([[1, 0, 0, 0], [0, 0, 0, 1]])
+    assert_array_equal(out, expected)
+
+    # Modified class order
+    out = label_binarize([1, 6], classes=[1, 6, 4, 2])
+    expected = np.array([[1, 0, 0, 0], [0, 1, 0, 0]])
+    assert_array_equal(out, expected)
+
+    out = label_binarize([0, 1, 2, 3], classes=[3, 2, 0, 1])
+    expected = np.array([[0, 0, 1, 0],
+                         [0, 0, 0, 1],
+                         [0, 1, 0, 0],
+                         [1, 0, 0, 0]])
+    assert_array_equal(out, expected)
+
+
+def toarray(a):
+    """Return ``a`` converted through its ``toarray`` method when it has
+    one; any other object is returned unchanged."""
+    return a.toarray() if hasattr(a, "toarray") else a
+
+
+def check_binarized_results(y, classes, pos_label, neg_label, expected):
+    for sparse_output in [True, False]:
+        if ((pos_label == 0 or neg_label != 0) and sparse_output):
+            with pytest.raises(ValueError):
+                label_binarize(y, classes=classes, neg_label=neg_label,
+                               pos_label=pos_label,
+                               sparse_output=sparse_output)
+            continue
+
+        # check label_binarize
+        binarized = label_binarize(y, classes=classes, neg_label=neg_label,
+                                   pos_label=pos_label,
+                                   sparse_output=sparse_output)
+        binarized = binarized.fetch()
+        if hasattr(binarized, 'raw'):
+            binarized = binarized.raw
+        assert_array_equal(toarray(binarized), expected)
+        assert sp.issparse(binarized) == sparse_output
+
+        # check inverse
+        y_type = type_of_target(y)
+        if y_type == "multiclass":
+            inversed = _inverse_binarize_multiclass(binarized, classes=classes)
+
+        else:
+            inversed = _inverse_binarize_thresholding(binarized,
+                                                      output_type=y_type,
+                                                      classes=classes,
+                                                      threshold=((neg_label +
+                                                                  pos_label) /
+                                                                 2.))
+
+        assert_array_equal(toarray(inversed), toarray(y))
+
+        # Check label binarizer
+        lb = LabelBinarizer(neg_label=neg_label, pos_label=pos_label,
+                            sparse_output=sparse_output)
+        binarized = lb.fit_transform(y)
+        assert_array_equal(toarray(binarized), expected)
+        assert binarized.issparse() == sparse_output
+        inverse_output = lb.inverse_transform(binarized)
+        assert_array_equal(toarray(inverse_output), toarray(y))
+        assert inverse_output.issparse() == sp.issparse(y)
+
+
+def test_label_binarize_binary(setup):
+    y = [0, 1, 0]
+    classes = [0, 1]
+    pos_label = 2
+    neg_label = -1
+    expected = np.array([[2, -1], [-1, 2], [2, -1]])[:, 1].reshape((-1, 1))
+
+    check_binarized_results(y, classes, pos_label, neg_label, expected)
+
+    # Binary case where sparse_output = True will not result in a ValueError
+    y = [0, 1, 0]
+    classes = [0, 1]
+    pos_label = 3
+    neg_label = 0
+    expected = np.array([[3, 0], [0, 3], [3, 0]])[:, 1].reshape((-1, 1))
+
+    check_binarized_results(y, classes, pos_label, neg_label, expected)
+
+
+def test_label_binarize_multiclass(setup):
+    y = [0, 1, 2]
+    classes = [0, 1, 2]
+    pos_label = 2
+    neg_label = 0
+    expected = 2 * np.eye(3)
+
+    check_binarized_results(y, classes, pos_label, neg_label, expected)
+
+    with pytest.raises(ValueError):
+        label_binarize(y, classes=classes, neg_label=-1, pos_label=pos_label,
+                       sparse_output=True)
+
+
+def test_label_binarize_multilabel(setup):
+    y_ind = np.array([[0, 1, 0], [1, 1, 1], [0, 0, 0]])
+    classes = [0, 1, 2]
+    pos_label = 2
+    neg_label = 0
+    expected = pos_label * y_ind
+    y_sparse = [sp.csr_matrix(y_ind)]
+
+    for y in [y_ind] + y_sparse:
+        check_binarized_results(y, classes, pos_label, neg_label,
+                                expected)
+
+    with pytest.raises(ValueError):
+        label_binarize(y, classes=classes, neg_label=-1, pos_label=pos_label,
+                       sparse_output=True)
+
+
+def test_invalid_input_label_binarize(setup):
+    with pytest.raises(ValueError):
+        label_binarize([0, 2], classes=[0, 2], pos_label=0, neg_label=1)
+    with pytest.raises(ValueError, match="continuous target data is not "):
+        label_binarize([1.2, 2.7], classes=[0, 1])
+    with pytest.raises(ValueError, match="mismatch with the labels"):
+        label_binarize([[1, 3]], classes=[1, 2, 3])
diff --git a/mars/tensor/base/map_chunk.py b/mars/tensor/base/map_chunk.py
index 19aaf1d945..d43081901b 100644
--- a/mars/tensor/base/map_chunk.py
+++ b/mars/tensor/base/map_chunk.py
@@ -66,7 +66,7 @@ def _set_inputs(self, inputs):
         self._args = replace_objects(self._args, mapping)
         self._kwargs = replace_objects(self._kwargs, mapping)
 
-    def __call__(self, t, dtype=None):
+    def __call__(self, t, dtype=None, shape=None):
         if dtype is None:
             try:
                 kwargs = self.kwargs or dict()
@@ -79,7 +79,10 @@ def __call__(self, t, dtype=None):
                 raise TypeError('Cannot estimate output type of map_chunk call')
             dtype = mock_result.dtype
 
-        new_shape = t.shape if self.elementwise else (np.nan,) * t.ndim
+        if shape is not None:
+            new_shape = shape
+        else:
+            new_shape = t.shape if self.elementwise else (np.nan,) * t.ndim
         inputs = [t] + find_objects(self.args, ENTITY_TYPE) + \
                  find_objects(self.kwargs, ENTITY_TYPE)
         return self.new_tensor(inputs, dtype=dtype, shape=new_shape)
@@ -100,9 +103,10 @@ def tile(cls, op: 'TensorMapChunk'):
         chunks = []
         for c in inp.chunks:
             params = c.params
-            params['dtype'] = inp.dtype
+            params['dtype'] = out.dtype
             if not op.elementwise:
-                params['shape'] = (np.nan,) * c.ndim
+                params['shape'] = (np.nan,) * out.ndim
+                params['index'] = params['index'][:out.ndim]
 
             new_op = op.copy().reset_key()
             new_op.tileable_op_key = out.key
@@ -113,7 +117,7 @@ def tile(cls, op: 'TensorMapChunk'):
 
         new_op = op.copy().reset_key()
         params = out.params
-        nsplits = inp.nsplits
+        nsplits = inp.nsplits[:out.ndim]
         if not op.elementwise:
             nsplits = tuple((np.nan,) * len(sp) for sp in nsplits)
         return new_op.new_tileables([inp], chunks=chunks, nsplits=nsplits, **params)
@@ -176,8 +180,9 @@ def map_chunk(t, func, args=(), **kwargs):
     """
     elementwise = kwargs.pop('elementwise', None)
     dtype = np.dtype(kwargs.pop('dtype')) if 'dtype' in kwargs else None
+    shape = kwargs.pop('shape', None)
     with_chunk_index = kwargs.pop('with_chunk_index', False)
 
     op = TensorMapChunk(func=func, args=args, kwargs=kwargs, elementwise=elementwise,
                         with_chunk_index=with_chunk_index)
-    return op(t, dtype=dtype)
+    return op(t, dtype=dtype, shape=shape)
diff --git a/mars/tensor/datasource/array.py b/mars/tensor/datasource/array.py
index 4bb9d80547..2c46941a75 100644
--- a/mars/tensor/datasource/array.py
+++ b/mars/tensor/datasource/array.py
@@ -265,7 +265,7 @@ def array(x, dtype=None, copy=True, order='K', ndmin=None, chunk_size=None):
         x = x.copy(order=order)
     elif not copy and isinstance(raw_x, TENSOR_TYPE) and raw_x.dtype == x.dtype and \
             raw_x.order == x.order and raw_x.shape == x.shape and \
-            raw_x is not x:
+            raw_x is not x and hasattr(raw_x, 'data'):
         raw_x.data = x.data
 
     return x