diff --git a/include/LightGBM/boosting.h b/include/LightGBM/boosting.h index 31bb430f0aed..c329f5e12186 100644 --- a/include/LightGBM/boosting.h +++ b/include/LightGBM/boosting.h @@ -166,10 +166,11 @@ class LIGHTGBM_EXPORT Boosting { * \brief Feature contributions for the model's prediction of one record * \param feature_values Feature value on this record * \param output Prediction result for this record - * \param early_stop Early stopping instance. If nullptr, no early stopping is applied and all models are evaluated. */ - virtual void PredictContrib(const double* features, double* output, - const PredictionEarlyStopInstance* early_stop) const = 0; + virtual void PredictContrib(const double* features, double* output) const = 0; + + virtual void PredictContribByMap(const std::unordered_map<int, double>& features, + std::vector<std::unordered_map<int, double>>& output) const = 0; /*! * \brief Dump model to json format string diff --git a/include/LightGBM/c_api.h b/include/LightGBM/c_api.h index 6a30fce495c5..c7bbd4d13c95 100644 --- a/include/LightGBM/c_api.h +++ b/include/LightGBM/c_api.h @@ -727,6 +727,45 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForCSR(BoosterHandle handle, int64_t* out_len, double* out_result); +/*! + * \brief Make sparse prediction for a new dataset in CSR format. Currently only used for feature contributions. + * \note + * The outputs are pre-allocated, as they can vary for each invocation, but the shape should be the same: + * - for feature contributions, the shape of sparse matrix will be ``num_class * num_data * (num_feature + 1)``. 
+ * \param handle Handle of booster + * \param indptr Pointer to row headers + * \param indptr_type Type of ``indptr``, can be ``C_API_DTYPE_INT32`` or ``C_API_DTYPE_INT64`` + * \param indices Pointer to column indices + * \param data Pointer to the data space + * \param data_type Type of ``data`` pointer, can be ``C_API_DTYPE_FLOAT32`` or ``C_API_DTYPE_FLOAT64`` + * \param nindptr Number of rows in the matrix + 1 + * \param nelem Number of nonzero elements in the matrix + * \param num_col Number of columns + * \param predict_type What should be predicted, only feature contributions supported currently + * - ``C_API_PREDICT_CONTRIB``: feature contributions (SHAP values) + * \param num_iteration Number of iterations for prediction, <= 0 means no limit + * \param parameter Other parameters for prediction, e.g. early stopping for prediction + * \param[out] out_len Length of output result + * \param[out] out_indices Pointer to sparse indices + * \param[out] out_data Pointer to sparse data space + * \return 0 when succeed, -1 when failure happens + */ +LIGHTGBM_C_EXPORT int LGBM_BoosterPredictSparseForCSR(BoosterHandle handle, + const void* indptr, + int indptr_type, + const int32_t* indices, + const void* data, + int data_type, + int64_t nindptr, + int64_t nelem, + int64_t num_col, + int predict_type, + int num_iteration, + const char* parameter, + int64_t* out_len, + int32_t** out_indices, + void** out_data); + /*! * \brief Make prediction for a new dataset in CSR format. This method re-uses the internal predictor structure * from previous calls and is optimized for single row invocation. @@ -812,6 +851,45 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForCSC(BoosterHandle handle, int64_t* out_len, double* out_result); +/*! + * \brief Make sparse prediction for a new dataset in CSC format. Currently only used for feature contributions. 
+ * \note + * The outputs are pre-allocated, as they can vary for each invocation, but the shape should be the same: + * - for feature contributions, the shape of sparse matrix will be ``num_class * num_data * (num_feature + 1)``. + * \param handle Handle of booster + * \param col_ptr Pointer to column headers + * \param col_ptr_type Type of ``col_ptr``, can be ``C_API_DTYPE_INT32`` or ``C_API_DTYPE_INT64`` + * \param indices Pointer to row indices + * \param data Pointer to the data space + * \param data_type Type of ``data`` pointer, can be ``C_API_DTYPE_FLOAT32`` or ``C_API_DTYPE_FLOAT64`` + * \param ncol_ptr Number of columns in the matrix + 1 + * \param nelem Number of nonzero elements in the matrix + * \param num_row Number of rows + * \param predict_type What should be predicted + * - ``C_API_PREDICT_CONTRIB``: feature contributions (SHAP values) + * \param num_iteration Number of iteration for prediction, <= 0 means no limit + * \param parameter Other parameters for prediction, e.g. early stopping for prediction + * \param[out] out_len Length of output result + * \param[out] out_indices Pointer to sparse indices + * \param[out] out_data Pointer to sparse data space + * \return 0 when succeed, -1 when failure happens + */ +LIGHTGBM_C_EXPORT int LGBM_BoosterPredictSparseForCSC(BoosterHandle handle, + const void* col_ptr, + int col_ptr_type, + const int32_t* indices, + const void* data, + int data_type, + int64_t ncol_ptr, + int64_t nelem, + int64_t num_row, + int predict_type, + int num_iteration, + const char* parameter, + int64_t* out_len, + int32_t** out_indices, + void** out_data); + /*! * \brief Make prediction for a new dataset. 
* \note diff --git a/include/LightGBM/meta.h b/include/LightGBM/meta.h index b15b1ba4b378..7edb0c850a49 100644 --- a/include/LightGBM/meta.h +++ b/include/LightGBM/meta.h @@ -11,6 +11,7 @@ #include <string> #include <utility> #include <vector> +#include <unordered_map> #if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))) || defined(__INTEL_COMPILER) || MM_PREFETCH #include <xmmintrin.h> @@ -58,6 +59,9 @@ typedef int32_t comm_size_t; using PredictFunction = std::function<void(const std::vector<std::pair<int, double>>&, double* output)>; +using PredictSparseFunction = +std::function<void(const std::vector<std::pair<int, double>>&, std::vector<std::unordered_map<int, double>>& output)>; + typedef void(*ReduceFunction)(const char* input, char* output, int type_size, comm_size_t array_size); diff --git a/include/LightGBM/tree.h b/include/LightGBM/tree.h index 047215231fc6..b37e26af55b4 100644 --- a/include/LightGBM/tree.h +++ b/include/LightGBM/tree.h @@ -135,6 +135,8 @@ class Tree { inline int PredictLeafIndexByMap(const std::unordered_map<int, double>& feature_values) const; inline void PredictContrib(const double* feature_values, int num_features, double* output); + inline void PredictContribByMap(const std::unordered_map<int, double>& feature_values, + int num_features, std::unordered_map<int, double>& output); /*! \brief Get Number of leaves*/ inline int num_leaves() const { return num_leaves_; } @@ -382,6 +384,12 @@ class Tree { PathElement *parent_unique_path, double parent_zero_fraction, double parent_one_fraction, int parent_feature_index) const; + void TreeSHAPByMap(const std::unordered_map<int, double>& feature_values, + std::unordered_map<int, double>& phi, + int node, int unique_depth, + PathElement *parent_unique_path, double parent_zero_fraction, + double parent_one_fraction, int parent_feature_index) const; + /*!
\brief Extend our decision path with a fraction of one and zero extensions for TreeSHAP*/ static void ExtendPath(PathElement *unique_path, int unique_depth, double zero_fraction, double one_fraction, int feature_index); @@ -525,6 +533,18 @@ inline void Tree::PredictContrib(const double* feature_values, int num_features, } } +inline void Tree::PredictContribByMap(const std::unordered_map<int, double>& feature_values, + int num_features, std::unordered_map<int, double>& output) { + output[num_features] += ExpectedValue(); + // Run the recursion with preallocated space for the unique path data + if (num_leaves_ > 1) { + CHECK_GE(max_depth_, 0); + const int max_path_len = max_depth_ + 1; + std::vector<PathElement> unique_path_data(max_path_len*(max_path_len + 1) / 2); + TreeSHAPByMap(feature_values, output, 0, 0, unique_path_data.data(), 1, 1, -1); + } +} + inline void Tree::RecomputeLeafDepths(int node, int depth) { if (node == 0) leaf_depth_.resize(num_leaves()); if (node < 0) { diff --git a/src/application/predictor.hpp b/src/application/predictor.hpp index 1c56cfa5eb2c..025843bcce47 100644 --- a/src/application/predictor.hpp +++ b/src/application/predictor.hpp @@ -87,13 +87,19 @@ class Predictor { predict_fun_ = [=](const std::vector<std::pair<int, double>>& features, double* output) { int tid = omp_get_thread_num(); - CopyToPredictBuffer(predict_buf_[tid].data(), features); - // get result for leaf index - boosting_->PredictContrib(predict_buf_[tid].data(), output, - &early_stop_); - ClearPredictBuffer(predict_buf_[tid].data(), predict_buf_[tid].size(), - features); + CopyToPredictBuffer(predict_buf_[tid].data(), features); + // get feature importances + boosting_->PredictContrib(predict_buf_[tid].data(), output); + ClearPredictBuffer(predict_buf_[tid].data(), predict_buf_[tid].size(), + features); + }; + predict_sparse_fun_ = [=](const std::vector<std::pair<int, double>>& features, + std::vector<std::unordered_map<int, double>>& output) { + auto buf = CopyToPredictMap(features); + // get sparse feature importances + boosting_->PredictContribByMap(buf, output); }; + } else { if 
(is_raw_score) { predict_fun_ = [=](const std::vector<std::pair<int, double>>& features, @@ -140,6 +146,11 @@ class Predictor { return predict_fun_; } + + inline const PredictSparseFunction& GetPredictSparseFunction() const { + return predict_sparse_fun_; + } + /*! * \brief predicting on data, then saving result to disk * \param data_filename Filename of data @@ -275,6 +286,7 @@ class Predictor { const Boosting* boosting_; /*! \brief function for prediction */ PredictFunction predict_fun_; + PredictSparseFunction predict_sparse_fun_; PredictionEarlyStopInstance early_stop_; int num_feature_; int num_pred_one_row_; diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp index 6a2e3e27c791..9750ead51802 100644 --- a/src/boosting/gbdt.cpp +++ b/src/boosting/gbdt.cpp @@ -571,8 +571,7 @@ const double* GBDT::GetTrainingScore(int64_t* out_len) { return train_score_updater_->score(); } -void GBDT::PredictContrib(const double* features, double* output, const PredictionEarlyStopInstance* early_stop) const { - int early_stop_round_counter = 0; +void GBDT::PredictContrib(const double* features, double* output) const { // set zero const int num_features = max_feature_idx_ + 1; std::memset(output, 0, sizeof(double) * num_tree_per_iteration_ * (num_features + 1)); @@ -581,17 +580,20 @@ void GBDT::PredictContrib(const double* features, double* output, const Predicti for (int k = 0; k < num_tree_per_iteration_; ++k) { models_[i * num_tree_per_iteration_ + k]->PredictContrib(features, num_features, output + k*(num_features + 1)); } - // check early stopping - ++early_stop_round_counter; - if (early_stop->round_period == early_stop_round_counter) { - if (early_stop->callback_function(output, num_tree_per_iteration_)) { - return; - } - early_stop_round_counter = 0; - } } } +void GBDT::PredictContribByMap(const std::unordered_map<int, double>& features, + std::vector<std::unordered_map<int, double>>& output) const { + const int num_features = max_feature_idx_ + 1; + for (int i = 0; i < num_iteration_for_pred_; ++i) { + // predict all the trees for 
one iteration + for (int k = 0; k < num_tree_per_iteration_; ++k) { + models_[i * num_tree_per_iteration_ + k]->PredictContribByMap(features, num_features, output[k]); + } + } +} + void GBDT::GetPredictAt(int data_idx, double* out_result, int64_t* out_len) { CHECK(data_idx >= 0 && data_idx <= static_cast<int>(valid_score_updater_.size())); diff --git a/src/boosting/gbdt.h b/src/boosting/gbdt.h index 67c30c86be2e..2069708ff1e8 100644 --- a/src/boosting/gbdt.h +++ b/src/boosting/gbdt.h @@ -240,8 +240,10 @@ class GBDT : public GBDTBase { void PredictLeafIndexByMap(const std::unordered_map<int, double>& features, double* output) const override; - void PredictContrib(const double* features, double* output, - const PredictionEarlyStopInstance* earlyStop) const override; + void PredictContrib(const double* features, double* output) const override; + + void PredictContribByMap(const std::unordered_map<int, double>& features, + std::vector<std::unordered_map<int, double>>& output) const override; /*! * \brief Dump model to json format string diff --git a/src/io/tree.cpp b/src/io/tree.cpp index be928b7e3124..08c21eb767ef 100644 --- a/src/io/tree.cpp +++ b/src/io/tree.cpp @@ -727,6 +727,56 @@ void Tree::TreeSHAP(const double *feature_values, double *phi, } } +// recursive sparse computation of SHAP values for a decision tree +void Tree::TreeSHAPByMap(const std::unordered_map<int, double>& feature_values, std::unordered_map<int, double>& phi, + int node, int unique_depth, + PathElement *parent_unique_path, double parent_zero_fraction, + double parent_one_fraction, int parent_feature_index) const { + // extend the unique path + PathElement* unique_path = parent_unique_path + unique_depth; + if (unique_depth > 0) std::copy(parent_unique_path, parent_unique_path + unique_depth, unique_path); + ExtendPath(unique_path, unique_depth, parent_zero_fraction, + parent_one_fraction, parent_feature_index); + + // leaf node + if (node < 0) { + for (int i = 1; i <= unique_depth; ++i) { + const double w = UnwoundPathSum(unique_path, unique_depth, i); + const PathElement &el = 
unique_path[i]; + phi[el.feature_index] += w*(el.one_fraction - el.zero_fraction)*leaf_value_[~node]; + } + + // internal node + } else { + const int hot_index = Decision(feature_values.count(split_feature_[node]) > 0 ? feature_values.at(split_feature_[node]) : 0.0f, node); + const int cold_index = (hot_index == left_child_[node] ? right_child_[node] : left_child_[node]); + const double w = data_count(node); + const double hot_zero_fraction = data_count(hot_index) / w; + const double cold_zero_fraction = data_count(cold_index) / w; + double incoming_zero_fraction = 1; + double incoming_one_fraction = 1; + + // see if we have already split on this feature, + // if so we undo that split so we can redo it for this node + int path_index = 0; + for (; path_index <= unique_depth; ++path_index) { + if (unique_path[path_index].feature_index == split_feature_[node]) break; + } + if (path_index != unique_depth + 1) { + incoming_zero_fraction = unique_path[path_index].zero_fraction; + incoming_one_fraction = unique_path[path_index].one_fraction; + UnwindPath(unique_path, unique_depth, path_index); + unique_depth -= 1; + } + + TreeSHAPByMap(feature_values, phi, hot_index, unique_depth + 1, unique_path, + hot_zero_fraction*incoming_zero_fraction, incoming_one_fraction, split_feature_[node]); + + TreeSHAPByMap(feature_values, phi, cold_index, unique_depth + 1, unique_path, + cold_zero_fraction*incoming_zero_fraction, 0, split_feature_[node]); + } +} + double Tree::ExpectedValue() const { if (num_leaves_ == 1) return LeafOutput(0); const double total_count = internal_count_[0];