diff --git a/algorithms/linfa-ensemble/Cargo.toml b/algorithms/linfa-ensemble/Cargo.toml index c5440506c..9985c6c1c 100644 --- a/algorithms/linfa-ensemble/Cargo.toml +++ b/algorithms/linfa-ensemble/Cargo.toml @@ -30,7 +30,10 @@ linfa-datasets = { version = "0.7.0", path = "../../datasets/", features = ["iri ndarray = { version = "0.15" , features = ["rayon", "approx"]} ndarray-rand = "0.14" rand = { version = "0.8", features = ["small_rng"] } +approx = {version = "0.5"} [dev-dependencies] rand = { version = "0.8", features = ["small_rng"] } -linfa-datasets = { version = "0.7.0", path = "../../datasets/", features = ["iris"] } \ No newline at end of file +linfa-datasets = { version = "0.7.0", path = "../../datasets/", features = ["iris"] } +ndarray = { version = "0.15" , features = ["rayon", "approx"]} +approx = {version = "0.5"} \ No newline at end of file diff --git a/algorithms/linfa-ensemble/examples/adaboost.rs b/algorithms/linfa-ensemble/examples/adaboost.rs index 2fa0ea019..69bd2fa78 100644 --- a/algorithms/linfa-ensemble/examples/adaboost.rs +++ b/algorithms/linfa-ensemble/examples/adaboost.rs @@ -8,7 +8,6 @@ use rand::rngs::SmallRng; use linfa::prelude::*; use linfa_ensemble::{Adaboost, Result}; - fn main() -> Result<()> { let mut rng = SmallRng::seed_from_u64(42); @@ -17,9 +16,15 @@ fn main() -> Result<()> { .split_with_ratio(0.8); println!("Training model with Adaboost ..."); - let ada_model = Adaboost::::params().n_estimators(10) - .d_tree_params(DecisionTreeParams::new().max_depth(Some(2)).min_weight_leaf(0.00001).min_weight_split(0.00001)) - .fit(&train)?; + let ada_model = Adaboost::::params() + .n_estimators(10) + .d_tree_params( + DecisionTreeParams::new() + .max_depth(Some(2)) + .min_weight_leaf(0.00001) + .min_weight_split(0.00001), + ) + .fit(&train)?; let ada_pred_y = ada_model.predict(&test); let cm = ada_pred_y.confusion_matrix(&test)?; @@ -32,4 +37,4 @@ fn main() -> Result<()> { ); Ok(()) -} \ No newline at end of file +} diff --git a/algorithms/linfa-ensemble/examples/random_forest.rs b/algorithms/linfa-ensemble/examples/random_forest.rs index 3361d218d..5edf25391 100644 --- a/algorithms/linfa-ensemble/examples/random_forest.rs +++ b/algorithms/linfa-ensemble/examples/random_forest.rs @@ -1,38 +1,37 @@ //! 
Random Forest -use ndarray_rand::rand::SeedableRng; -use rand::rngs::SmallRng; -use linfa_trees::DecisionTree; -use linfa::traits::Fit; use linfa::prelude::{Predict, ToConfusionMatrix}; +use linfa::traits::Fit; use linfa_ensemble::EnsembleLearnerParams; +use linfa_trees::DecisionTree; +use ndarray_rand::rand::SeedableRng; +use rand::rngs::SmallRng; fn main() { + //Number of models in the ensemble + let ensemble_size = 100; + //Proportion of training data given to each model + let bootstrap_proportion = 0.7; - //Number of models in the ensemble - let ensemble_size = 100; - //Proportion of training data given to each model - let bootstrap_proportion = 0.7; - - //Load dataset - let mut rng = SmallRng::seed_from_u64(42); - let (train, test) = linfa_datasets::iris() - .shuffle(&mut rng) - .split_with_ratio(0.7); + //Load dataset + let mut rng = SmallRng::seed_from_u64(42); + let (train, test) = linfa_datasets::iris() + .shuffle(&mut rng) + .split_with_ratio(0.7); - //Train ensemble learner model - let model = EnsembleLearnerParams::new(DecisionTree::params()) - .ensemble_size(ensemble_size) - .bootstrap_proportion(bootstrap_proportion) - .fit(&train) - .unwrap(); + //Train ensemble learner model + let model = EnsembleLearnerParams::new(DecisionTree::params()) + .ensemble_size(ensemble_size) + .bootstrap_proportion(bootstrap_proportion) + .fit(&train) + .unwrap(); - //Return highest ranking predictions - let final_predictions_ensemble = model.predict(&test); - println!("Final Predictions: \n{:?}", final_predictions_ensemble); + //Return highest ranking predictions + let final_predictions_ensemble = model.predict(&test); + println!("Final Predictions: \n{:?}", final_predictions_ensemble); - let cm = final_predictions_ensemble.confusion_matrix(&test).unwrap(); + let cm = final_predictions_ensemble.confusion_matrix(&test).unwrap(); - println!("{:?}", cm); - println!("Test accuracy: {} \n with default Decision Tree params, \n Ensemble Size: {},\n Bootstrap Proportion: {}", + println!("{:?}", cm); + println!("Test accuracy: {} \n with default Decision Tree params, \n Ensemble Size: {},\n Bootstrap Proportion: {}", 100.0 * cm.accuracy(), ensemble_size, bootstrap_proportion); -} \ No newline at end of file +} diff --git a/algorithms/linfa-ensemble/src/adaboost/algorithm.rs b/algorithms/linfa-ensemble/src/adaboost/algorithm.rs index e6a48c1d1..6592096c7 100644 --- a/algorithms/linfa-ensemble/src/adaboost/algorithm.rs +++ b/algorithms/linfa-ensemble/src/adaboost/algorithm.rs @@ -1,19 +1,13 @@ -use std::{collections::{HashMap}, iter::zip}; +use std::{collections::HashMap, iter::zip}; -use linfa_trees::{DecisionTree}; -use linfa::{ - dataset::{Labels}, - error::Error, - error::Result, - traits::*, - DatasetBase, Float, Label, -}; +use linfa::{dataset::Labels, error::Error, error::Result, traits::*, DatasetBase, Float, Label}; +use linfa_trees::DecisionTree; -#[cfg(feature = "serde")] -use serde_crate::{Deserialize, Serialize}; +use super::AdaboostValidParams; use linfa::dataset::AsSingleTargets; -use super::{AdaboostValidParams}; use ndarray::{Array1, ArrayBase, Data, Ix2}; +#[cfg(feature = "serde")] +use serde_crate::{Deserialize, Serialize}; // adaboost will be a vector of stumps // stump will contain a decision tree and a weight associated with that stump @@ -25,13 +19,13 @@ use ndarray::{Array1, ArrayBase, Data, Ix2}; serde(crate = "serde_crate") )] #[derive(Debug, Clone, PartialEq)] -pub struct Stump { - tree: DecisionTree, +pub struct Stump { + tree: DecisionTree, weight: f32, } -impl Stump { - fn 
make_stump(tree: DecisionTree ,weight: f32) -> Self { +impl Stump { + fn make_stump(tree: DecisionTree, weight: f32) -> Self { Stump { tree, weight } } } @@ -42,8 +36,8 @@ impl Stump { serde(crate = "serde_crate") )] #[derive(Debug, Clone, PartialEq)] -pub struct Adaboost { - stumps: Vec>, +pub struct Adaboost { + stumps: Vec>, } impl> PredictInplace, Array1> @@ -58,7 +52,7 @@ impl> PredictInplace> = Vec::new(); + let mut map: Vec> = Vec::new(); for stump in self.stumps.iter() { // go over each and aggregate the weights of the stump in a hashmap for pred in stump.tree.predict(x).iter() { @@ -90,9 +84,8 @@ impl> PredictInplace Fit, T, Error> - for AdaboostValidParams + for AdaboostValidParams where D: Data, T: AsSingleTargets + Labels, @@ -108,21 +101,25 @@ where let weights = vec![sample_weight; dataset.records().nrows()]; // updating the dataset to have the weights by creating a new dataset - let mut data = DatasetBase::new(dataset.records.view().clone(), dataset.targets.as_targets().clone()).with_feature_names(dataset.feature_names().clone()).with_weights(Array1::from_vec(weights)); + let mut data = DatasetBase::new( + dataset.records.view().clone(), + dataset.targets.as_targets().clone(), + ) + .with_feature_names(dataset.feature_names().clone()) + .with_weights(Array1::from_vec(weights)); // for lifetime purpose let binding = dataset.targets.as_targets(); // collect all the different unique classes let classes: std::collections::HashSet<&L> = binding.iter().collect(); let num_classes = classes.len(); - + // lowest f32 value allowed let eps = f32::EPSILON; let differential = 1.0 - eps; - let mut stumps: Vec> = Vec::new(); + let mut stumps: Vec> = Vec::new(); for i in 0..self.n_estimators() { - let tree_params = self.d_tree_params(); let tree = tree_params.fit(&data)?; // Debug: @@ -134,58 +131,63 @@ where // predict the data and accumulate the error for wrongly predicted samples let predictions = tree.predict(&data); - for ((idx, pred), weight) in zip(dataset.targets().as_targets().iter().enumerate(), data.weights().unwrap().iter()){ + for ((idx, pred), weight) in zip( + dataset.targets().as_targets().iter().enumerate(), + data.weights().unwrap().iter(), + ) { if predictions[idx] != *pred { error += weight; } } - + // To avoid 0 errors error = error.min(differential); - let alpha: f32 = ((num_classes-1) as f32).ln() + self.learning_rate() * ((1.0 - error) / error ).ln(); + let alpha: f32 = ((num_classes - 1) as f32).ln() + + self.learning_rate() * ((1.0 - error) / error).ln(); // From sklearn: sample_weight = np.exp(np.log(sample_weight)+ estimator_weight * incorrect * (sample_weight > 0)) - + // update weights in dataset let mut updated_weights: Vec = Vec::new(); - for ((idx,pred), weight) in zip(dataset.targets().as_targets().iter().enumerate(), data.weights().unwrap().iter()){ + for ((idx, pred), weight) in zip( + dataset.targets().as_targets().iter().enumerate(), + data.weights().unwrap().iter(), + ) { if *weight > 0.0 && predictions[idx] != *pred { let delta = f32::exp(f32::ln(*weight) + alpha); updated_weights.push(delta); - } else { - updated_weights.push(*weight); + updated_weights.push(*weight); } } - + // normalize the weights let updated_weights = &Array1::from_vec(updated_weights); - let normalized_weights = (updated_weights)/(updated_weights.sum()); + let normalized_weights = (updated_weights) / (updated_weights.sum()); // update the weights in the dataset for new stump - data = DatasetBase::new(dataset.records.view().clone(), 
dataset.targets.as_targets().clone()).with_feature_names(dataset.feature_names().clone()).with_weights(normalized_weights); + data = DatasetBase::new( + dataset.records.view().clone(), + dataset.targets.as_targets().clone(), + ) + .with_feature_names(dataset.feature_names().clone()) + .with_weights(normalized_weights); // push the stump with it's weight stumps.push(Stump::make_stump(tree, alpha)); - } - Ok(Adaboost{ - stumps, - }) - + Ok(Adaboost { stumps }) } } - - #[cfg(test)] mod tests { use super::*; use linfa::{error::Result, Dataset}; use linfa_trees::DecisionTreeParams; - use ndarray::{array}; + use ndarray::array; use crate::AdaboostParams; @@ -203,12 +205,17 @@ mod tests { let targets = array![0, 0, 1]; let dataset = Dataset::new(data.clone(), targets); - let model = Adaboost::params().n_estimators(5).d_tree_params(DecisionTreeParams::new().min_weight_leaf(0.00001).min_weight_split(0.00001)).fit(&dataset)?; + let model = Adaboost::params() + .n_estimators(5) + .d_tree_params( + DecisionTreeParams::new() + .min_weight_leaf(0.00001) + .min_weight_split(0.00001), + ) + .fit(&dataset)?; assert_eq!(model.predict(&data), array![0, 0, 1]); Ok(()) } - - -} \ No newline at end of file +} diff --git a/algorithms/linfa-ensemble/src/adaboost/hyperparams.rs b/algorithms/linfa-ensemble/src/adaboost/hyperparams.rs index 351816b8a..064e77557 100644 --- a/algorithms/linfa-ensemble/src/adaboost/hyperparams.rs +++ b/algorithms/linfa-ensemble/src/adaboost/hyperparams.rs @@ -7,21 +7,20 @@ use linfa::{ use serde_crate::{Deserialize, Serialize}; use crate::Adaboost; -use linfa_trees::{DecisionTreeParams}; +use linfa_trees::DecisionTreeParams; #[cfg_attr( feature = "serde", derive(Serialize, Deserialize), serde(crate = "serde_crate") )] #[derive(Clone, Copy, Debug, PartialEq)] -pub struct AdaboostValidParams { +pub struct AdaboostValidParams { n_estimators: usize, learning_rate: f32, d_tree_params: DecisionTreeParams, } -impl AdaboostValidParams { - +impl AdaboostValidParams { pub fn learning_rate(&self) -> f32 { self.learning_rate } @@ -30,7 +29,7 @@ impl AdaboostValidParams { self.n_estimators } - pub fn d_tree_params(&self) -> DecisionTreeParams { + pub fn d_tree_params(&self) -> DecisionTreeParams { self.d_tree_params.clone() } } @@ -41,14 +40,16 @@ impl AdaboostValidParams { serde(crate = "serde_crate") )] #[derive(Clone, Copy, Debug, PartialEq)] -pub struct AdaboostParams(AdaboostValidParams); +pub struct AdaboostParams(AdaboostValidParams); -impl AdaboostParams { +impl AdaboostParams { pub fn new() -> Self { - Self(AdaboostValidParams { + Self(AdaboostValidParams { learning_rate: 0.5, n_estimators: 50, - d_tree_params: DecisionTreeParams::new().min_weight_leaf(0.00001).min_weight_split(0.00001), + d_tree_params: DecisionTreeParams::new() + .min_weight_leaf(0.00001) + .min_weight_split(0.00001), }) } @@ -65,15 +66,13 @@ impl AdaboostParams { } /// Sets the params for the weak learner used in Adaboost - pub fn d_tree_params(mut self, d_tree_params: DecisionTreeParams) -> Self { + pub fn d_tree_params(mut self, d_tree_params: DecisionTreeParams) -> Self { self.0.d_tree_params = d_tree_params; self } - - } -impl Default for AdaboostParams { +impl Default for AdaboostParams { fn default() -> Self { Self::new() } @@ -85,13 +84,13 @@ impl Adaboost { /// * `learning_rate = 0.00001` // Violates the convention that new should return a value of type `Self` #[allow(clippy::new_ret_no_self)] - pub fn params() -> AdaboostParams { + pub fn params() -> AdaboostParams { AdaboostParams::new() } } -impl ParamGuard 
for AdaboostParams { - type Checked = AdaboostValidParams; +impl ParamGuard for AdaboostParams { + type Checked = AdaboostValidParams; type Error = Error; fn check_ref(&self) -> Result<&Self::Checked> { @@ -100,8 +99,7 @@ impl ParamGuard for AdaboostParams { "Minimum learning rate should be greater than zero, but was {}", self.0.learning_rate ))) - } - else { + } else { Ok(&self.0) } } @@ -110,4 +108,4 @@ impl ParamGuard for AdaboostParams { self.check_ref()?; Ok(self.0) } -} \ No newline at end of file +} diff --git a/algorithms/linfa-ensemble/src/adaboost/mod.rs b/algorithms/linfa-ensemble/src/adaboost/mod.rs index e41dce25d..35c8da056 100644 --- a/algorithms/linfa-ensemble/src/adaboost/mod.rs +++ b/algorithms/linfa-ensemble/src/adaboost/mod.rs @@ -2,4 +2,4 @@ mod algorithm; mod hyperparams; pub use algorithm::*; -pub use hyperparams::*; \ No newline at end of file +pub use hyperparams::*; diff --git a/algorithms/linfa-ensemble/src/gradient_boost/README.md b/algorithms/linfa-ensemble/src/gradient_boost/README.md new file mode 100644 index 000000000..dc3ec74ec --- /dev/null +++ b/algorithms/linfa-ensemble/src/gradient_boost/README.md @@ -0,0 +1,3 @@ +# Classifier +- Start with `log(odds)` as the initial leaf (a.k.a base estimator) of prediction using the classes. +- `log(proba_kth_class)` should be one-vs-all label classification diff --git a/algorithms/linfa-ensemble/src/gradient_boost/algorithm.rs b/algorithms/linfa-ensemble/src/gradient_boost/algorithm.rs new file mode 100644 index 000000000..fcd0ebd98 --- /dev/null +++ b/algorithms/linfa-ensemble/src/gradient_boost/algorithm.rs @@ -0,0 +1,160 @@ +use ndarray::{Array1, Array2}; + +use crate::random_forest::DecisionTreeRegressor; + + +/* +Source of Algorithm implemented is taken from the blog: +https://lewtun.github.io/hepml/lesson05_gradient-boosting-deep-dive/ +*/ + +pub struct GBRegressor { + trees: Vec, + num_trees: usize, + learning_rate: f64, + max_features: usize, // Maximum number of features to consider for each split + max_depth: usize, + min_samples_split: usize, + init_train_target_mean: f64, +} + +impl GBRegressor { + pub fn new( + num_trees: usize, + learning_rate: f64, + max_features: usize, + max_depth: usize, + min_samples_split: usize, + ) -> Self { + GBRegressor { + trees: Vec::new(), + num_trees, + learning_rate, + max_features, + max_depth, + min_samples_split, + init_train_target_mean: 0.0, + } + } + + pub fn fit(&mut self, features: &Array2, targets: &Array1) { + + self.init_train_target_mean = targets.mean().unwrap_or(0.0); + + let mut y_pred = vec![self.init_train_target_mean; targets.dim()]; + + for idx in 0..self.num_trees { + let gradient = self.gradient(&y_pred, targets); + // let tree = DecisionTreeRegressor::new(self.max_depth, self.min_samples_split); + // tree.fit(&features, &Array1::from_vec(gradient)); + // self.trees.push(tree); + + let tree = self.create_and_train_tree(features, &Array1::from_vec(gradient)); + self.trees.push(tree); + + let update: Array1 = self.trees[idx].predict(features); + y_pred = self.update_preds(&update, &y_pred); + } + } + + fn gradient(&self, y_pred: &Vec, targets: &Array1) -> Vec { + y_pred + .iter() + .zip(targets.iter()) + .map(|(pred, target)| (target - pred)) + .collect() + } + + fn update_preds(&self, update_vals: &Array1, y_pred: &Vec) -> Vec { + y_pred + .iter() + .zip(update_vals.iter()) + .map(|(pred, val)| pred + (self.learning_rate * val)) + .collect() + } + + fn create_and_train_tree( + &mut self, + features: &Array2, + targets: &Array1, + ) -> 
DecisionTreeRegressor { + let mut tree = DecisionTreeRegressor::new(self.max_depth, self.min_samples_split); + tree.fit(&features, &targets); + tree + } + + pub fn predict(&self, features: &Array2) -> Array1 { + + let mut predictions: Vec> = Vec::new(); + for tree in &self.trees { + let prediction = tree.predict(features) * self.learning_rate; + predictions.push(prediction); + } + + let num_samples = features.nrows(); + let mut final_predictions = Array1::::zeros(num_samples); + final_predictions.fill(self.init_train_target_mean); + + for prediction in predictions { + final_predictions += &prediction; + } + // final_predictions /= self.num_trees as f64; + final_predictions + } +} + + +#[cfg(test)] +mod tests { + use super::*; + use ndarray::{Array1, Array2}; // For floating-point assertions + + fn load_iris_data() -> (Array2, Array1) { + // Load the dataset + let dataset = linfa_datasets::iris(); + + // Extract features; assuming all rows and all but the last column if last is target + let features = dataset.records().clone(); + + // Assuming the target are the labels, we need to convert them or use them as is depending on the use case. + // If you need to predict a feature, then split differently as shown in previous messages. + // Here we just clone the labels for the demonstration. + let targets = dataset.targets().mapv(|x| x as f64); + + (features, targets) + } + + #[test] + fn test_gradient_boost_with_iris() { + let (features, targets) = load_iris_data(); + + let mut gb_regressor = GBRegressor::new(50, 0.1, 4, 10, 3); + gb_regressor.fit(&features, &targets); + let predictions = gb_regressor.predict(&features); + + // Define a tolerance level + let tolerance = 0.01; // Tolerance level for correct classification + let mut correct = 0; + let mut incorrect = 0; + + // Count correct and incorrect predictions + for (&actual, &predicted) in targets.iter().zip(predictions.iter()) { + if (predicted - actual).abs() < tolerance { + correct += 1; + } else { + incorrect += 1; + } + } + + println!("Correct predictions: {}", correct); + println!("Incorrect predictions: {}", incorrect); + + let rmse = (&predictions - &targets) + .mapv(|a| a.powi(2)) + .mean() + .unwrap() + .sqrt(); + + println!("RMSE: {:?}", rmse); + } +} diff --git a/algorithms/linfa-ensemble/src/gradient_boost/mod.rs b/algorithms/linfa-ensemble/src/gradient_boost/mod.rs new file mode 100644 index 000000000..d46b6269f --- /dev/null +++ b/algorithms/linfa-ensemble/src/gradient_boost/mod.rs @@ -0,0 +1,5 @@ +mod algorithm; +// mod hyperparams; + +pub use algorithm::*; +// pub use hyperparams::*; diff --git a/algorithms/linfa-ensemble/src/lib.rs b/algorithms/linfa-ensemble/src/lib.rs index 3364ca14e..de31e5cc8 100644 --- a/algorithms/linfa-ensemble/src/lib.rs +++ b/algorithms/linfa-ensemble/src/lib.rs @@ -1,6 +1,11 @@ -mod random_forest; mod adaboost; -pub use random_forest::*; +mod random_forest; +// mod random_forest_regressor; +mod gradient_boost; + pub use adaboost::*; +pub use random_forest::*; +// pub use random_forest_regressor::*; +pub use gradient_boost::*; pub use linfa::error::Result; diff --git a/algorithms/linfa-ensemble/src/random_forest/algorithm.rs b/algorithms/linfa-ensemble/src/random_forest/algorithm.rs index 3f5d80b4e..2f7ef3108 100644 --- a/algorithms/linfa-ensemble/src/random_forest/algorithm.rs +++ b/algorithms/linfa-ensemble/src/random_forest/algorithm.rs @@ -3,17 +3,17 @@ use linfa::{ error::{Error, Result}, traits::*, DatasetBase, ParamGuard, - }; - use ndarray::{Array, Array2, Axis, Dimension}; - use 
rand::rngs::ThreadRng; - use rand::Rng; - use std::{cmp::Eq, collections::HashMap, hash::Hash}; - - pub struct EnsembleLearner { +}; +use ndarray::{Array, Array2, Axis, Dimension}; +use rand::rngs::ThreadRng; +use rand::Rng; +use std::{cmp::Eq, collections::HashMap, hash::Hash}; + +pub struct EnsembleLearner { pub models: Vec, - } - - impl EnsembleLearner { +} + +impl EnsembleLearner { // Generates prediction iterator returning predictions from each model pub fn generate_predictions<'b, R: Records, T>( &'b self, @@ -24,7 +24,7 @@ use linfa::{ { self.models.iter().map(move |m| m.predict(x)) } - + // Consumes prediction iterator to return all predictions pub fn aggregate_predictions( &self, @@ -43,11 +43,11 @@ use linfa::{ ::Elem: Copy + Eq + Hash, { let mut prediction_maps = Vec::new(); - + for y in ys { let targets = y.as_targets(); let no_targets = targets.shape()[0]; - + for i in 0..no_targets { if prediction_maps.len() == i { prediction_maps.push(HashMap::new()); @@ -57,21 +57,21 @@ use linfa::{ .or_insert(0) += 1; } } - + prediction_maps.into_iter().map(|xs| { let mut xs: Vec<_> = xs.into_iter().collect(); xs.sort_by(|(_, x), (_, y)| y.cmp(x)); xs }) } - } - - impl PredictInplace, T> for EnsembleLearner - where +} + +impl PredictInplace, T> for EnsembleLearner +where M: PredictInplace, T>, ::Elem: Copy + Eq + Hash, T: AsTargets + AsTargetsMut::Elem>, - { +{ fn predict_inplace(&self, x: &Array2, y: &mut T) { let mut y_array = y.as_targets_mut(); assert_eq!( @@ -79,10 +79,10 @@ use linfa::{ y_array.len_of(Axis(0)), "The number of data points must match the number of outputs." ); - + let mut predictions = self.generate_predictions(x); let aggregated_predictions = self.aggregate_predictions(&mut predictions); - + for (target, output) in y_array .axis_iter_mut(Axis(0)) .zip(aggregated_predictions.into_iter()) @@ -92,30 +92,30 @@ use linfa::{ } } } - + fn default_target(&self, x: &Array2) -> T { self.models[0].default_target(x) } - } - - #[derive(Clone, Copy, Debug, PartialEq)] - pub struct EnsembleLearnerValidParams { +} + +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct EnsembleLearnerValidParams { pub ensemble_size: usize, pub bootstrap_proportion: f64, pub model_params: P, pub rng: R, - } - - #[derive(Clone, Copy, Debug, PartialEq)] - pub struct EnsembleLearnerParams(EnsembleLearnerValidParams); - - impl

EnsembleLearnerParams { +} + +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct EnsembleLearnerParams(EnsembleLearnerValidParams); + +impl

EnsembleLearnerParams { pub fn new(model_params: P) -> EnsembleLearnerParams { return Self::new_fixed_rng(model_params, rand::thread_rng()); } - } - - impl EnsembleLearnerParams { +} + +impl EnsembleLearnerParams { pub fn new_fixed_rng(model_params: P, rng: R) -> EnsembleLearnerParams { Self(EnsembleLearnerValidParams { ensemble_size: 1, @@ -124,22 +124,22 @@ use linfa::{ rng: rng, }) } - + pub fn ensemble_size(mut self, size: usize) -> Self { self.0.ensemble_size = size; self } - + pub fn bootstrap_proportion(mut self, proportion: f64) -> Self { self.0.bootstrap_proportion = proportion; self } - } - - impl ParamGuard for EnsembleLearnerParams { +} + +impl ParamGuard for EnsembleLearnerParams { type Checked = EnsembleLearnerValidParams; type Error = Error; - + fn check_ref(&self) -> Result<&Self::Checked> { if self.0.bootstrap_proportion > 1.0 || self.0.bootstrap_proportion <= 0.0 { Err(Error::Parameters(format!( @@ -155,45 +155,45 @@ use linfa::{ Ok(&self.0) } } - + fn check(self) -> Result { self.check_ref()?; Ok(self.0) } - } - - impl, T::Owned, Error>, R: Rng + Clone> Fit, T, Error> +} + +impl, T::Owned, Error>, R: Rng + Clone> Fit, T, Error> for EnsembleLearnerValidParams - where +where D: Clone, T: FromTargetArrayOwned, T::Elem: Copy + Eq + Hash, T::Owned: AsTargets, - { +{ type Object = EnsembleLearner; - + fn fit( &self, dataset: &DatasetBase, T>, ) -> core::result::Result { let mut models = Vec::new(); let mut rng = self.rng.clone(); - + let dataset_size = ((dataset.records.nrows() as f64) * self.bootstrap_proportion).ceil() as usize; - + let iter = dataset.bootstrap_samples(dataset_size, &mut rng); - // let mut count = 0; + // let mut count = 0; for train in iter { - // count += 1; + // count += 1; let model = self.model_params.fit(&train).unwrap(); models.push(model); - + if models.len() == self.ensemble_size { break; } } - + Ok(EnsembleLearner { models }) } - } \ No newline at end of file +} diff --git a/algorithms/linfa-ensemble/src/random_forest/mod.rs b/algorithms/linfa-ensemble/src/random_forest/mod.rs index e9f7752a0..1eb472a96 100644 --- a/algorithms/linfa-ensemble/src/random_forest/mod.rs +++ b/algorithms/linfa-ensemble/src/random_forest/mod.rs @@ -1,3 +1,5 @@ mod algorithm; +mod random_forest_regressor; -pub use algorithm::*; \ No newline at end of file +pub use algorithm::*; +pub use random_forest_regressor::*; diff --git a/algorithms/linfa-ensemble/src/random_forest/random_forest_regressor.rs b/algorithms/linfa-ensemble/src/random_forest/random_forest_regressor.rs new file mode 100644 index 000000000..7cfa7d8db --- /dev/null +++ b/algorithms/linfa-ensemble/src/random_forest/random_forest_regressor.rs @@ -0,0 +1,325 @@ +use linfa::prelude::*; +use linfa_datasets::iris; +use ndarray::{Array1, Array2, Axis}; +use rand::rngs::ThreadRng; +use rand::seq::SliceRandom; +use rand::Rng; + +pub struct DecisionTreeRegressor { + max_depth: usize, + min_samples_split: usize, + tree: Option, +} + +#[derive(Debug)] +pub struct TreeNode { + feature: usize, + value: f64, + left: Box>, + right: Box>, + output: Option, +} + +impl DecisionTreeRegressor { + pub fn new(max_depth: usize, min_samples_split: usize) -> Self { + DecisionTreeRegressor { + max_depth, + min_samples_split, + tree: None, + } + } + + pub fn fit(&mut self, features: &Array2, targets: &Array1) { + self.tree = Some(self.build_tree(features, targets, 0)); + } + + fn build_tree(&self, features: &Array2, targets: &Array1, depth: usize) -> TreeNode { + if depth >= self.max_depth || features.nrows() < self.min_samples_split { 
+ return TreeNode { + feature: 0, + value: 0.0, + left: Box::new(None), + right: Box::new(None), + output: Some(Self::mean(targets)), + }; + } + + let (best_feature, best_value) = self.best_split(features, targets); + let (left_idxs, right_idxs) = Self::split_dataset(features, best_feature, best_value); + + let left_tree = if !left_idxs.is_empty() { + let left_features = features.select(Axis(0), &left_idxs); + let left_targets = targets.select(Axis(0), &left_idxs); + Some(self.build_tree(&left_features, &left_targets, depth + 1)) + } else { + None + }; + + let right_tree = if !right_idxs.is_empty() { + let right_features = features.select(Axis(0), &right_idxs); + let right_targets = targets.select(Axis(0), &right_idxs); + Some(self.build_tree(&right_features, &right_targets, depth + 1)) + } else { + None + }; + + TreeNode { + feature: best_feature, + value: best_value, + left: Box::new(left_tree), + right: Box::new(right_tree), + output: None, // Output is now only assigned at leaves + } + } + + fn best_split(&self, features: &Array2, targets: &Array1) -> (usize, f64) { + let mut best_feature = 0; + let mut best_value = 0.0; + let mut best_mse = f64::INFINITY; + + for feature_idx in 0..features.ncols() { + let mut possible_values = features.column(feature_idx).to_vec(); + possible_values.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap()); + possible_values.dedup(); + + for &value in possible_values.iter() { + let (left_idxs, right_idxs) = Self::split_dataset(features, feature_idx, value); + if !left_idxs.is_empty() && !right_idxs.is_empty() { + let left_targets = targets.select(Axis(0), &left_idxs); + let right_targets = targets.select(Axis(0), &right_idxs); + let mse = Self::calculate_mse(&left_targets, &right_targets); + println!("Feature: {}, Value: {}, MSE: {}", feature_idx, value, mse); // Debug statement + if mse < best_mse { + best_mse = mse; + best_feature = feature_idx; + best_value = value; + } + } + } + } + + (best_feature, best_value) + } + + pub fn predict(&self, features: &Array2) -> Array1 { + let mut predictions = Array1::::zeros(features.nrows()); + for (i, feature_row) in features.axis_iter(Axis(0)).enumerate() { + let mut node = &self.tree; + while let Some(ref n) = *node { + if let Some(output) = n.output { + predictions[i] = output; + break; // Break if a leaf node with output is reached. 
+ } + node = if feature_row[n.feature] <= n.value { + &n.left + } else { + &n.right + }; + } + } + predictions + } + + fn split_dataset( + features: &Array2, + feature_idx: usize, + value: f64, + ) -> (Vec, Vec) { + let mut left_idxs = Vec::new(); + let mut right_idxs = Vec::new(); + + for (idx, feature_row) in features.axis_iter(Axis(0)).enumerate() { + if feature_row[feature_idx] <= value { + left_idxs.push(idx); + } else { + right_idxs.push(idx); + } + } + + (left_idxs, right_idxs) + } + + fn calculate_mse(left_targets: &Array1, right_targets: &Array1) -> f64 { + let left_mean = Self::mean(left_targets); + let right_mean = Self::mean(right_targets); + let left_mse: f64 = left_targets.iter().map(|&x| (x - left_mean).powi(2)).sum(); + let right_mse: f64 = right_targets + .iter() + .map(|&x| (x - right_mean).powi(2)) + .sum(); + (left_mse + right_mse) / (left_targets.len() + right_targets.len()) as f64 + } + + fn mean(values: &Array1) -> f64 { + values.mean().unwrap_or(0.0) + } +} + +// impl DecisionTreeRegressor { +// pub fn predict(&self, features: &Array2) -> Array1 { +// let mut predictions = Array1::::zeros(features.nrows()); +// for (i, feature_row) in features.axis_iter(Axis(0)).enumerate() { +// let mut node = &self.tree; +// while let Some(ref n) = *node { +// if feature_row[n.feature] <= n.value { +// node = &n.left; +// } else { +// node = &n.right; +// } +// } +// predictions[i] = if let Some(ref n) = *node { n.output } else { 0.0 }; +// } +// predictions +// } +// } + +pub struct RandomForestRegressor { + trees: Vec, + num_trees: usize, + max_features: usize, // Maximum number of features to consider for each split + max_depth: usize, + min_samples_split: usize, + rng: ThreadRng, +} + +impl RandomForestRegressor { + pub fn new( + num_trees: usize, + max_features: usize, + max_depth: usize, + min_samples_split: usize, + ) -> Self { + let rng = rand::thread_rng(); + RandomForestRegressor { + trees: Vec::with_capacity(num_trees), + num_trees, + max_features, + max_depth, + min_samples_split, + rng, + } + } + + pub fn fit(&mut self, features: &Array2, targets: &Array1) { + for _ in 0..self.num_trees { + let tree = self.create_and_train_tree(features, targets); + self.trees.push(tree); + } + } + + fn create_and_train_tree( + &mut self, + features: &Array2, + targets: &Array1, + ) -> DecisionTreeRegressor { + let num_samples = features.nrows(); + let indices: Vec = (0..num_samples).collect(); + let sampled_indices = indices + .as_slice() + .choose_multiple(&mut self.rng, num_samples) + .cloned() + .collect::>(); + + let sampled_features = features.select(Axis(0), &sampled_indices); + let sampled_targets = targets.select(Axis(0), &sampled_indices); + + let mut tree = DecisionTreeRegressor::new(self.max_depth, self.min_samples_split); + tree.fit(&sampled_features, &sampled_targets); + tree + } + + pub fn predict(&self, features: &Array2) -> Array1 { + let mut predictions: Vec> = Vec::new(); + for tree in &self.trees { + predictions.push(tree.predict(features)); + } + + let num_samples = features.nrows(); + let mut final_predictions = Array1::::zeros(num_samples); + for prediction in predictions { + final_predictions += &prediction; + } + final_predictions /= self.num_trees as f64; + final_predictions + } +} + +#[cfg(test)] +mod tests { + use super::*; + use approx::assert_relative_eq; + use ndarray::{array, Array1, Array2}; // For floating-point assertions + + #[test] + fn test_decision_tree_regressor() { + let features = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 4.0, 
5.0]).unwrap(); + let targets = Array1::from_vec(vec![1.1, 1.9, 3.9, 5.1]); + let mut regressor = DecisionTreeRegressor::new(3, 1); + + if features.nrows() != targets.len() { + panic!("Feature and target count mismatch"); + } + + regressor.fit(&features, &targets); + let predictions = regressor.predict(&features); + let rmse = calculate_rmse(&targets, &predictions); + + println!("RMSE: {:?}", rmse); + assert_relative_eq!(rmse, 0.0, epsilon = 0.1); + } + + fn calculate_rmse(actual: &Array1, predicted: &Array1) -> f64 { + let errors = actual - predicted; + let mse = errors.mapv(|e| e.powi(2)).mean().unwrap(); + mse.sqrt() + } + + fn load_iris_data() -> (Array2, Array1) { + // Load the dataset + let dataset = iris(); + + // Extract features; assuming all rows and all but the last column if last is target + let features = dataset.records().clone(); + + // Assuming the target are the labels, we need to convert them or use them as is depending on the use case. + // If you need to predict a feature, then split differently as shown in previous messages. + // Here we just clone the labels for the demonstration. + let targets = dataset.targets().mapv(|x| x as f64); + + (features, targets) + } + + #[test] + fn test_random_forest_with_iris() { + let (features, targets) = load_iris_data(); + + let mut forest = RandomForestRegressor::new(100, 4, 10, 3); + forest.fit(&features, &targets); + let predictions = forest.predict(&features); + + // Define a tolerance level + let tolerance = 0.1; // Tolerance level for correct classification + let mut correct = 0; + let mut incorrect = 0; + + // Count correct and incorrect predictions + for (&actual, &predicted) in targets.iter().zip(predictions.iter()) { + if (predicted - actual).abs() < tolerance { + correct += 1; + } else { + incorrect += 1; + } + } + + println!("Correct predictions: {}", correct); + println!("Incorrect predictions: {}", incorrect); + + let rmse = (&predictions - &targets) + .mapv(|a| a.powi(2)) + .mean() + .unwrap() + .sqrt(); + + println!("RMSE: {:?}", rmse); + } +} diff --git a/algorithms/linfa-ensemble/src/xgboost/README.md b/algorithms/linfa-ensemble/src/xgboost/README.md new file mode 100644 index 000000000..5e7cd5f42 --- /dev/null +++ b/algorithms/linfa-ensemble/src/xgboost/README.md @@ -0,0 +1,27 @@ +# XGBoost Tree Regression Based on [Video](https://www.youtube.com/watch?v=OtD8wVaFm6E&ab_channel=StatQuestwithJoshStarmer) + +- XGBoost Trees (different from our regular trees) +- Most common Regression for XGBoost: + ### First Epoch + - Initial prediction is made (by default 0.5) + - Each tree starts as single leaf + - Similarity score of residuals: (sum of residuals)^2 / (no. of residuals + lambda) + - lambda: regularization param (starts with 0) + ### Subsequent Epochs + - Take two datapoints (start with extreme label vals and move towards other end), find the avg + - Split the data based on the average of label vals and find simliarity of left, and right + - Find Gain using formula: Left_similarity + Right_similarity - Root_similarity + - The avg of the two datapoints which results in the largest gain will be used as the split. + - We continue to do this split for multiple levels deep (default max depth is 6). + #### Tree Pruning + - We keep a gamma value and on each branch of the (starting from the bottom most) tree, we compare the gain with gamma. If (gain - gamma < 0) on a branch, prune that branch. + > Side NOTE: + >- A lambda (regularization) helps removing the outliers in the data, preventing overfitting. 
It decreases the gain value on each branch. + >- Regularization also helps in decreasing the similarity score. And the amount of decrease in scores is inversely proportional to the number of residuals in the node. + >- In the video example, if lambda is 1 and using a gamma even of value 0 leads to pruning of the tree branch since the gain becomes negative. That means when lambda + - Output value for the leaf nodes is calculated using formula (`sum of all residuals / (number of residuals + lambda)`). + - Learning rate (default 0.3) is applied to each leaf node and values are updated from the predicted value (`leaf_residual = prediced_val + eta x leaf_output`). + +# XGBoost Tree Classification +- Start with a prediction probability of 0.5. +- In each epoch, \ No newline at end of file diff --git a/datasets/data/iris.csv.gz b/datasets/data/iris.csv.gz index 700e93baf..97c2de270 100644 Binary files a/datasets/data/iris.csv.gz and b/datasets/data/iris.csv.gz differ diff --git a/datasets/data/winequality-red.csv.gz b/datasets/data/winequality-red.csv.gz index 7fc89bd71..32ca7d23d 100644 Binary files a/datasets/data/winequality-red.csv.gz and b/datasets/data/winequality-red.csv.gz differ diff --git a/decision_tree_example.tex b/decision_tree_example.tex new file mode 100644 index 000000000..be96ddd8e --- /dev/null +++ b/decision_tree_example.tex @@ -0,0 +1,53 @@ + +\documentclass[margin=10pt]{standalone} +\usepackage{tikz,forest} +\usetikzlibrary{arrows.meta} +\forestset{ +default preamble={ +before typesetting nodes={ + !r.replace by={[, coordinate, append]} +}, +where n children=0{ + tier=word, +}{ + %diamond, aspect=2, +}, +where level=0{}{ + if n=1{ + edge label={node[pos=.2, above] {Y}}, + }{ + edge label={node[pos=.2, above] {N}}, + } +}, +for tree={ + edge+={thick, -Latex}, + s sep'+=2cm, + draw, + thick, + edge path'={ (!u) -| (.parent)}, + align=center, +} +} +}\begin{document}\begin{forest}[Val($2$) $ \leq 2.45$ \\ Imp. $0.33$ + [Label: 0] + [Val($2$) $ \leq 4.85$ \\ Imp. $0.41$ + [Val($3$) $ \leq 1.65$ \\ Imp. $0.02$ + [Label: 1] + [Val($0$) $ \leq 5.95$ \\ Imp. $0.50$ + [Label: 1] + [Label: 2]]] + [Val($3$) $ \leq 1.75$ \\ Imp. $0.06$ + [Val($2$) $ \leq 5.05$ \\ Imp. $0.25$ + [Val($1$) $ \leq 2.55$ \\ Imp. $0.12$ + [Val($2$) $ \leq 4.95$ \\ Imp. 
$0.50$ + [Label: 1] + [Label: 2]] + [Label: 1]] + [Label: 2]] + [Label: 2]]]] +\node [anchor=north west] at (current bounding box.north east) {% + \begin{tabular}{c c c} + \multicolumn{3}{@{}l@{}}{Legend:}\\ + Imp.&:&Impurity decrease\\Var(2)&:&feature-2\\Var(3)&:&feature-3\\Var(1)&:&feature-1\\Var(0)&:&feature-0\\\end{tabular}}; + \end{forest} +\end{document} \ No newline at end of file diff --git a/src/dataset/impl_dataset.rs b/src/dataset/impl_dataset.rs index 837c47eff..bf5e04738 100644 --- a/src/dataset/impl_dataset.rs +++ b/src/dataset/impl_dataset.rs @@ -498,7 +498,7 @@ where .map(|_| rng.gen_range(0..self.nsamples())) .collect::>(); - let records = self.records().select(Axis(0), &indices); + let records = self.records().select(Axis(0), &indices); let targets = T::new_targets(self.as_targets().select(Axis(0), &indices)); DatasetBase::new(records, targets) diff --git a/src/dataset/impl_targets.rs b/src/dataset/impl_targets.rs index 8952ab56c..09b734ebd 100644 --- a/src/dataset/impl_targets.rs +++ b/src/dataset/impl_targets.rs @@ -2,8 +2,8 @@ use std::collections::HashMap; use super::{ AsMultiTargets, AsMultiTargetsMut, AsProbabilities, AsSingleTargets, AsSingleTargetsMut, - AsTargets, AsTargetsMut, CountedTargets, DatasetBase, FromTargetArray, FromTargetArrayOwned, Label, Labels, Pr, - TargetDim, + AsTargets, AsTargetsMut, CountedTargets, DatasetBase, FromTargetArray, FromTargetArrayOwned, + Label, Labels, Pr, TargetDim, }; use ndarray::{ Array, Array1, Array2, ArrayBase, ArrayView, ArrayViewMut, Axis, CowArray, Data, DataMut, @@ -43,7 +43,6 @@ impl<'a, L: Clone + 'a, S: Data, I: TargetDim> FromTargetArray<'a> for } } - impl, I: TargetDim> AsTargetsMut for ArrayBase { type Elem = L; type Ix = I; @@ -99,7 +98,6 @@ where } } - impl<'a, L: Label + 'a, T> FromTargetArray<'a> for CountedTargets where T: FromTargetArray<'a, Elem = L>, @@ -243,4 +241,4 @@ where feature_names: self.feature_names.clone(), } } -} \ No newline at end of file +} diff --git a/src/dataset/mod.rs b/src/dataset/mod.rs index 2164cdf8a..9db22a632 100644 --- a/src/dataset/mod.rs +++ b/src/dataset/mod.rs @@ -998,4 +998,4 @@ mod tests { let prob = -0.5; assert_abs_diff_eq!(Pr::new_unchecked(prob).0, prob); } -} \ No newline at end of file +}
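
The XGBoost notes added in `algorithms/linfa-ensemble/src/xgboost/README.md` state the similarity-score, gain, pruning, and leaf-output rules in prose only. A minimal standalone Rust sketch of those formulas follows; the function names, residual values, and the `lambda = 0.0` / `gamma = 0.0` choices are illustrative assumptions, not part of this patch:

```rust
/// Similarity score from the README: (sum of residuals)^2 / (number of residuals + lambda).
fn similarity_score(residuals: &[f64], lambda: f64) -> f64 {
    let sum: f64 = residuals.iter().sum();
    sum * sum / (residuals.len() as f64 + lambda)
}

/// Gain of a candidate split: left similarity + right similarity - root (parent) similarity.
fn gain(left: &[f64], right: &[f64], lambda: f64) -> f64 {
    let parent: Vec<f64> = left.iter().chain(right.iter()).copied().collect();
    similarity_score(left, lambda) + similarity_score(right, lambda)
        - similarity_score(&parent, lambda)
}

fn main() {
    // Residuals against the default initial prediction of 0.5, split by a candidate threshold.
    let left = [-0.5, -0.4];
    let right = [0.6, 0.7];
    let lambda = 0.0; // larger lambda shrinks the similarity scores and therefore the gain

    let g = gain(&left, &right, lambda);
    let gamma = 0.0;
    // The README's pruning rule: drop the branch when gain - gamma < 0.
    println!("gain = {g:.3}, prune = {}", g - gamma < 0.0);

    // Leaf output: sum of residuals / (number of residuals + lambda).
    let leaf_output = right.iter().sum::<f64>() / (right.len() as f64 + lambda);
    println!("leaf output = {leaf_output:.3}");
}
```

With a non-zero `lambda`, both similarity scores and the resulting gain shrink, which is why even `gamma = 0` can prune a branch, matching the side note in the README.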
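
Similarly, the stump weight computed in `adaboost/algorithm.rs` follows the SAMME rule, `alpha = ln(K - 1) + learning_rate * ln((1 - err) / err)`, after which misclassified sample weights are rescaled by `exp(alpha)` and renormalized. A small numeric sketch of that update, assuming three classes, the default `learning_rate = 0.5` from `AdaboostParams::new`, and an invented weighted error of `0.2`:

```rust
fn main() {
    let num_classes = 3_usize;   // e.g. the three iris classes used in the example
    let learning_rate = 0.5_f32; // default set in AdaboostParams::new
    let error = 0.2_f32;         // illustrative weighted error of one fitted stump

    // Stump weight (alpha), as computed in the fit loop above.
    let alpha = ((num_classes - 1) as f32).ln() + learning_rate * ((1.0 - error) / error).ln();

    // Up-weight the misclassified samples by exp(alpha), then renormalize so weights sum to 1.
    let weights = [0.25_f32; 4];
    let misclassified = [false, true, false, false];
    let mut updated: Vec<f32> = weights
        .iter()
        .zip(misclassified)
        .map(|(w, wrong)| if wrong { (w.ln() + alpha).exp() } else { *w })
        .collect();
    let total: f32 = updated.iter().sum();
    for w in &mut updated {
        *w /= total;
    }

    println!("alpha = {alpha:.3}");
    println!("updated weights = {updated:?}");
}
```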