Skip to content

Commit

Permalink
Merge pull request #4 from Sin317/km1139
Browse files Browse the repository at this point in the history
Introduced a Python API via PyO3, TikZ export, and parallelization for the AdaBoost and random-forest classifiers
  • Loading branch information
Sin317 authored May 6, 2024
2 parents 51fb2f7 + f24d601 commit bb5c2ab
Show file tree
Hide file tree
Showing 18 changed files with 574 additions and 60 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Cargo.lock
**/*.rs.bk
# vscode
.vscode

.DS_Store
# ctags
tags
*.npy
Expand Down
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ thiserror = "1.0"

criterion = { version = "0.4.0", optional = true }

rayon = "1.10.0"
[dependencies.serde_crate]
package = "serde"
optional = true
Expand All @@ -62,6 +63,7 @@ linfa-datasets = { path = "datasets", features = [
"iris",
"diabetes",
"generate",
"mnist",
] }
statrs = "0.16.0"

Expand Down
72 changes: 72 additions & 0 deletions algorithms/linfa-ensemble/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/target

# Byte-compiled / optimized / DLL files
__pycache__/
.pytest_cache/
*.py[cod]

# C extensions
*.so

# Distribution / packaging
.Python
.venv/
env/
bin/
build/
develop-eggs/
dist/
eggs/
lib/
lib64/
parts/
sdist/
var/
include/
man/
venv/
*.egg-info/
.installed.cfg
*.egg

# Installer logs
pip-log.txt
pip-delete-this-directory.txt
pip-selfcheck.json

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.cache
nosetests.xml
coverage.xml

# Translations
*.mo

# Mr Developer
.mr.developer.cfg
.project
.pydevproject

# Rope
.ropeproject

# Django stuff:
*.log
*.pot

.DS_Store

# Sphinx documentation
docs/_build/

# PyCharm
.idea/

# VSCode
.vscode/

# Pyenv
.python-version
20 changes: 15 additions & 5 deletions algorithms/linfa-ensemble/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@ description = "A general method for creating ensemble classifiers"
license = "MIT/Apache-2.0"

repository = "https://github.com/rust-ml/linfa"
readme = "README.md"

keywords = ["machine-learning", "linfa", "ensemble"]
categories = ["algorithms", "mathematics", "science"]

# "cdylib" is necessary to produce a shared library for Python to import from.
crate-type = ["cdylib"]

[features]
default = []
serde = ["serde_crate", "ndarray/serde"]
Expand All @@ -26,14 +28,22 @@ features = ["std", "derive"]
[dependencies]
linfa = { version = "0.7.0", path = "../.." }
linfa-trees = { version = "0.7.0", path = "../linfa-trees"}
linfa-datasets = { version = "0.7.0", path = "../../datasets/", features = ["iris"] }
linfa-datasets = { version = "0.7.0", path = "../../datasets/", features = ["iris", "mnist"] }
ndarray = { version = "0.15" , features = ["rayon", "approx"]}
ndarray-rand = "0.14"
rand = { version = "0.8", features = ["small_rng"] }
pyo3 = { version = "0.21.2", features = ["extension-module"] }
rayon = {version = "1.10.0"}
approx = {version = "0.5"}

[dev-dependencies]
rand = { version = "0.8", features = ["small_rng"] }
linfa-datasets = { version = "0.7.0", path = "../../datasets/", features = ["iris"] }
ndarray = { version = "0.15" , features = ["rayon", "approx"]}
approx = {version = "0.5"}
linfa-datasets = { version = "0.7.0", path = "../../datasets/", features = ["iris", "mnist"] }
rayon = {version = "1.10.0"}
approx = {version = "0.5"}

[lib]
# The name of the native library. This is the name which will be used in Python to import the
# library (i.e. `import string_sum`). If you change this, you must also change the name of the
# `#[pymodule]` in `src/lib.rs`.
name = "linfa_ensemble"
18 changes: 14 additions & 4 deletions algorithms/linfa-ensemble/examples/adaboost.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use std::fs::File;
use std::io::Write;
use std::{fs::File, io::Write};

use linfa_trees::DecisionTreeParams;
use ndarray_rand::rand::SeedableRng;
Expand All @@ -15,7 +14,8 @@ fn main() -> Result<()> {
.shuffle(&mut rng)
.split_with_ratio(0.8);

println!("Training model with Adaboost ...");
println!("IRIS DATA: Training model with Adaboost ...");

let ada_model = Adaboost::<f64, usize>::params()
.n_estimators(10)
.d_tree_params(
Expand All @@ -32,9 +32,19 @@ fn main() -> Result<()> {
println!("{:?}", cm);

println!(
"Test accuracy with Adaboost : {:.2}%",
"IRIS DATA: Test accuracy with Adaboost : {:.2}%",
100.0 * cm.accuracy()
);

let mut tikz = File::create("adaboost_example.tex").unwrap();
tikz.write_all(
ada_model
.export_to_tikz()
.with_legend()
.to_string()
.as_bytes(),
)
.unwrap();

Ok(())
}
16 changes: 9 additions & 7 deletions algorithms/linfa-ensemble/examples/random_forest.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
//! Random Forest
use linfa::prelude::{Predict, ToConfusionMatrix};
use linfa::traits::Fit;
use linfa::prelude::*;
use linfa_ensemble::EnsembleLearnerParams;
use linfa_trees::DecisionTree;
use ndarray_rand::rand::SeedableRng;
Expand All @@ -14,18 +13,21 @@ fn main() {

//Load dataset
let mut rng = SmallRng::seed_from_u64(42);
let (train, test) = linfa_datasets::iris()
.shuffle(&mut rng)
.split_with_ratio(0.7);

let (train, test) = linfa_datasets::mnist();

train.shuffle(&mut rng);
test.shuffle(&mut rng);

//Train ensemble learner model
let model = EnsembleLearnerParams::new(DecisionTree::params())
let model = EnsembleLearnerParams::new(DecisionTree::<f64, usize>::params())
.ensemble_size(ensemble_size)
.bootstrap_proportion(bootstrap_proportion)
.fit(&train)
.unwrap();
// println!("Done with Fit");
// //Return highest ranking predictions

//Return highest ranking predictions
let final_predictions_ensemble = model.predict(&test);
println!("Final Predictions: \n{:?}", final_predictions_ensemble);

Expand Down
15 changes: 15 additions & 0 deletions algorithms/linfa-ensemble/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
[build-system]
requires = ["maturin>=1.5,<2.0"]
build-backend = "maturin"

[project]
name = "linfa-ensemble"
requires-python = ">=3.8"
classifiers = [
"Programming Language :: Rust",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
dynamic = ["version"]
[tool.maturin]
features = ["pyo3/extension-module"]
50 changes: 43 additions & 7 deletions algorithms/linfa-ensemble/src/adaboost/algorithm.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
use std::{collections::HashMap, iter::zip};

use linfa::{dataset::Labels, error::Error, error::Result, traits::*, DatasetBase, Float, Label};
use linfa_trees::DecisionTree;

use super::AdaboostValidParams;
use super::Tikz;
use linfa::dataset::AsSingleTargets;
use linfa::{dataset::Labels, error::Error, error::Result, traits::*, DatasetBase, Float, Label};
use linfa_trees::DecisionTree;
use ndarray::{Array1, ArrayBase, Data, Ix2};
#[cfg(feature = "serde")]
use serde_crate::{Deserialize, Serialize};
use std::{collections::HashMap, iter::zip};
// adaboost will be a vector of stumps

// stump will contain a decision tree and a weight associated with that stump
Expand All @@ -25,6 +22,10 @@ pub struct Stump<F: Float, L: Label> {
}

impl<F: Float, L: Label + std::fmt::Debug> Stump<F, L> {
/// Returns a shared reference to the decision tree wrapped by this stump.
///
/// Exposed so callers (e.g. the TikZ exporter) can inspect the fitted weak
/// learner without taking ownership of it.
pub fn tree(&self) -> &DecisionTree<F, L> {
    &self.tree
}

/// Bundles a fitted decision tree with its associated weight into a `Stump`.
/// (Per the type's comment, a stump is a tree plus the weight the boosting
/// loop assigned to it.)
fn make_stump(tree: DecisionTree<F, L>, weight: f32) -> Self {
    Stump { tree, weight }
}
Expand Down Expand Up @@ -84,6 +85,15 @@ impl<F: Float, L: Label + Default, D: Data<Elem = F>> PredictInplace<ArrayBase<D
}
}

impl<F: Float, L: Label> Adaboost<F, L> {
    /// Returns the fitted weak learners (weighted stumps) of this ensemble.
    pub fn stumps(&self) -> &Vec<Stump<F, L>> {
        &self.stumps
    }

    /// Builds a TikZ/LaTeX exporter that borrows this fitted model, for
    /// rendering the ensemble's trees (see `export_to_tikz().to_string()`).
    pub fn export_to_tikz(&self) -> Tikz<'_, F, L> {
        // `self` is already `&Adaboost<F, L>`; writing `&self` here created a
        // needless `&&Adaboost` that only compiled via auto-deref coercion
        // (clippy::needless_borrow). Pass the reference through directly.
        Tikz::new(self)
    }
}

impl<'a, F: Float, L: Label + 'a + std::fmt::Debug, D, T> Fit<ArrayBase<D, Ix2>, T, Error>
for AdaboostValidParams<F, L>
where
Expand Down Expand Up @@ -218,4 +228,30 @@ mod tests {

Ok(())
}

/// Smoke test: fit a small AdaBoost ensemble on MNIST and run prediction on
/// the held-out split. No accuracy assertion — this only guards against
/// panics in `fit`/`predict` on a real dataset.
#[test]
fn mnist_test() {
    use ndarray_rand::rand::SeedableRng;
    use rand::rngs::SmallRng;

    // Fixed seed so the shuffle (and therefore the test) is reproducible.
    let mut rng = SmallRng::seed_from_u64(42);

    let (train, test) = linfa_datasets::mnist();
    // BUG FIX: `shuffle` consumes the dataset and returns the shuffled copy
    // (see the chained `.shuffle(&mut rng).split_with_ratio(..)` usage in the
    // iris test above). Calling it as a bare statement discarded the result
    // and moved out of a non-`mut` binding; the returned value must be rebound.
    let train = train.shuffle(&mut rng);
    let test = test.shuffle(&mut rng);

    println!("MNIST DATA: Training model with Adaboost ...");
    // Only 2 estimators to keep the test's runtime bounded; deep trees with
    // tiny weight thresholds let each weak learner fit MNIST at all.
    let ada_model = Adaboost::<f64, usize>::params()
        .n_estimators(2)
        .d_tree_params(
            DecisionTreeParams::new()
                .max_depth(Some(25))
                .min_weight_leaf(0.00001)
                .min_weight_split(0.00001),
        )
        .fit(&train)
        .unwrap();

    // Prediction output is intentionally unused — success is "did not panic".
    let _ada_pred_y = ada_model.predict(&test);
}
}
3 changes: 2 additions & 1 deletion algorithms/linfa-ensemble/src/adaboost/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//! AdaBoost: core algorithm, hyperparameter types, and TikZ export.

// Private submodules, grouped together...
mod algorithm;
mod hyperparams;
mod tikz;

// ...with their public items re-exported flat at the `adaboost` module root.
pub use algorithm::*;
pub use hyperparams::*;
pub use tikz::*;
Loading

0 comments on commit bb5c2ab

Please sign in to comment.