From e1d004448afd86f4ffa2ed4b87629e6798ef41b2 Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Wed, 5 Jun 2024 12:53:24 +0200 Subject: [PATCH 1/4] feat(import/export): fully functional file dumping For both tables and classifier models. --- chemfusekit/__base.py | 45 ++++++++++++++++++- chemfusekit/knn.py | 12 +++++ chemfusekit/lda.py | 10 +++++ chemfusekit/lldf.py | 9 +--- chemfusekit/lr.py | 9 ++++ chemfusekit/plsda.py | 8 ++++ chemfusekit/svm.py | 9 ++++ tests/test_base.py | 100 ++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 193 insertions(+), 9 deletions(-) create mode 100644 tests/test_base.py diff --git a/chemfusekit/__base.py b/chemfusekit/__base.py index 58309f8..db5212a 100644 --- a/chemfusekit/__base.py +++ b/chemfusekit/__base.py @@ -3,19 +3,56 @@ import pandas as pd import numpy as np import joblib + +from sklearn.base import BaseEstimator + from .__utils import GraphMode class BaseDataModel: '''Models the output data from data-outputting operations''' + def __init__(self, x_data: pd.DataFrame, x_train: pd.DataFrame, y: np.ndarray): self.x_data = x_data self.x_train = x_train self.y = y + @classmethod + def load_from_file(cls, import_path: str, sheet_name: str = 'Sheet1'): + try: + table_data = pd.read_excel( + import_path, + sheet_name=sheet_name, + index_col=0, + header=0 + ) + except Exception as exc: + raise FileNotFoundError("Error opening the selected files.") from exc + + x = table_data.iloc[:, 1:] + + # It is necessary to convert the column names as string to select them + x.columns = x.columns.astype(str) # to make the colnames as text + + y = table_data.loc[:, 'Substance'].values + y_dataframe = pd.DataFrame(y, columns=['Substance']) + x_train = pd.concat( + [y_dataframe, x], + axis=1 + ) + + return cls(x, x_train, y) + + def export_to_file(self, export_path: str, sheet_name: str = 'Sheet1'): + try: + self.x_train.to_excel(excel_writer=export_path, sheet_name=sheet_name) + except Exception as exc: + raise RuntimeError("Could not export data to the selected path.") from exc + class BaseSettings: '''Holds the settings for the BaseClassifier object.''' + def __init__(self, output: GraphMode = GraphMode.NONE, test_split: bool = False): if test_split is True and output is GraphMode.NONE: raise Warning( @@ -27,13 +64,17 @@ def __init__(self, output: GraphMode = GraphMode.NONE, test_split: bool = False) class BaseClassifier: '''Parent class for all classifiers, containing basic shared utilities.''' + def __init__(self, settings: BaseSettings, data: BaseDataModel): self.settings = settings self.data = data - self.model = None + self.model: BaseEstimator | None = None def import_model(self, import_path: str): - joblib.load(self.model, import_path) + model = joblib.load(import_path) + if not isinstance(model, BaseEstimator): + raise ImportError("The file you tried importing is not a sklearn model!") + self.model = model def export_model(self, export_path: str): if self.model is not None: diff --git a/chemfusekit/knn.py b/chemfusekit/knn.py index 6c7eb16..e1a08b5 100644 --- a/chemfusekit/knn.py +++ b/chemfusekit/knn.py @@ -1,4 +1,5 @@ '''k-Nearest Neighbors Analysis module''' +from copy import copy from typing import Optional from beartype.typing import Callable @@ -79,3 +80,14 @@ def knn(self): algorithm=self.settings.algorithm ) run_split_test(self.data.x_data, self.data.y, knn_split) + + def import_model(self, import_path: str): + model_backup = copy(self.model) + super().import_model(import_path) + if not 
isinstance(self.model, KNeighborsClassifier): + self.model = model_backup + raise ImportError("The file you tried to import is not a KNeighborsClassifier.") + self.settings.n_neighbors = self.model.n_neighbors + self.settings.metric = self.model.metric + self.settings.weights = self.model.weights + self.settings.algorithm = self.model.algorithm diff --git a/chemfusekit/lda.py b/chemfusekit/lda.py index fe9ea77..3e12485 100644 --- a/chemfusekit/lda.py +++ b/chemfusekit/lda.py @@ -1,6 +1,8 @@ '''Linear Discriminant Analysis module''' +from copy import copy from typing import Optional +import joblib import numpy as np import pandas as pd @@ -108,3 +110,11 @@ def lda(self): LD(n_components=self.settings.components), mode=self.settings.output ) + + def import_model(self, import_path: str): + model_backup = copy(self.model) + super().import_model(import_path) + if not isinstance(self.model, LD): + self.model = model_backup + raise ImportError("The file you tried to import is not a LinearDiscriminantAnalysis classifier.") + self.settings.components = self.model.n_components diff --git a/chemfusekit/lldf.py b/chemfusekit/lldf.py index e80b525..4b45b08 100644 --- a/chemfusekit/lldf.py +++ b/chemfusekit/lldf.py @@ -139,14 +139,9 @@ def lldf(self): self.fused_data = LLDFDataModel(x_data, x_train, y) - def export_data(self, export_path: str): + def export_data(self, export_path: str, sheet_name: str = 'Sheet1'): '''Exports the data fusion artifacts to a file''' if self.fused_data is None: raise RuntimeError("Cannot export data before data fusion.") - x_train_dataframe = pd.DataFrame(self.fused_data.x_train) - - try: - x_train_dataframe.to_excel(export_path) - except Exception as exc: - raise RuntimeError("Could not export data to the selected path.") from exc + self.fused_data.export_to_file(export_path=export_path, sheet_name=sheet_name) diff --git a/chemfusekit/lr.py b/chemfusekit/lr.py index 1c91aae..79ddcd4 100644 --- a/chemfusekit/lr.py +++ b/chemfusekit/lr.py @@ -1,4 +1,5 @@ '''Logistic Regression Module''' +from copy import copy from typing import Optional import numpy as np @@ -128,3 +129,11 @@ def predict(self, x_sample: pd.DataFrame): ) return prediction + + def import_model(self, import_path: str): + model_backup = copy(self.model) + super().import_model(import_path) + if not isinstance(self.model, LogisticRegression): + self.model = model_backup + raise ImportError("The file you tried to import is not a LogisticRegression classifier.") + self.settings.algorithm = self.model.solver diff --git a/chemfusekit/plsda.py b/chemfusekit/plsda.py index a606367..567a218 100644 --- a/chemfusekit/plsda.py +++ b/chemfusekit/plsda.py @@ -133,3 +133,11 @@ def plsda(self): x = self.data.x_data y = self.data.x_train.Substance.astype('category').cat.codes run_split_test(x, y, PLSR(self.settings.n_components), mode=self.settings.output) + + def import_model(self, import_path: str): + model_backup = copy(self.model) + super().import_model(import_path) + if not isinstance(self.model, PLSR): + self.model = model_backup + raise ImportError("The file you tried to import is not a PLSRegression classifier.") + self.settings.n_components = self.model.n_components diff --git a/chemfusekit/svm.py b/chemfusekit/svm.py index fde2dde..b295a42 100644 --- a/chemfusekit/svm.py +++ b/chemfusekit/svm.py @@ -1,4 +1,5 @@ '''Support Vector Machine module.''' +from copy import copy from typing import Optional import pandas as pd @@ -60,3 +61,11 @@ def svm(self): model=SVC(kernel=self.settings.kernel), mode=self.settings.output ) + 
+ def import_model(self, import_path: str): + model_backup = copy(self.model) + super().import_model(import_path) + if not isinstance(self.model, SVC): + self.model = model_backup + raise ImportError("The file you tried to import is not an SVC classifier.") + self.settings.kernel = self.model.kernel diff --git a/tests/test_base.py b/tests/test_base.py new file mode 100644 index 0000000..b3dd5f4 --- /dev/null +++ b/tests/test_base.py @@ -0,0 +1,100 @@ +'''This module contains the test cases for the base module.''' +import unittest + +import os + +from chemfusekit.lldf import LLDFSettings, LLDF, Table +from chemfusekit.__base import BaseDataModel +from chemfusekit.lda import LDASettings, LDA + + +class TestBase(unittest.TestCase): + def test_import_export(self): + '''Test case for table import and export.''' + + # Import and fuse data from tables + lldf_settings = LLDFSettings() + table1 = Table( + file_path="tests/qepas.xlsx", + sheet_name="Sheet1", + preprocessing="snv" + ) + table2 = Table( + file_path="tests/rt.xlsx", + sheet_name="Sheet1", + preprocessing="none" + ) + tables = [table1, table2] + lldf = LLDF(lldf_settings, tables) + lldf.lldf() + + # Export the fused dataset to file + lldf.export_data("export_test.xlsx") + + # Import the fused dataset from file + imported_data = BaseDataModel.load_from_file("export_test.xlsx", "Sheet1") + + # Assert the equality between the fused data and the export/import data + self.assertTrue( + # The compare method returns a difference dataframe, if it's empty it means the two are equal + lldf.fused_data.x_data.compare(imported_data.x_data).empty + ) + self.assertTrue( + # The compare method returns a difference dataframe, if it's empty it means the two are equal + lldf.fused_data.x_train.compare(imported_data.x_train).empty + ) + self.assertTrue( + # The comparison between ndarrays returns an array of booleans, collapsed by "all()" + (lldf.fused_data.y == imported_data.y).all() + ) + + # Second phase: re-export and re-import from BaseDataModel + imported_data.export_to_file("export_test_2.xlsx") + reimported_data = BaseDataModel.load_from_file("export_test_2.xlsx") + + # Assert the equality between the re-exported data and the re-reimported data + self.assertTrue(lldf.fused_data.x_data.compare(imported_data.x_data).empty) + self.assertTrue(lldf.fused_data.x_train.compare(imported_data.x_train).empty) + self.assertTrue((lldf.fused_data.y == imported_data.y).all()) + + # Clean up + os.remove("export_test.xlsx") + os.remove("export_test_2.xlsx") + + def test_model_import(self): + '''Integration test for model dumping and reloading.''' + # Let's start by creating and training an LDA model + lldf_settings = LLDFSettings() + table1 = Table( + file_path="tests/qepas.xlsx", + sheet_name="Sheet1", + preprocessing="snv" + ) + table2 = Table( + file_path="tests/rt.xlsx", + sheet_name="Sheet1", + preprocessing="none" + ) + tables = [table1, table2] + lldf = LLDF(lldf_settings, tables) + lldf.lldf() + lda_settings = LDASettings() + lda = LDA(lda_settings, lldf.fused_data) + lda.lda() + + # Dump the model to file + lda.export_model("modelfile.sklearn") + + # Reload the model + lda2 = LDA(lda_settings, lldf.fused_data) + lda2.import_model("modelfile.sklearn") + + # Check whether the imported model is the same as the exported model + self.assertEqual(lda.model.get_params(), lda2.model.get_params()) + + # Clean up + os.remove("modelfile.sklearn") + + +if __name__ == '__main__': + unittest.main() From 39c55420774090231db53ccbacf52ffbbc53009e Mon Sep 17 00:00:00 2001 
From: semantic-release-bot Date: Wed, 5 Jun 2024 10:54:36 +0000 Subject: [PATCH 2/4] ci(release): 2.1.0-beta.1 [skip ci] ## [2.1.0-beta.1](https://github.com/f-aguzzi/tesi/compare/v2.0.0...v2.1.0-beta.1) (2024-06-05) ### Features * **import/export:** fully functional file dumping ([e1d0044](https://github.com/f-aguzzi/tesi/commit/e1d004448afd86f4ffa2ed4b87629e6798ef41b2)) ### chore * **license:** add GPLv3 license ([3fdd0b8](https://github.com/f-aguzzi/tesi/commit/3fdd0b87b6587b7413dd36f5101d37a5d712e7d7)) --- CHANGELOG.md | 12 ++++++++++++ pyproject.toml | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b98c701..7f46790 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,15 @@ +## [2.1.0-beta.1](https://github.com/f-aguzzi/tesi/compare/v2.0.0...v2.1.0-beta.1) (2024-06-05) + + +### Features + +* **import/export:** fully functional file dumping ([e1d0044](https://github.com/f-aguzzi/tesi/commit/e1d004448afd86f4ffa2ed4b87629e6798ef41b2)) + + +### chore + +* **license:** add GPLv3 license ([3fdd0b8](https://github.com/f-aguzzi/tesi/commit/3fdd0b87b6587b7413dd36f5101d37a5d712e7d7)) + ## [2.0.0](https://github.com/f-aguzzi/tesi/compare/v1.2.0...v2.0.0) (2024-06-04) diff --git a/pyproject.toml b/pyproject.toml index 5e98de9..4eeeda0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "chemfusekit" -version = "2.0.0" +version = "2.1.0b1" description = "A minimal Python / Jupyter Notebook / Colab library for data fusion and chemometrical analysis." authors = [ { name = "Federico Aguzzi", email = "62149513+f-aguzzi@users.noreply.github.com" } From be7c2624710d6dbf4f0d320e451e5001853560cc Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Wed, 5 Jun 2024 14:16:05 +0200 Subject: [PATCH 3/4] docs: add new information --- docs/cookbook/structure.md | 48 +++++++++++++++++++++++++++++---- docs/docs/base/basedatamodel.md | 7 +++++ docs/docs/lldf/lldfmodel.md | 4 +++ docs/docs/pca/pcadatamodel.md | 9 ++++++- 4 files changed, 62 insertions(+), 6 deletions(-) diff --git a/docs/cookbook/structure.md b/docs/cookbook/structure.md index f2bbba2..64bec98 100644 --- a/docs/cookbook/structure.md +++ b/docs/cookbook/structure.md @@ -47,6 +47,36 @@ As you can see, each module contains a class with the same name of the module, a ## Modular design features +The entire library was streamlined to make operations as smooth and easy as possible. Any operation (import and export of both data and classifier models, training, processing, prediction, ...) looks the same on any class. + +
+ 
+> *Want to update the settings in a classifier?* 
+ 
+You'll find the settings for `LDA` in `LDA.settings`. And the settings of `PCA` in `PCA.settings`. Where are the settings for `SVM`? In `SVM.settings`, of course. You get the idea. 
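+ 
+For instance, a minimal sketch (assuming an `LDA` object named `lda` has already been constructed; the names and values here are illustrative):
+ 
+```python
+from chemfusekit.lda import LDASettings, GraphMode
+
+# Hypothetical example: swap in a fresh settings object on an existing classifier
+lda.settings = LDASettings(components=4, output=GraphMode.GRAPHIC)
+```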
+ 
+> *Want to inspect the underlying `sklearn` model in one of the classifiers?* 
+ 
+Let's say you're using an `LR` object. Its underlying sklearn classifier is in `LR.model`, just as the underlying sklearn classifier of `KNN` is in `KNN.model`. 
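+ 
+A minimal sketch of that kind of inspection (assuming a trained `LR` object named `lr`; the name is illustrative):
+ 
+```python
+# `lr.model` is None until `lr.lr()` has been called
+print(type(lr.model))         # e.g. LogisticRegression once training is done
+print(lr.model.get_params())  # the estimator's hyperparameters as a dict
+```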
+ +> *Want to swap out the data in a model and retrain it?* + +Let's assume your new data is called `new_data`. Knowing that the training data, when present, is located in the `.data` field, just do this: + +```python +knn.data = new_data +knn.knn() +``` + +The training method is always called like its container class, but in lower case. To train a `KNN` model, like in this case, you just have to call `.knn()` on it. Same goes for `.lda()` on `LDA`, `.lldf()` on `LLDF`, and so on. + + +### Modular settings + The settings for all classifiers (that is, all classes except `LLDF` and `PCA`) inherit from a base class called [`BaseSettings`](/docs/base/basesettings) in the `base` module: ```mermaid @@ -84,8 +114,9 @@ classDiagram BaseSettings *-- SVMSettings ``` -\ -\ + +### Modular classifiers + The classifiers themselves all inherit from a base class called [`BaseClassifier`](/docs/base/baseclassifier) in the `base` module: ```mermaid @@ -128,8 +159,9 @@ classDiagram BaseClassifier *-- SVM ``` -\ -\ + +### Modular data types + The data types are modular and interexchangeable too. Both [`LLDFDataModel`](/docs/lldf/lldfmodel) and [`PCADataModel`](/docs/pca/pcadatamodel) inherit from [`BaseDataModel`](/docs/base/basedatamodel) as shown in the following diagram: ```mermaid @@ -156,4 +188,10 @@ classDiagram BaseDataModel *-- PCADataModel ``` -This allows all the classifiers to use the `LLDF` data, dimension-reduced `PCA` data, or any other type of data as long as it follows the `BaseDataModel` template. \ No newline at end of file +This allows all the classifiers to use the `LLDF` data, dimension-reduced `PCA` data, or any other type of data as long as it follows the `BaseDataModel` template. + +## File import and export + +All the data models (`BaseDataModel`, and its derived, `LLDFDataModel` and `PCADataModel`) can export their content to Excel tables. + +All classifiers derived from `BaseClassifier` (`KNN`, `LDA`, `LR`, `PLSDA`, `SVM`) can import and export their sklearn data model from and to file. 
\ No newline at end of file diff --git a/docs/docs/base/basedatamodel.md b/docs/docs/base/basedatamodel.md index 727c520..d66f32e 100644 --- a/docs/docs/base/basedatamodel.md +++ b/docs/docs/base/basedatamodel.md @@ -19,3 +19,10 @@ The first two are `Pandas` `DataFrame` objects: - `x_train` The last is a `NumPy` `ndarray`: - `y` + +## Methods + +Both methods are inherited from [`BaseDataModel`](../base/basedatamodel.md): + +- `@classmethod def load_from_file(import_path: str, sheet_name: str = 'Sheet1')`: creates a `BaseDataModel` instance from an Excel file +- `export_to_file(export_path: str, sheet_name: str = 'Sheet1')`: exports the `BaseDataModel` contents to an Excel table diff --git a/docs/docs/lldf/lldfmodel.md b/docs/docs/lldf/lldfmodel.md index 7232264..594ef0e 100644 --- a/docs/docs/lldf/lldfmodel.md +++ b/docs/docs/lldf/lldfmodel.md @@ -23,3 +23,7 @@ The first two are `Pandas` `DataFrame` objects: The last is a `NumPy` `ndarray`: - `y` +## Methods + +- `@classmethod def load_from_file(import_path: str, sheet_name: str = 'Sheet1')`: creates an `LLDFDataModel` instance from an Excel file +- `export_to_file(export_path: str, sheet_name: str = 'Sheet1')`: exports the `LLDFDataModel` contents to an Excel table \ No newline at end of file diff --git a/docs/docs/pca/pcadatamodel.md b/docs/docs/pca/pcadatamodel.md index ffb1085..f76b156 100644 --- a/docs/docs/pca/pcadatamodel.md +++ b/docs/docs/pca/pcadatamodel.md @@ -25,4 +25,11 @@ The second two are `NumPy` `ndarray`s: - `array_scores` The last is an integer: -- `components` \ No newline at end of file +- `components` + +## Methods + +Both methods are inherited from [`BaseDataModel`](../base/basedatamodel.md): + +- `@classmethod def load_from_file(import_path: str, sheet_name: str = 'Sheet1')`: creates a `BaseDataModel` instance from an Excel file +- `export_to_file(export_path: str, sheet_name: str = 'Sheet1')`: exports the `BaseDataModel` contents to an Excel table From 3601750fab58414dfe565abb7ecab57630b61a31 Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Fri, 7 Jun 2024 11:46:07 +0200 Subject: [PATCH 4/4] chore(docs): version 2.1.0 --- .../version-2.1.0/case-study-classifier.md | 9 + .../version-2.1.0/case-study-hybrid.md | 9 + .../version-2.1.0/case-study-realtime.md | 9 + .../version-2.1.0/introduction.md | 26 +++ .../version-2.1.0/structure.md | 197 ++++++++++++++++++ .../version-2.1.0-sidebars.json | 8 + docs/cookbook_versions.json | 1 + .../version-2.1.0/base/_category_.json | 8 + .../version-2.1.0/base/baseclassifier.md | 37 ++++ .../version-2.1.0/base/basedatamodel.md | 28 +++ .../version-2.1.0/base/basesettings.md | 21 ++ .../version-2.1.0/complete-workflow.md | 31 +++ .../version-2.1.0/knn/_category_.json | 8 + docs/versioned_docs/version-2.1.0/knn/knn.md | 47 +++++ .../version-2.1.0/knn/knnsettings.md | 64 ++++++ .../version-2.1.0/lda/_category_.json | 8 + docs/versioned_docs/version-2.1.0/lda/lda.md | 48 +++++ .../version-2.1.0/lda/ldasettings.md | 39 ++++ .../version-2.1.0/lldf/_category_.json | 8 + .../version-2.1.0/lldf/index.mdx | 47 +++++ .../version-2.1.0/lldf/lldf-class.md | 67 ++++++ .../version-2.1.0/lldf/lldfmodel.md | 29 +++ .../version-2.1.0/lldf/lldfsettings.md | 25 +++ .../version-2.1.0/lldf/table.md | 39 ++++ .../version-2.1.0/lr/_category_.json | 8 + docs/versioned_docs/version-2.1.0/lr/lr.md | 51 +++++ .../version-2.1.0/lr/lrsettings.md | 44 ++++ .../version-2.1.0/pca/_category_.json | 8 + docs/versioned_docs/version-2.1.0/pca/pca.md | 46 ++++ 
.../version-2.1.0/pca/pcadatamodel.md | 35 ++++ .../version-2.1.0/pca/pcasettings.md | 41 ++++ .../version-2.1.0/plsda/_category_.json | 8 + .../version-2.1.0/plsda/plsda.md | 47 +++++ .../version-2.1.0/plsda/plsdasettings.md | 38 ++++ .../version-2.1.0/svm/_category_.json | 8 + docs/versioned_docs/version-2.1.0/svm/svm.md | 49 +++++ .../version-2.1.0/svm/svmsettings.md | 43 ++++ docs/versioned_docs/version-2.1.0/tutorial.md | 109 ++++++++++ .../version-2.1.0/utils/_category_.json | 8 + .../version-2.1.0/utils/graphmode.md | 11 + .../version-2.1.0/utils/graphoutput.md | 20 ++ .../utils/printconfusionmatrix.md | 19 ++ .../version-2.1.0/utils/printtable.md | 20 ++ .../version-2.1.0/utils/runsplittests.md | 20 ++ .../version-2.1.0-sidebars.json | 8 + docs/versions.json | 1 + 46 files changed, 1455 insertions(+) create mode 100644 docs/cookbook_versioned_docs/version-2.1.0/case-study-classifier.md create mode 100644 docs/cookbook_versioned_docs/version-2.1.0/case-study-hybrid.md create mode 100644 docs/cookbook_versioned_docs/version-2.1.0/case-study-realtime.md create mode 100644 docs/cookbook_versioned_docs/version-2.1.0/introduction.md create mode 100644 docs/cookbook_versioned_docs/version-2.1.0/structure.md create mode 100644 docs/cookbook_versioned_sidebars/version-2.1.0-sidebars.json create mode 100644 docs/versioned_docs/version-2.1.0/base/_category_.json create mode 100644 docs/versioned_docs/version-2.1.0/base/baseclassifier.md create mode 100644 docs/versioned_docs/version-2.1.0/base/basedatamodel.md create mode 100644 docs/versioned_docs/version-2.1.0/base/basesettings.md create mode 100644 docs/versioned_docs/version-2.1.0/complete-workflow.md create mode 100644 docs/versioned_docs/version-2.1.0/knn/_category_.json create mode 100644 docs/versioned_docs/version-2.1.0/knn/knn.md create mode 100644 docs/versioned_docs/version-2.1.0/knn/knnsettings.md create mode 100644 docs/versioned_docs/version-2.1.0/lda/_category_.json create mode 100644 docs/versioned_docs/version-2.1.0/lda/lda.md create mode 100644 docs/versioned_docs/version-2.1.0/lda/ldasettings.md create mode 100644 docs/versioned_docs/version-2.1.0/lldf/_category_.json create mode 100644 docs/versioned_docs/version-2.1.0/lldf/index.mdx create mode 100644 docs/versioned_docs/version-2.1.0/lldf/lldf-class.md create mode 100644 docs/versioned_docs/version-2.1.0/lldf/lldfmodel.md create mode 100644 docs/versioned_docs/version-2.1.0/lldf/lldfsettings.md create mode 100644 docs/versioned_docs/version-2.1.0/lldf/table.md create mode 100644 docs/versioned_docs/version-2.1.0/lr/_category_.json create mode 100644 docs/versioned_docs/version-2.1.0/lr/lr.md create mode 100644 docs/versioned_docs/version-2.1.0/lr/lrsettings.md create mode 100644 docs/versioned_docs/version-2.1.0/pca/_category_.json create mode 100644 docs/versioned_docs/version-2.1.0/pca/pca.md create mode 100644 docs/versioned_docs/version-2.1.0/pca/pcadatamodel.md create mode 100644 docs/versioned_docs/version-2.1.0/pca/pcasettings.md create mode 100644 docs/versioned_docs/version-2.1.0/plsda/_category_.json create mode 100644 docs/versioned_docs/version-2.1.0/plsda/plsda.md create mode 100644 docs/versioned_docs/version-2.1.0/plsda/plsdasettings.md create mode 100644 docs/versioned_docs/version-2.1.0/svm/_category_.json create mode 100644 docs/versioned_docs/version-2.1.0/svm/svm.md create mode 100644 docs/versioned_docs/version-2.1.0/svm/svmsettings.md create mode 100644 docs/versioned_docs/version-2.1.0/tutorial.md create mode 100644 
docs/versioned_docs/version-2.1.0/utils/_category_.json create mode 100644 docs/versioned_docs/version-2.1.0/utils/graphmode.md create mode 100644 docs/versioned_docs/version-2.1.0/utils/graphoutput.md create mode 100644 docs/versioned_docs/version-2.1.0/utils/printconfusionmatrix.md create mode 100644 docs/versioned_docs/version-2.1.0/utils/printtable.md create mode 100644 docs/versioned_docs/version-2.1.0/utils/runsplittests.md create mode 100644 docs/versioned_sidebars/version-2.1.0-sidebars.json diff --git a/docs/cookbook_versioned_docs/version-2.1.0/case-study-classifier.md b/docs/cookbook_versioned_docs/version-2.1.0/case-study-classifier.md new file mode 100644 index 0000000..8b1d3d3 --- /dev/null +++ b/docs/cookbook_versioned_docs/version-2.1.0/case-study-classifier.md @@ -0,0 +1,9 @@ +--- +sidebar_position: 3 +--- + +# Case study: training a classifier from lab data + +:::note +This case study is still **under construction**. +::: \ No newline at end of file diff --git a/docs/cookbook_versioned_docs/version-2.1.0/case-study-hybrid.md b/docs/cookbook_versioned_docs/version-2.1.0/case-study-hybrid.md new file mode 100644 index 0000000..a7c4c5d --- /dev/null +++ b/docs/cookbook_versioned_docs/version-2.1.0/case-study-hybrid.md @@ -0,0 +1,9 @@ +--- +sidebar_position: 4 +--- + +# Case study: hybrid workflow + +:::note +This case study is still **under construction**. +::: \ No newline at end of file diff --git a/docs/cookbook_versioned_docs/version-2.1.0/case-study-realtime.md b/docs/cookbook_versioned_docs/version-2.1.0/case-study-realtime.md new file mode 100644 index 0000000..8725eac --- /dev/null +++ b/docs/cookbook_versioned_docs/version-2.1.0/case-study-realtime.md @@ -0,0 +1,9 @@ +--- +sidebar_position: 5 +--- + +# Case study: real-time data classification + +:::note +This case study is still **under construction**. +::: \ No newline at end of file diff --git a/docs/cookbook_versioned_docs/version-2.1.0/introduction.md b/docs/cookbook_versioned_docs/version-2.1.0/introduction.md new file mode 100644 index 0000000..3a4d2ef --- /dev/null +++ b/docs/cookbook_versioned_docs/version-2.1.0/introduction.md @@ -0,0 +1,26 @@ +--- +sidebar_position: 1 +--- + +# The ChemFuseKit Cookbook: an introduction + +*What is a cookbook, exactly?* + +> A cookbook is a comprehensive collection of recipes that guide users through +the process of learning and mastering the use of a specific library or +programming technique, by providing step-by-step instructions, explanations and +examples. + +## What you'll learn + +In this cookbook you will learn the basic principles of operation of `ChemFuseKit` through practical examples and case studies. You will be shown that all modules follow a basic structure, and once you've learned it for one module, you will be able to reapply that knowledge for all modules. + +You will be shown how to use the library on its own, and also how to use it as a part of a bigger pipeline. + +## Cookbook sectioning + +Here we go: + +- first of all, you will be shown the basic principles and structure; +- then, you will be shown three case studies; +- finally, you'll receive instructions on how to modify and expand this library for your own purposes. 
\ No newline at end of file diff --git a/docs/cookbook_versioned_docs/version-2.1.0/structure.md b/docs/cookbook_versioned_docs/version-2.1.0/structure.md new file mode 100644 index 0000000..64bec98 --- /dev/null +++ b/docs/cookbook_versioned_docs/version-2.1.0/structure.md @@ -0,0 +1,197 @@ +--- +sidebar_position: 2 +--- + +# Project structure + +In this cookbook page, you will be shown how the project is structured, and the purpose of each module. + +## Project Hierarchy + +``` +chemfusekit + │ + ├── lda + │ ├── LDASettings + │ └── LDA + │ + ├── lr + │ ├── LRSettings + │ └── LR + │ + ├── plsda + │ ├── PLSDASettings + │ └── PLSDA + │ + ├── pca + │ ├── PCASettings + │ ├── PCA + │ └── PCADataModel + │ + ├── lldf + │ ├── LLDFSettings + │ ├── LLDF + │ └── LLDFDataModel + │ + ├── svm + │ ├── SVMSettings + │ └── SVM + │ + └── knn + ├── KNNSettings + └── KNN +``` + +As you can see, each module contains a class with the same name of the module, and a settings class. That's because this project tries to be as modular and as regular as possible, for clarity and interoperability. + + +## Modular design features + +The entire library was streamlined to make operations as smooth and easy as possible. Any operation (import and export of both data and classifier models, training, processing, prediction, ...) looks the same on any class. + +
+ 
+> *Want to update the settings in a classifier?* 
+ 
+You'll find the settings for `LDA` in `LDA.settings`. And the settings of `PCA` in `PCA.settings`. Where are the settings for `SVM`? In `SVM.settings`, of course. You get the idea. 
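+ 
+For instance, a minimal sketch (assuming an `LDA` object named `lda` has already been constructed; the names and values here are illustrative):
+ 
+```python
+from chemfusekit.lda import LDASettings, GraphMode
+
+# Hypothetical example: swap in a fresh settings object on an existing classifier
+lda.settings = LDASettings(components=4, output=GraphMode.GRAPHIC)
+```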
+ 
+> *Want to inspect the underlying `sklearn` model in one of the classifiers?* 
+ 
+Let's say you're using an `LR` object. Its underlying sklearn classifier is in `LR.model`, just as the underlying sklearn classifier of `KNN` is in `KNN.model`. 
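+ 
+A minimal sketch of that kind of inspection (assuming a trained `LR` object named `lr`; the name is illustrative):
+ 
+```python
+# `lr.model` is None until `lr.lr()` has been called
+print(type(lr.model))         # e.g. LogisticRegression once training is done
+print(lr.model.get_params())  # the estimator's hyperparameters as a dict
+```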
+ +> *Want to swap out the data in a model and retrain it?* + +Let's assume your new data is called `new_data`. Knowing that the training data, when present, is located in the `.data` field, just do this: + +```python +knn.data = new_data +knn.knn() +``` + +The training method is always called like its container class, but in lower case. To train a `KNN` model, like in this case, you just have to call `.knn()` on it. Same goes for `.lda()` on `LDA`, `.lldf()` on `LLDF`, and so on. + + +### Modular settings + +The settings for all classifiers (that is, all classes except `LLDF` and `PCA`) inherit from a base class called [`BaseSettings`](/docs/base/basesettings) in the `base` module: + +```mermaid +classDiagram + class BaseSettings { + +output: GraphMode + +test_split: bool + __init__(output, test_split) + } + + class KNNSettings { + ... + } + + class LDASettings { + ... + } + + class LRSettings { + ... + } + + class PLSDASettings { + ... + } + + class SVMSettings { + ... + } + + BaseSettings *-- KNNSettings + BaseSettings *-- LDASettings + BaseSettings *-- LRSettings + BaseSettings *-- PLSDASettings + BaseSettings *-- SVMSettings +``` + + +### Modular classifiers + +The classifiers themselves all inherit from a base class called [`BaseClassifier`](/docs/base/baseclassifier) in the `base` module: + +```mermaid +classDiagram + + class BaseClassifier { + +settings: BaseSettings + +data: BaseDataModel + +model: sklearn model + __init__(settings, data) + import_model(import_path: str) + export_model(export_path: str) + predict(x_data: pd.DataFrame) + } + + class KNN { + ... + } + + class LDA { + ... + } + + class LR { + ... + } + + class PLSDA { + ... + } + + class SVM { + ... + } + + BaseClassifier *-- KNN + BaseClassifier *-- LDA + BaseClassifier *-- LR + BaseClassifier *-- PLSDA + BaseClassifier *-- SVM +``` + + +### Modular data types + +The data types are modular and interexchangeable too. Both [`LLDFDataModel`](/docs/lldf/lldfmodel) and [`PCADataModel`](/docs/pca/pcadatamodel) inherit from [`BaseDataModel`](/docs/base/basedatamodel) as shown in the following diagram: + +```mermaid +classDiagram + class BaseDataModel { + +x_data: DataFrame + +x_train: DataFrame + +y: ndarray + __init__(x_data, x_train, y) + } + + class LLDFDataModel { + ... + __init__(...) + } + + class PCADataModel { + +array_scores: ndarray + +components: int + __init__(..., array_scores) + } + + BaseDataModel *-- LLDFDataModel + BaseDataModel *-- PCADataModel +``` + +This allows all the classifiers to use the `LLDF` data, dimension-reduced `PCA` data, or any other type of data as long as it follows the `BaseDataModel` template. + +## File import and export + +All the data models (`BaseDataModel`, and its derived, `LLDFDataModel` and `PCADataModel`) can export their content to Excel tables. + +All classifiers derived from `BaseClassifier` (`KNN`, `LDA`, `LR`, `PLSDA`, `SVM`) can import and export their sklearn data model from and to file. \ No newline at end of file diff --git a/docs/cookbook_versioned_sidebars/version-2.1.0-sidebars.json b/docs/cookbook_versioned_sidebars/version-2.1.0-sidebars.json new file mode 100644 index 0000000..caea0c0 --- /dev/null +++ b/docs/cookbook_versioned_sidebars/version-2.1.0-sidebars.json @@ -0,0 +1,8 @@ +{ + "tutorialSidebar": [ + { + "type": "autogenerated", + "dirName": "." 
+ } + ] +} diff --git a/docs/cookbook_versions.json b/docs/cookbook_versions.json index 3aea034..dc236fe 100644 --- a/docs/cookbook_versions.json +++ b/docs/cookbook_versions.json @@ -1,3 +1,4 @@ [ + "2.1.0", "2.0.0" ] diff --git a/docs/versioned_docs/version-2.1.0/base/_category_.json b/docs/versioned_docs/version-2.1.0/base/_category_.json new file mode 100644 index 0000000..ac3f2d9 --- /dev/null +++ b/docs/versioned_docs/version-2.1.0/base/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Base module", + "position": 9, + "link": { + "type": "generated-index", + "description": "A module containing base classes for all the other modules." + } +} diff --git a/docs/versioned_docs/version-2.1.0/base/baseclassifier.md b/docs/versioned_docs/version-2.1.0/base/baseclassifier.md new file mode 100644 index 0000000..05448ad --- /dev/null +++ b/docs/versioned_docs/version-2.1.0/base/baseclassifier.md @@ -0,0 +1,37 @@ +--- +sidebar_position: 1 +--- + +# BaseClassifier class + +A base class from which all classifiers inherit. + +## Syntax + +```python +BaseClassifier(settings: BaseSettings, data: BaseDataModel) +``` + +## Constructor parameters + +- `settings`: object of type [`BaseSettings`](./basesettings.md). Contains the settings for + the `BaseClassifier` object. +- `data`: object of type [`BaseDataModel`](../base/basedatamodel.md). Contains the data to be analyzed. + +## Fields + +- `settings`: object of type [`KNNSettings`](/tesi/docs/knn/knnsettings). Contains the settings for + the `BaseClassifier` object. +- `data`: object of type [`BaseDataModel`](../base/basedatamodel.md). Contains the data to be analyzed. +- `model`: a `sklearn` model from `scikit-learn`. Defaults to `None`. + +## Methods + +- `import_model(import_path: str)`: loads a model from file +- `export_model(export_path: str)`: exports a model to file + - *raises*: + - `RuntimeError("You haven't trained the model yet! You cannot export it now.")` when trying to export an untrained model +- `predict(x_data: pd.DataFrame)`: performs prediction through the `model` + - *raises*: + - `TypeError("X data for prediction must be non-empty.")` on empty `x_data` + - `RuntimeError("The model is not trained yet!")` when run with an untrained `model` diff --git a/docs/versioned_docs/version-2.1.0/base/basedatamodel.md b/docs/versioned_docs/version-2.1.0/base/basedatamodel.md new file mode 100644 index 0000000..d66f32e --- /dev/null +++ b/docs/versioned_docs/version-2.1.0/base/basedatamodel.md @@ -0,0 +1,28 @@ +--- +sidebar_position: 3 +--- + +# BaseDataModel class + +This class models the output data for all data-outputting operations (currently, the [`LLDF`](../lldf/lldf-class.md) operation and the [`PCA`](../pca/pca.md) operation). 
+ +## Syntax + +```python +BaseDataModel(x_data: pd.DataFrame, x_train: pd.DataFrame, y: pd.DataFrame) +``` + +## Fields and constructor parameters + +The first two are `Pandas` `DataFrame` objects: +- `x_data` +- `x_train` +The last is a `NumPy` `ndarray`: +- `y` + +## Methods + +Both methods are inherited from [`BaseDataModel`](../base/basedatamodel.md): + +- `@classmethod def load_from_file(import_path: str, sheet_name: str = 'Sheet1')`: creates a `BaseDataModel` instance from an Excel file +- `export_to_file(export_path: str, sheet_name: str = 'Sheet1')`: exports the `BaseDataModel` contents to an Excel table diff --git a/docs/versioned_docs/version-2.1.0/base/basesettings.md b/docs/versioned_docs/version-2.1.0/base/basesettings.md new file mode 100644 index 0000000..740581b --- /dev/null +++ b/docs/versioned_docs/version-2.1.0/base/basesettings.md @@ -0,0 +1,21 @@ +--- +sidebar-position: 1 +--- + +# BaseSettings class + +Holds the settings for all classifier object. It's not meant for direct usage, only for inheritance. + + +## Syntax + +```python +BaseSettings(output: GraphMode, test_split: false) +``` + +## Fields and constructor parameters +- `output`: toggles graph output mode. Defaults to [`GraphMode.NONE`](../utils/graphmode.md). +- `test_split`: toggles the training split test phase. Defaults to `False`. Requires `output` to be set to `True` to work. + +The constructor raises: +- `Warning("You selected test_split but it won't run because you disabled the output.")` if `test_split` is run with `output` set to false (split tests only produce graphical output, and are useless when run with disabled output). diff --git a/docs/versioned_docs/version-2.1.0/complete-workflow.md b/docs/versioned_docs/version-2.1.0/complete-workflow.md new file mode 100644 index 0000000..f607520 --- /dev/null +++ b/docs/versioned_docs/version-2.1.0/complete-workflow.md @@ -0,0 +1,31 @@ +--- +sidebar-position: 7 +--- + +# Complete workflow + +Here's a sequence diagram to represent an example workflow, from the raw data +tables to classification, including data fusion, PCA and training. + +```plantuml +actor User +participant LLDF +participant PCA +participant Classifier + +User -> LLDF : Upload training tables +User -> LLDF : Set parameters +User -> Classifier : (optional) Upload model + +LLDF -> PCA : Pass preprocessed / fused tables +LLDF --> User : Download fused tables +LLDF -> Classifier : Pass preprocessed / fused tables \nRun classification +PCA -> Classifier : (optional) Set number of components + +Classifier --> User : classification results, graphs +PCA --> User : classification results, graphs +Classifier --> User : (optional) download trained model + +User -> Classifier : pass data to classify +Classifier --> User : classification results +``` diff --git a/docs/versioned_docs/version-2.1.0/knn/_category_.json b/docs/versioned_docs/version-2.1.0/knn/_category_.json new file mode 100644 index 0000000..e670264 --- /dev/null +++ b/docs/versioned_docs/version-2.1.0/knn/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "kNN module", + "position": 6, + "link": { + "type": "generated-index", + "description": "A module for k-nearest neighbors analysis." 
+ } +} diff --git a/docs/versioned_docs/version-2.1.0/knn/knn.md b/docs/versioned_docs/version-2.1.0/knn/knn.md new file mode 100644 index 0000000..2cecad5 --- /dev/null +++ b/docs/versioned_docs/version-2.1.0/knn/knn.md @@ -0,0 +1,47 @@ +--- +sidebar_position: 1 +--- + +# KNN class + +A class to store the data, methods and artifacts for _k-Nearest Neighbors Analysis_. + +## Syntax + +```python +KNN(settings: KNNSettings, data: LLDFModel) +``` + +## Constructor parameters + +- `settings`: object of type [`KNNSettings`](knnsettings.md). Contains the settings for + the `KNN` object. +- `data`: object of type [`BaseDataModel`](../base/basedatamodel.md). Contains the data to be analyzed. + +## Fields + +- `settings`: object of type [`KNNSettings`](/tesi/docs/knn/knnsettings). Contains the settings for + the `KNN` object. +- `fused_data`: onject of type ['LLDFModel`](/tesi/docs/lldf/lldfmodel). Contains the + artifacts from the data fusion process. +- `model`: a `KNeighborsClassifier` model from `scikit-learn`. Defaults to `None`. + +## Methods + +- `knn(self)`: trains the k-Neighbors Analysis model +- `predict(self, x_data)`: performs LDA prediction once the model is trained. + - *raises*: + - `RuntimeError("The kNN model is not trained yet!")` if the `KNN` model hasn't been trained yet + +## Example + +```python +from chemfusekit.knn import KNN + +# Initialize and run the LDA class +knn = KNN(settings, lldf.fused_data) +knn.knn() + +# Run predictions +knn.predict(x_data) +``` \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/knn/knnsettings.md b/docs/versioned_docs/version-2.1.0/knn/knnsettings.md new file mode 100644 index 0000000..81f9486 --- /dev/null +++ b/docs/versioned_docs/version-2.1.0/knn/knnsettings.md @@ -0,0 +1,64 @@ +--- +sidebar-position: 1 +--- + +# KNNSettings class + +Holds the settings for the [`KNN`](knn.md) object. + +Inherits from [`BaseSettings`](../base/basesettings.md). + +## Syntax + +```python +KNNSettings( + n_neighbors: int, + metric: str | Callable, + weights: str | Callable, + algorithm: str, + output: GraphMode, + test_split: false +) +``` + +## Fields and constructor parameters +- `n_neighbors`: the amount of components to be used in the `KNN` model. Defaults to 15. +- `metric`: the distance metric for the model. It can take one of the following values: + - `minkwoski` + - `precomputed` + - `euclidean` + or be a callable object. +- `weights`: the weight metric for the model. It can take one of the following values: + - `uniform` + - `distance` + or be a callable object. +- `algorithm`: the algorithm for the model. It can take one of the following values: + - `auto` + - `ball_tree` + - `kd_tree` + - `brute` + or be a callable object. +- `output`: toggles graph output mode. Defaults to [`GraphMode.NONE`](../utils/graphmode.md). +- `test_split`: toggles the training split test phase. Defaults to `False`. Requires `output` to be set to `True` to work. + +The constructor raises: +- `ValueError("Invalid n_neighbors number: should be a positive integer.")` if the number of components is not valid. +- `ValueError("Invalid metric: should be 'minkwoski', 'precomputed', 'euclidean' or a callable.")` if the chosen metric is neither available nor a callable function. +- `ValueError("Invalid weight: should be 'uniform', 'distance' or a callable")` if the chosen weight is neither available nor a callable function. +- `ValueError("Invalid algorithm: should be 'auto', 'ball_tree', 'kd_tree' or 'brute'.")` if the chosen algotithm does not exist. 
+- `Warning("You selected test_split but it won't run because you disabled the output.")` if `test_split` is run with `output` set to false (split tests only produce graphical output, and are useless when run with disabled output). + +## Example + +```python +from chemfusekit.knn import KNNSettings, GraphMode + +settings = KNNSettings( + n_neighbors=20, # pick 20 neighbors + metric='minkowski', # choose the metric + weights='distance', # choose the weight metric + algorithm='auto', # the best algorithm gets chosen automatically + output=GraphMode.GRAPHIC, # graph output is enabled + test_split=True # the model will be split-tested at the end of the training +) +``` \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/lda/_category_.json b/docs/versioned_docs/version-2.1.0/lda/_category_.json new file mode 100644 index 0000000..8d1f6af --- /dev/null +++ b/docs/versioned_docs/version-2.1.0/lda/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "LDA module", + "position": 4, + "link": { + "type": "generated-index", + "description": "A module for linear discriminant analysis." + } +} diff --git a/docs/versioned_docs/version-2.1.0/lda/lda.md b/docs/versioned_docs/version-2.1.0/lda/lda.md new file mode 100644 index 0000000..2b8f19e --- /dev/null +++ b/docs/versioned_docs/version-2.1.0/lda/lda.md @@ -0,0 +1,48 @@ +--- +sidebar_position: 1 +--- + +# LDA class + +A class to store the data, methods and artifacts for _Linear Discriminant Analysis_. + +## Syntax + +```python +LDA(settings: LDASettings, data: BaseDataModel) +``` + +## Constructor parameters + +- `settings`: object of type [`LDASettings`](./ldasettings.md). Contains the settings for + the `LDA` object. +- `data`: object of type [`BaseDataModel`](../base/basedatamodel.md). Contains the data to be analyzed. + +## Fields + +- `settings`: object of type [`LDASettings`](./ldasettings.md). Contains the settings for + the `LDA` object. +- Fused data fields: + - `x_data` + - `x_train` + - `y` +- `model`: a `LinearDiscriminantAnalysis` model from `scikit-learn`. Defaults to `None`. + +## Methods + +- `lda(self)`: performs Linear Discriminant Analysis +- `__print_prediction_graphs(self, y_test, y_pred)`: helper function to print + graphs and stats about LDA predictions +- `predict(self, x_data)`: performs LDA prediction once the model is trained. + - *raises*: + - `RuntimeError("The LDA model is not trained yet!")` if the LDA model hasn't been trained yet + +## Example + +```python +from chemfusekit.lda import LDA + +# Initialize and run the LDA class +lda = LDA(lldf.fused_data, settings) +lda.lda() +``` \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/lda/ldasettings.md b/docs/versioned_docs/version-2.1.0/lda/ldasettings.md new file mode 100644 index 0000000..c9c850e --- /dev/null +++ b/docs/versioned_docs/version-2.1.0/lda/ldasettings.md @@ -0,0 +1,39 @@ +--- +sidebar_position: 2 +--- + +# LDASettings class + +Holds the settings for the [`LDA`](./lda.md) object. + +Inherits from [`BaseSettings`](../base/basesettings.md). + +## Syntax + +```python +LDASettings(components: int, output: GraphMode, split_test: bool) +``` + +## Fields and constructor parameters + +- `components`: the amount of components to be used in the LDA model. Defaults to 3. +- `output`: toggles graph output. Defaults to [`GraphMode.NONE`](../utils/graphmode.md). +- `test_split`: toggles split testing. Defaults to `False`. 
+ + +The constructor raises: +- `ValueError("Invalid component number: must be a > 1 integer.")` if the number of + components is not valid. +- `Warning("You selected test_split but it won't run because you disabled the output.")` if split tests are run with `output` disabled + +## Example + +```python +from chemfusekit.lda import LDASettings, GraphMode + +settings = LDASettings( + components=(pca.components - 1), # one less component than the number determined by PCA + output=GraphMode.GRAPHIC, # graphs will be printed + test_split=True # split testing is enabled +) +``` \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/lldf/_category_.json b/docs/versioned_docs/version-2.1.0/lldf/_category_.json new file mode 100644 index 0000000..096ed0c --- /dev/null +++ b/docs/versioned_docs/version-2.1.0/lldf/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "LLDF Module", + "position": 2, + "link": { + "type": "doc", + "id": "index" + } +} diff --git a/docs/versioned_docs/version-2.1.0/lldf/index.mdx b/docs/versioned_docs/version-2.1.0/lldf/index.mdx new file mode 100644 index 0000000..01c1d16 --- /dev/null +++ b/docs/versioned_docs/version-2.1.0/lldf/index.mdx @@ -0,0 +1,47 @@ +import DocCardList from '@theme/DocCardList'; + +# LLDF Module + +A module for low-level data fusion. + +# Members + + + +# UML + +```mermaid +classDiagram + class Table { + +str file_path + +str sheet_name + +str preprocessing + __init__(file_path, sheet_name, preprocessing) + } + + class LLDFSettings { + +GraphOutput output + __init__(output) + } + + class LLDF { + +LLDFSettings settings + +Table[] tables + +LLDFDataModel | None fused_data + lldf() + -_snv() + +export_data(export_path: str) + __init__(settings, tables[]) + } + + class LLDFDataModel { + +x_data: pd.DataFrame + +x_train: pd.DataFrame + +y: np.ndarray + __init__(x_data, x_train, y) + } + + LLDF *-- LLDFModel + LLDF *-- Table + LLDF *-- LLDFSettings +``` \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/lldf/lldf-class.md b/docs/versioned_docs/version-2.1.0/lldf/lldf-class.md new file mode 100644 index 0000000..1a0be38 --- /dev/null +++ b/docs/versioned_docs/version-2.1.0/lldf/lldf-class.md @@ -0,0 +1,67 @@ +--- +sidebar_position: 1 +--- + +# LLDF class + +The `LLDF` class is used for _low-level data fusion_. + +## Syntax + +```python +LLDF(tables: List[Table], lldf_settings: LLDFSettings) +``` + +## Constructor parameters + +- `tables`: `List[`[`Table`](./table.md)`]` + + A list of `Table` objects containing info about the files to import + +- `lldf_settings`: [`LLDFSettings`](./lldfsettings) + + The settings for the LLDF object. + +## Fields + +- `settings`: [`LLDFSettings`](./lldfsettings) + + The settings for the LLDF object. + +- `tables`: `List[`[`Table`](./table.md)`]` + + A list of `Table` objects containing info about the files to import + +- `fused_data`: [`LLDFModel`](./lldfmodel.md) + + The resulting model containing the data fusion artifacts. 
+ +## Methods + +- `_snv(self, input_data)`: static method to rescale input arrays +- `lldf(self)`: performs low-level data fusion on the data passed in the settings + - *raises*: + - `FileNotFoundError("Error opening the selected files.")` + if the files specified in the settings are not valid + - `SyntaxError("LLDF: this type of preprocessing does not exist")` + if the preprocessing method specified in the settings is not valid +- `export_data(self, export_path)`: exports the data fusion artifacts to an Excel file + - *raises*: + - `RuntimeError("Cannot export data before data fusion.")` if export is + attempted before fusing the data + - `RuntimeError("Could not export data to the selected path.")` if any error + happens during the export phase + + +## Example + +```python +from chemfusekit.lldf import LLDF + +# Initialize and run low-level data fusion +lldf = LLDF(tables, lldf_settings) +lldf.lldf() + +# Export the LLDF data to an Excel file +lldf.export_data('output_file.xlsx') +``` \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/lldf/lldfmodel.md b/docs/versioned_docs/version-2.1.0/lldf/lldfmodel.md new file mode 100644 index 0000000..594ef0e --- /dev/null +++ b/docs/versioned_docs/version-2.1.0/lldf/lldfmodel.md @@ -0,0 +1,29 @@ +--- +sidebar_position: 3 +--- + +# LLDFDataModel class + +This class models the output data from the [`LLDF`](./lldf-class.md) operation. + +It inherits from the [`BaseDataModel`](../base/basedatamodel.md). + +## Syntax + +```python +LLDFModel(x_data: pd.DataFrame, x_train: pd.DataFrame, y: np.ndarray) +``` + +## Fields and constructor parameters + +The first two are `Pandas` `DataFrame` objects: +- `x_data` +- `x_train` + +The last is a `NumPy` `ndarray`: +- `y` + +## Methods + +- `@classmethod def load_from_file(import_path: str, sheet_name: str = 'Sheet1')`: creates an `LLDFDataModel` instance from an Excel file +- `export_to_file(export_path: str, sheet_name: str = 'Sheet1')`: exports the `LLDFDataModel` contents to an Excel table \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/lldf/lldfsettings.md b/docs/versioned_docs/version-2.1.0/lldf/lldfsettings.md new file mode 100644 index 0000000..30f985f --- /dev/null +++ b/docs/versioned_docs/version-2.1.0/lldf/lldfsettings.md @@ -0,0 +1,25 @@ +--- +sidebar_position: 2 +--- + +# LLDFSettings class + +Holds the settings for the [`LLDF`](./lldf-class.md) object. + +## Syntax + +```python +LLDFSettings(output: GraphMode) +``` + +## Fields and constructor parameters +- `output`: toggles graph output. Defaults to [`GraphMode.NONE`](../utils/graphmode.md). + +## Example + +```python +from chemfusekit.lldf import LLDFSettings + +# Initialize the settings for low-level data fusion +lldf_settings = LLDFSettings(output=GraphMode.TEXT) +``` \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/lldf/table.md b/docs/versioned_docs/version-2.1.0/lldf/table.md new file mode 100644 index 0000000..782da64 --- /dev/null +++ b/docs/versioned_docs/version-2.1.0/lldf/table.md @@ -0,0 +1,39 @@ +--- +sidebar_position: 4 +--- + +# Table class + +Holds the information for a single table to import. + +The [`LLDF`](./lldf-class.md) object takes a list of `Table` as a parameter. 
+ +## Syntax + +```python +Table( + file_path: str + sheet_name: str + preprocessing: str +) +``` + +## Fields and constructor parameters + +- `file_path`: a `str` containing the path to the Excel datasheet +- `sheet_name`: a `str` containing the name of the sheet to select within the Excel file +- `preprocessing`: a `str` with the name of the preprocessing to be applied to the table. + Available options: `snv` (normalization), `savgol` (Savitski-Golay smoothing), `savgol+snv` (both), `none` (no processing). + +## Example + +```python +from chemfusekit.lldf import Table + +# Create a table +table1 = Table( + file_path='tests/qepas.xlsx', + sheet_name='Sheet1', + preprocessing='snv' # normalization preprocessing; other options: savgol, both or none +) +``` \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/lr/_category_.json b/docs/versioned_docs/version-2.1.0/lr/_category_.json new file mode 100644 index 0000000..c9a9071 --- /dev/null +++ b/docs/versioned_docs/version-2.1.0/lr/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "LR module", + "position": 6, + "link": { + "type": "generated-index", + "description": "A module for logistic regression." + } +} diff --git a/docs/versioned_docs/version-2.1.0/lr/lr.md b/docs/versioned_docs/version-2.1.0/lr/lr.md new file mode 100644 index 0000000..f7ded6f --- /dev/null +++ b/docs/versioned_docs/version-2.1.0/lr/lr.md @@ -0,0 +1,51 @@ +--- +sidebar_position: 1 +--- + +# LR class + +A class to store the data, methods and artifacts for _Logistic Regression_. + +## Syntax + +```python +LR(settings: LRSettings, array_scores: np.ndarray, y: np.ndarray): +``` + +## Constructor parameters + +- `settings`: object of type [`LRSettings`](./lrsettings.md). Contains the settings for + the `LR` object. +- `array_scores`: `np.ndarray`, product of [`PCA` analysis](../pca/). +- `y`: `np.ndarray`, product of [`PCA` analysis](../pca/). + +## Fields + +- `settings`: object of type [`LRSettings`](./lrsettings.md). Contains the settings for + the `LR` object. +- `array_scores`: product of [`PCA` analysis](../pca/). +- `y`: product of [`PCA` analysis](../pca/). +- `model`: A `LR` model from `scikit-learn`. Defaults to `None`. + +## Methods + +- `lr(self)`: performs Logistic Regression. +- `predict(self, x_sample)`: performs LR-based classification on input data. + - *raises*: + - `RuntimeError("The LR model is not trained yet!")` if prediction is + started without training the model first; + - `raise TypeError("X data for LDA prediction must be non-empty.")` if + the data passed as argument is null. + +## Example + +```python +from chemfusekit.lr import LR + +# Initialize and train the LR class +lr = LR(settings, array_scores, y) +lr.lr() + +# Perform prediction +lr.predict(x_sample) +``` \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/lr/lrsettings.md b/docs/versioned_docs/version-2.1.0/lr/lrsettings.md new file mode 100644 index 0000000..847c002 --- /dev/null +++ b/docs/versioned_docs/version-2.1.0/lr/lrsettings.md @@ -0,0 +1,44 @@ +--- +sidebar_position: 2 +--- + +# LRSettings class + +Holds the settings for the [`LR`](./lr.md) object. + +Inherits from [`BaseSettings`](../base/basesettings.md). + +## Syntax + +```python +LRSettings(algorithm: str, output: GraphMode, test_split: bool) +``` + +## Fields and constructor parameters + +- `algorithm`: the amount of components to be used in the LDA model. Defaults to + `liblinear`. 
Other available options: + - `lbfgs` + - `newton-cg` + - `newton-cholesky` + - `sag` + - `saga` +- `output`: toggles graph output. Defaults to [`GraphMode.NONE`](../utils/graphmode.md). +- `test_split`: toggles split testing. Defaults to `False`. + +The constructor raises: +- `ValueError("This algorithm does not exist.")` if the selected `algorithm` + is not a valid option. +- `Warning("You selected test_split but it won't run because you disabled the output.")` if split tests are run with `output` disabled + +## Example + +```python +from chemfusekit.lr import LRSettings, GraphMode + +settings = LRSettings( + algorithm='newton-cg', + output=GraphMode.GRAPHIC, # graphs will be printed + test_split=True # split testing is enabled +) +``` \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/pca/_category_.json b/docs/versioned_docs/version-2.1.0/pca/_category_.json new file mode 100644 index 0000000..266d36f --- /dev/null +++ b/docs/versioned_docs/version-2.1.0/pca/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "PCA module", + "position": 3, + "link": { + "type": "generated-index", + "description": "A module for principal component analysis." + } +} diff --git a/docs/versioned_docs/version-2.1.0/pca/pca.md b/docs/versioned_docs/version-2.1.0/pca/pca.md new file mode 100644 index 0000000..1a07127 --- /dev/null +++ b/docs/versioned_docs/version-2.1.0/pca/pca.md @@ -0,0 +1,46 @@ +--- +sidebar_position: 1 +--- + +# PCA class + +A class to store the data, methods and artifacts for _Principal Component Analysis_. + +## Syntax + +```python +PCA(settings: PCASettings, data: BaseDataModel) +``` + +## Constructor parameters + +- `settings`: object of type [`PCASettings`](./pcasettings.md). Contains the settings for + the `PCA` object. +- `data`: object of type [`BaseDataModel`](../base/basedatamodel.md). Contains the data to be analyzed. + +## Fields + +- `fused_data`: object of type [`LLDF`](../lldf/lldf-class.md). Contains the data to be analyzed. +- `components`: Number of components for the PCA analysis. Defaults to 0. +- `pca_model`: A `PCA` model from `scikit-learn`. Defaults to `None`. +- `settings`: object of type [`PCASettings`](./pcasettings.md). Contains the settings for + the `PCA` object. + +## Methods + +- `pca(self)`: performs Principal Component Analysis +- `pca_stats(self)` produces PCA-related statistics and graphs. + +## Example + +```python +from chemfusekit.pca import PCA + +# Initialize and run the PCA class +pca = PCA(lldf.fused_data, pca_settings) +pca.pca() + +# Print the number of components and the statistics +print(pca.components) +pca.pca_stats() +``` \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/pca/pcadatamodel.md b/docs/versioned_docs/version-2.1.0/pca/pcadatamodel.md new file mode 100644 index 0000000..f76b156 --- /dev/null +++ b/docs/versioned_docs/version-2.1.0/pca/pcadatamodel.md @@ -0,0 +1,35 @@ +--- +sidebar_position: 3 +--- + +# PCADataModel class + +This class models the output data from the [`PCA`](./pca.md) operation. + +It inherits from the [`BaseDataModel`](../base/basedatamodel.md). 
+
+## Syntax
+
+```python
+PCADataModel(x_data: pd.DataFrame, x_train: pd.DataFrame, y: np.ndarray, array_scores: np.ndarray, components: int)
+```
+
+## Fields and constructor parameters
+
+The first two are `Pandas` `DataFrame` objects:
+- `x_data`
+- `x_train`
+
+The next two are `NumPy` `ndarray`s:
+- `y`
+- `array_scores`
+
+The last is an integer:
+- `components`
+
+## Methods
+
+Both methods are inherited from [`BaseDataModel`](../base/basedatamodel.md):
+
+- `@classmethod def load_from_file(import_path: str, sheet_name: str = 'Sheet1')`: creates a `BaseDataModel` instance from an Excel file
+- `export_to_file(export_path: str, sheet_name: str = 'Sheet1')`: exports the `BaseDataModel` contents to an Excel table
diff --git a/docs/versioned_docs/version-2.1.0/pca/pcasettings.md b/docs/versioned_docs/version-2.1.0/pca/pcasettings.md
new file mode 100644
index 0000000..ca533ea
--- /dev/null
+++ b/docs/versioned_docs/version-2.1.0/pca/pcasettings.md
@@ -0,0 +1,41 @@
+---
+sidebar_position: 2
+---
+
+# PCASettings class
+
+Holds the settings for the [`PCA`](./pca.md) object.
+
+## Syntax
+
+```python
+PCASettings(
+    target_variance: float,
+    confidence_level: float,
+    initial_components: int,
+    output: GraphMode
+)
+```
+
+## Fields and constructor parameters
+
+- `target_variance`: the minimum cumulative explained variance to reach in the analysis.
+  Defaults to 0.95.
+- `confidence_level`: the confidence level for statistical tests. Defaults to 0.05.
+- `initial_components`: the initial amount of components to be used in the PCA model.
+  Defaults to 10.
+- `output`: toggles graph output. Defaults to [`GraphMode.NONE`](../utils/graphmode.md).
+
+## Example
+
+```python
+from chemfusekit.pca import PCASettings, GraphMode
+
+# Initialize the settings for Principal Component Analysis
+pca_settings = PCASettings(
+    target_variance=0.99,
+    confidence_level=0.05,
+    initial_components=10,
+    output=GraphMode.GRAPHIC  # graphs will be printed
+)
+```
\ No newline at end of file
diff --git a/docs/versioned_docs/version-2.1.0/plsda/_category_.json b/docs/versioned_docs/version-2.1.0/plsda/_category_.json
new file mode 100644
index 0000000..aaf0cbf
--- /dev/null
+++ b/docs/versioned_docs/version-2.1.0/plsda/_category_.json
@@ -0,0 +1,8 @@
+{
+  "label": "PLSDA module",
+  "position": 7,
+  "link": {
+    "type": "generated-index",
+    "description": "A module for partial least squares discriminant analysis."
+  }
+}
diff --git a/docs/versioned_docs/version-2.1.0/plsda/plsda.md b/docs/versioned_docs/version-2.1.0/plsda/plsda.md
new file mode 100644
index 0000000..3287fd6
--- /dev/null
+++ b/docs/versioned_docs/version-2.1.0/plsda/plsda.md
@@ -0,0 +1,47 @@
+---
+sidebar_position: 1
+---
+
+# PLSDA class
+
+A class to store the data, methods and artifacts for _Partial Least Squares Discriminant Analysis_.
+
+## Syntax
+
+```python
+PLSDA(settings: PLSDASettings, data: BaseDataModel)
+```
+
+## Constructor parameters
+
+- `settings`: object of type [`PLSDASettings`](plsdasettings.md). Contains the settings for
+  the `PLSDA` object.
+- `data`: object of type [`BaseDataModel`](../base/basedatamodel.md). Contains the data to be analyzed.
+
+## Fields
+
+- `settings`: object of type [`PLSDASettings`](./plsdasettings.md). Contains the settings for
+  the `PLSDA` object.
+- `fused_data`: object of type [`LLDFModel`](../lldf/lldfmodel.md). Contains the
+  artifacts from the data fusion process.
+- `model`: a `PLSRegression` model from `scikit-learn`. Defaults to `None`.
+
+## Methods
+
+- `plsda(self)`: trains the Partial Least Squares Discriminant Analysis model.
+- `predict(self, x_data)`: performs PLSDA prediction once the model is trained.
+  - *raises*:
+    - `RuntimeError("The PLSDA model is not trained yet!")` if the `PLSDA` model hasn't been trained yet
+
+## Example
+
+```python
+from chemfusekit.plsda import PLSDA
+
+# Initialize and run the PLSDA class
+plsda = PLSDA(settings, lldf.fused_data)
+plsda.plsda()
+
+# Run predictions
+plsda.predict(x_data)
+```
\ No newline at end of file
diff --git a/docs/versioned_docs/version-2.1.0/plsda/plsdasettings.md b/docs/versioned_docs/version-2.1.0/plsda/plsdasettings.md
new file mode 100644
index 0000000..7b547a7
--- /dev/null
+++ b/docs/versioned_docs/version-2.1.0/plsda/plsdasettings.md
@@ -0,0 +1,38 @@
+---
+sidebar_position: 2
+---
+
+# PLSDASettings class
+
+Holds the settings for the [`PLSDA`](./plsda.md) object.
+
+Inherits from [`BaseSettings`](../base/basesettings.md).
+
+## Syntax
+
+```python
+PLSDASettings(n_components: int, output: GraphMode, test_split: bool)
+```
+
+## Fields and constructor parameters
+
+- `n_components`: number of components for the PLSDA analysis. Defaults to 3.
+- `output`: toggles graph output. Defaults to [`GraphMode.NONE`](../utils/graphmode.md).
+- `test_split`: toggles the training split test phase. Defaults to `False`. Requires `output` to not be set to `GraphMode.NONE` to work.
+
+The constructor raises:
+- `ValueError("Invalid n_components number: should be a positive integer.")` if the number of components is below 1.
+- `Warning("You selected test_split but it won't run because you disabled the output.")` if `test_split` is run with `output` set to `GraphMode.NONE` (split tests only produce graphical output, and are useless when run with disabled output).
+
+## Example
+
+```python
+from chemfusekit.plsda import PLSDASettings, GraphMode
+
+# Initialize the settings for Partial Least Squares Discriminant Analysis
+plsda_settings = PLSDASettings(
+    n_components=5,
+    output=GraphMode.GRAPHIC,  # graphs will be printed
+    test_split=False  # no split testing
+)
+```
\ No newline at end of file
diff --git a/docs/versioned_docs/version-2.1.0/svm/_category_.json b/docs/versioned_docs/version-2.1.0/svm/_category_.json
new file mode 100644
index 0000000..f0ebda8
--- /dev/null
+++ b/docs/versioned_docs/version-2.1.0/svm/_category_.json
@@ -0,0 +1,8 @@
+{
+  "label": "SVM module",
+  "position": 5,
+  "link": {
+    "type": "generated-index",
+    "description": "A module for support vector machine analysis."
+  }
+}
diff --git a/docs/versioned_docs/version-2.1.0/svm/svm.md b/docs/versioned_docs/version-2.1.0/svm/svm.md
new file mode 100644
index 0000000..b984ff7
--- /dev/null
+++ b/docs/versioned_docs/version-2.1.0/svm/svm.md
@@ -0,0 +1,49 @@
+---
+sidebar_position: 1
+---
+
+# SVM class
+
+A class to store the data, methods and artifacts for _Support Vector Machine Analysis_.
+
+## Syntax
+
+```python
+SVM(settings: SVMSettings, data: BaseDataModel)
+```
+
+## Constructor parameters
+
+- `settings`: object of type [`SVMSettings`](./svmsettings.md). Contains the settings for
+  the `SVM` object.
+- `data`: object of type [`BaseDataModel`](../base/basedatamodel.md). Contains the data to be analyzed.
+
+The constructor raises:
+- `ValueError("Fused data input cannot be empty.")` if the input data is null
+- `ValueError("Settings cannot be empty.")` if the settings are null
+
+## Fields
+
+- `fused_data`: object of type [`LLDFModel`](../lldf/lldfmodel.md). Contains the data to be analyzed.
+- `settings`: object of type [`SVMSettings`](./svmsettings.md). Contains the settings for
+  the `SVM` object.
+- `model`: an `SVC` model from `scikit-learn`. Defaults to `None`.
+
+## Methods
+
+- `svm(self)`: performs Support Vector Machine analysis.
+  - *raises*:
+    - `ValueError("SVM: this type of kernel does not exist.")` if the kernel type is invalid
+- `predict(self, x_data)`: performs classification based on SVM.
+  - *raises*:
+    - `RuntimeError("The model hasn't been trained yet!")` if the model is null
+
+## Example
+
+```python
+from chemfusekit.svm import SVM

+# Initialize and run the SVM class
+svm = SVM(settings, lldf.fused_data)
+svm.svm()
+```
\ No newline at end of file
diff --git a/docs/versioned_docs/version-2.1.0/svm/svmsettings.md b/docs/versioned_docs/version-2.1.0/svm/svmsettings.md
new file mode 100644
index 0000000..7ebf852
--- /dev/null
+++ b/docs/versioned_docs/version-2.1.0/svm/svmsettings.md
@@ -0,0 +1,43 @@
+---
+sidebar_position: 2
+---
+
+# SVMSettings class
+
+Holds the settings for the [`SVM`](./svm.md) object.
+
+Inherits from [`BaseSettings`](../base/basesettings.md).
+
+## Syntax
+
+```python
+SVMSettings(kernel: str, output: GraphMode, test_split: bool)
+```
+
+## Fields and constructor parameters
+
+- `kernel`: the type of kernel to use in the SVM analysis. Available options:
+  - `linear`
+  - `poly`
+  - `gaussian`
+  - `sigmoid`
+  Defaults to `linear`.
+- `output`: toggles graph output. Defaults to [`GraphMode.NONE`](../utils/graphmode.md).
+- `test_split`: toggles split testing. Defaults to `False`.
+
+The constructor raises:
+- `ValueError("Invalid type: must be linear, poly, gaussian or sigmoid")` if the selected kernel is not one of the available options
+- `Warning("You selected test_split but it won't run because you disabled the output.")` if split tests are run with `output` disabled
+
+## Example
+
+```python
+from chemfusekit.svm import SVMSettings, GraphMode
+
+# Initialize the settings for Support Vector Machine
+svm_settings = SVMSettings(
+    kernel='linear',
+    output=GraphMode.GRAPHIC,  # graphs will be printed
+    test_split=True  # split testing is enabled
+)
+```
\ No newline at end of file
diff --git a/docs/versioned_docs/version-2.1.0/tutorial.md b/docs/versioned_docs/version-2.1.0/tutorial.md
new file mode 100644
index 0000000..7945b21
--- /dev/null
+++ b/docs/versioned_docs/version-2.1.0/tutorial.md
@@ -0,0 +1,109 @@
+---
+sidebar_position: 1
+---
+
+# Tutorial
+
+Let's discover **Data Fusion**.
+
+As a simple example, we will train an LDA model and use it for classification.
+
+First of all, let's install the package:
+```bash
+pip install chemfusekit
+```
+
+## First step: data fusion
+
+We will load the `LLDFSettings` with the paths to some Excel datasheets, containing
+respectively the data from a QEPAS spectrometer and a GC chromatograph.
+
+We will pick normalization as the preprocessing technique for the data.
+
+The `LLDF` class will take these settings and perform low-level data fusion on the
+two Excel tables we picked.
+
+```python
+from chemfusekit.lldf import LLDFSettings, LLDF
+
+# Initialize the settings for low-level data fusion
+lldf_settings = LLDFSettings(
+    qepas_path='tests/qepas.xlsx',
+    qepas_sheet='Sheet1',
+    rt_path='tests/rt.xlsx',
+    rt_sheet='Sheet1',
+    preprocessing='snv'  # normalization preprocessing; other options: savgol or both
+)
+
+# Initialize and run low-level data fusion
+lldf = LLDF(lldf_settings)
+lldf.lldf()
+```
+
+Optionally, we can export the fused data into a new, single Excel datasheet:
+
+```python
+# (optional) export the LLDF data to an Excel file
+lldf.export_data('output_file.xlsx')
+```
+
+## Second step: PCA
+
+A run of Principal Component Analysis (`PCA`) will help us pick the right number
+of components for the subsequent `LDA` analysis step.
+
+As in the previous case, we will set it up with the help of the `PCASettings` class.
+
+```python
+from chemfusekit.pca import PCASettings, PCA, GraphMode
+
+# Initialize the settings for Principal Component Analysis
+pca_settings = PCASettings(
+    target_variance=0.99,  # the minimum acceptable level of cumulative explained variance
+    confidence_level=0.05,  # the desired level of confidence
+    initial_components=10,  # the initial amount of components for the iterative analysis
+    output=GraphMode.GRAPHIC  # graphs will be printed
+)
+
+# Initialize and run the PCA class
+pca = PCA(lldf.fused_data, pca_settings)
+pca.pca()
+
+# Print the number of components and the statistics
+print(pca.components)
+pca.pca_stats()
+```
+
+## Third step: LDA training
+
+In this step we will set up the `LDASettings` and then run the `LDA` analysis with one less
+component than what we figured out from the `PCA` analysis of the previous step.
+
+```python
+from chemfusekit.lda import LDASettings, LDA, GraphMode
+
+settings = LDASettings(
+    components=(pca.components - 1),  # one less component than the number determined by PCA
+    output=GraphMode.GRAPHIC,  # graphs will be printed
+    test_split=True  # split testing is enabled
+)
+
+# Initialize and run the LDA class
+lda = LDA(lldf.fused_data, settings)
+lda.lda()
+```
+
+## Fourth step: prediction
+
+We will pick a random sample from the dataset and see whether the trained `LDA` model
+can identify it correctly.
+
+```python
+# Let's pick a random sample from the dataset and see if it gets recognized correctly:
+x_data_sample = lldf.fused_data.x_train.iloc[119]  # should be DMMP
+x_data_sample = x_data_sample.iloc[1:].to_frame().transpose()
+
+# Let's run the prediction:
+predictions = lda.predict(x_data_sample)
+print(predictions)
+```
\ No newline at end of file
diff --git a/docs/versioned_docs/version-2.1.0/utils/_category_.json b/docs/versioned_docs/version-2.1.0/utils/_category_.json
new file mode 100644
index 0000000..322950e
--- /dev/null
+++ b/docs/versioned_docs/version-2.1.0/utils/_category_.json
@@ -0,0 +1,8 @@
+{
+  "label": "Utilities module",
+  "position": 8,
+  "link": {
+    "type": "generated-index",
+    "description": "A module containing helper functions."
+  }
+}
diff --git a/docs/versioned_docs/version-2.1.0/utils/graphmode.md b/docs/versioned_docs/version-2.1.0/utils/graphmode.md
new file mode 100644
index 0000000..bdfaa0a
--- /dev/null
+++ b/docs/versioned_docs/version-2.1.0/utils/graphmode.md
@@ -0,0 +1,11 @@
+---
+sidebar_position: 1
+---
+
+# GraphMode enum
+
+The `GraphMode` enum defines three possible values that the output of other classes can take:
+
+- `GRAPHIC`: graphs, tables and stats will be rendered with `Plotly`, `Matplotlib` or `Seaborn`.
Best used with `Jupyter Notebook`;
+- `TEXT`: output will be rendered as plain text. The best option for offline batch processing;
+- `NONE`: output will be suppressed completely.
\ No newline at end of file
diff --git a/docs/versioned_docs/version-2.1.0/utils/graphoutput.md b/docs/versioned_docs/version-2.1.0/utils/graphoutput.md
new file mode 100644
index 0000000..362bd7f
--- /dev/null
+++ b/docs/versioned_docs/version-2.1.0/utils/graphoutput.md
@@ -0,0 +1,20 @@
+---
+sidebar_position: 2
+---
+
+# `graph_output` function
+
+A (partially) reusable graphing function shared by different classes. Not meant for direct usage.
+
+## Syntax
+
+```python
+graph_output(scores, model, name: str, mode: GraphMode)
+```
+
+## Parameters
+
+- `scores`: the scores that are output by the model fitting function
+- `model`: a `scikit-learn` classification model
+- `name`: a `str` representing the name of the analysis technique
+- `mode`: a [`GraphMode`](./graphmode.md) enum that acts as an output selector
diff --git a/docs/versioned_docs/version-2.1.0/utils/printconfusionmatrix.md b/docs/versioned_docs/version-2.1.0/utils/printconfusionmatrix.md
new file mode 100644
index 0000000..826c2f2
--- /dev/null
+++ b/docs/versioned_docs/version-2.1.0/utils/printconfusionmatrix.md
@@ -0,0 +1,19 @@
+---
+sidebar_position: 5
+---
+
+# `print_confusion_matrix` function
+
+A multimodal confusion matrix and classification report printer utility. Not meant for direct usage.
+
+## Syntax
+
+```python
+print_confusion_matrix(y1, y2, title: str, mode: GraphMode)
+```
+
+## Parameters
+
+- `y1` and `y2`: the true and predicted values
+- `title`: a `str` representing the title for the confusion matrix and classification report
+- `mode`: a [`GraphMode`](./graphmode.md) enum that acts as an output selector
\ No newline at end of file
diff --git a/docs/versioned_docs/version-2.1.0/utils/printtable.md b/docs/versioned_docs/version-2.1.0/utils/printtable.md
new file mode 100644
index 0000000..a07206e
--- /dev/null
+++ b/docs/versioned_docs/version-2.1.0/utils/printtable.md
@@ -0,0 +1,20 @@
+---
+sidebar_position: 3
+---
+
+# `print_table` function
+
+A multimodal table printing utility. It can output tables as `Plotly` plots or as plain text. Not meant for direct usage.
+
+## Syntax
+
+```python
+print_table(header_values, cell_values, title: str, mode: GraphMode)
+```
+
+## Parameters
+
+- `header_values`: the column titles
+- `cell_values`: a row array of column arrays
+- `title`: a `str` containing the title for the table
+- `mode`: a [`GraphMode`](./graphmode.md) enum that acts as an output selector
\ No newline at end of file
diff --git a/docs/versioned_docs/version-2.1.0/utils/runsplittests.md b/docs/versioned_docs/version-2.1.0/utils/runsplittests.md
new file mode 100644
index 0000000..410eb65
--- /dev/null
+++ b/docs/versioned_docs/version-2.1.0/utils/runsplittests.md
@@ -0,0 +1,20 @@
+---
+sidebar_position: 4
+---
+
+# `run_split_test` function
+
+A reusable function for split testing a generic model. Not meant for direct usage.
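+
+For reference, the sketch below shows the general idea of a split test using `scikit-learn` directly: hold out part of the data, fit the model on the rest, and report classification metrics on the held-out portion. It is only an illustration with placeholder names (`split_test_sketch`, the `KNeighborsClassifier` stand-in), not the actual implementation of `run_split_test`.
+
+```python
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import classification_report
+from sklearn.neighbors import KNeighborsClassifier
+
+def split_test_sketch(x, y, model=None):
+    # Hold out 30% of the samples for evaluation
+    x_train, x_test, y_train, y_test = train_test_split(
+        x, y, test_size=0.3, random_state=42
+    )
+    model = model or KNeighborsClassifier()  # placeholder classifier
+    model.fit(x_train, y_train)              # train on the training split
+    y_pred = model.predict(x_test)           # evaluate on the held-out split
+    print(classification_report(y_test, y_pred))
+```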
+ +## Syntax + +```python +run_split_test(x, y, model, extended=False, mode: GraphMode) +``` + +## Parameters + +- `x` and `y`: the regressor and target arrays +- `model`: a `scikit-learn` classifier +- `extended`: a `bool` that selects whether a longer split analysis will be carried out +- `mode`: a [`GraphMode`](./graphmode.md) enum that acts as an output selector diff --git a/docs/versioned_sidebars/version-2.1.0-sidebars.json b/docs/versioned_sidebars/version-2.1.0-sidebars.json new file mode 100644 index 0000000..caea0c0 --- /dev/null +++ b/docs/versioned_sidebars/version-2.1.0-sidebars.json @@ -0,0 +1,8 @@ +{ + "tutorialSidebar": [ + { + "type": "autogenerated", + "dirName": "." + } + ] +} diff --git a/docs/versions.json b/docs/versions.json index 77b48ba..b619bc6 100644 --- a/docs/versions.json +++ b/docs/versions.json @@ -1,4 +1,5 @@ [ + "2.1.0", "2.0.0", "1.2.0", "1.1.3"