From a0eb4e6110dc25a5a8a4e6e72ff7ba02c05f6a14 Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Tue, 4 Jun 2024 20:45:25 +0200 Subject: [PATCH] docs: fix wrong version number --- .../version-2.0.0/structure.md | 14 ++- .../version-2.1.0/case-study-classifier.md | 9 -- .../version-2.1.0/case-study-hybrid.md | 9 -- .../version-2.1.0/case-study-realtime.md | 9 -- .../version-2.1.0/introduction.md | 26 ----- .../version-2.1.0/structure.md | 44 ------- .../version-2.1.0-sidebars.json | 8 -- docs/cookbook_versions.json | 1 - .../version-2.0.0/base/baseclassifier.md | 10 +- .../version-2.0.0/lldf/index.mdx | 4 +- .../version-2.0.0/lldf/lldfmodel.md | 3 +- .../pca/pcadatamodel.md | 0 .../version-2.1.0/base/_category_.json | 8 -- .../version-2.1.0/base/baseclassifier.md | 33 ------ .../version-2.1.0/base/basedatamodel.md | 21 ---- .../version-2.1.0/base/basesettings.md | 21 ---- .../version-2.1.0/complete-workflow.md | 31 ----- .../version-2.1.0/knn/_category_.json | 8 -- docs/versioned_docs/version-2.1.0/knn/knn.md | 47 -------- .../version-2.1.0/knn/knnsettings.md | 64 ---------- .../version-2.1.0/lda/_category_.json | 8 -- docs/versioned_docs/version-2.1.0/lda/lda.md | 48 -------- .../version-2.1.0/lda/ldasettings.md | 39 ------- .../version-2.1.0/lldf/_category_.json | 8 -- .../version-2.1.0/lldf/index.mdx | 47 -------- .../version-2.1.0/lldf/lldf-class.md | 67 ----------- .../version-2.1.0/lldf/lldfmodel.md | 25 ---- .../version-2.1.0/lldf/lldfsettings.md | 25 ---- .../version-2.1.0/lldf/table.md | 39 ------- .../version-2.1.0/lr/_category_.json | 8 -- docs/versioned_docs/version-2.1.0/lr/lr.md | 51 -------- .../version-2.1.0/lr/lrsettings.md | 44 ------- .../version-2.1.0/pca/_category_.json | 8 -- docs/versioned_docs/version-2.1.0/pca/pca.md | 46 -------- .../version-2.1.0/pca/pcasettings.md | 41 ------- .../version-2.1.0/plsda/_category_.json | 8 -- .../version-2.1.0/plsda/plsda.md | 47 -------- 
.../version-2.1.0/plsda/plsdasettings.md | 38 ------ .../version-2.1.0/svm/_category_.json | 8 -- docs/versioned_docs/version-2.1.0/svm/svm.md | 49 -------- .../version-2.1.0/svm/svmsettings.md | 43 ------- docs/versioned_docs/version-2.1.0/tutorial.md | 109 ------------------ .../version-2.1.0/utils/_category_.json | 8 -- .../version-2.1.0/utils/graphmode.md | 11 -- .../version-2.1.0/utils/graphoutput.md | 20 ---- .../utils/printconfusionmatrix.md | 19 --- .../version-2.1.0/utils/printtable.md | 20 ---- .../version-2.1.0/utils/runsplittests.md | 20 ---- .../version-2.1.0-sidebars.json | 8 -- docs/versions.json | 1 - 50 files changed, 21 insertions(+), 1262 deletions(-) delete mode 100644 docs/cookbook_versioned_docs/version-2.1.0/case-study-classifier.md delete mode 100644 docs/cookbook_versioned_docs/version-2.1.0/case-study-hybrid.md delete mode 100644 docs/cookbook_versioned_docs/version-2.1.0/case-study-realtime.md delete mode 100644 docs/cookbook_versioned_docs/version-2.1.0/introduction.md delete mode 100644 docs/cookbook_versioned_docs/version-2.1.0/structure.md delete mode 100644 docs/cookbook_versioned_sidebars/version-2.1.0-sidebars.json rename docs/versioned_docs/{version-2.1.0 => version-2.0.0}/pca/pcadatamodel.md (100%) delete mode 100644 docs/versioned_docs/version-2.1.0/base/_category_.json delete mode 100644 docs/versioned_docs/version-2.1.0/base/baseclassifier.md delete mode 100644 docs/versioned_docs/version-2.1.0/base/basedatamodel.md delete mode 100644 docs/versioned_docs/version-2.1.0/base/basesettings.md delete mode 100644 docs/versioned_docs/version-2.1.0/complete-workflow.md delete mode 100644 docs/versioned_docs/version-2.1.0/knn/_category_.json delete mode 100644 docs/versioned_docs/version-2.1.0/knn/knn.md delete mode 100644 docs/versioned_docs/version-2.1.0/knn/knnsettings.md delete mode 100644 docs/versioned_docs/version-2.1.0/lda/_category_.json delete mode 100644 docs/versioned_docs/version-2.1.0/lda/lda.md delete mode 100644 
docs/versioned_docs/version-2.1.0/lda/ldasettings.md delete mode 100644 docs/versioned_docs/version-2.1.0/lldf/_category_.json delete mode 100644 docs/versioned_docs/version-2.1.0/lldf/index.mdx delete mode 100644 docs/versioned_docs/version-2.1.0/lldf/lldf-class.md delete mode 100644 docs/versioned_docs/version-2.1.0/lldf/lldfmodel.md delete mode 100644 docs/versioned_docs/version-2.1.0/lldf/lldfsettings.md delete mode 100644 docs/versioned_docs/version-2.1.0/lldf/table.md delete mode 100644 docs/versioned_docs/version-2.1.0/lr/_category_.json delete mode 100644 docs/versioned_docs/version-2.1.0/lr/lr.md delete mode 100644 docs/versioned_docs/version-2.1.0/lr/lrsettings.md delete mode 100644 docs/versioned_docs/version-2.1.0/pca/_category_.json delete mode 100644 docs/versioned_docs/version-2.1.0/pca/pca.md delete mode 100644 docs/versioned_docs/version-2.1.0/pca/pcasettings.md delete mode 100644 docs/versioned_docs/version-2.1.0/plsda/_category_.json delete mode 100644 docs/versioned_docs/version-2.1.0/plsda/plsda.md delete mode 100644 docs/versioned_docs/version-2.1.0/plsda/plsdasettings.md delete mode 100644 docs/versioned_docs/version-2.1.0/svm/_category_.json delete mode 100644 docs/versioned_docs/version-2.1.0/svm/svm.md delete mode 100644 docs/versioned_docs/version-2.1.0/svm/svmsettings.md delete mode 100644 docs/versioned_docs/version-2.1.0/tutorial.md delete mode 100644 docs/versioned_docs/version-2.1.0/utils/_category_.json delete mode 100644 docs/versioned_docs/version-2.1.0/utils/graphmode.md delete mode 100644 docs/versioned_docs/version-2.1.0/utils/graphoutput.md delete mode 100644 docs/versioned_docs/version-2.1.0/utils/printconfusionmatrix.md delete mode 100644 docs/versioned_docs/version-2.1.0/utils/printtable.md delete mode 100644 docs/versioned_docs/version-2.1.0/utils/runsplittests.md delete mode 100644 docs/versioned_sidebars/version-2.1.0-sidebars.json diff --git a/docs/cookbook_versioned_docs/version-2.0.0/structure.md 
b/docs/cookbook_versioned_docs/version-2.0.0/structure.md index e9b28c0..3850237 100644 --- a/docs/cookbook_versioned_docs/version-2.0.0/structure.md +++ b/docs/cookbook_versioned_docs/version-2.0.0/structure.md @@ -86,7 +86,7 @@ classDiagram \ \ -The classifiers themselves, except `LR`, all inherit from a base class called [`BaseClassifier`](/docs/base/baseclassifier) in the `base` module: +The classifiers themselves all inherit from a base class called [`BaseClassifier`](/docs/base/baseclassifier) in the `base` module: ```mermaid classDiagram @@ -108,6 +108,10 @@ classDiagram ... } + class LR { + ... + } + class PLSDA { ... } @@ -141,7 +145,13 @@ classDiagram __init__(...) } + class PCADataModel { + +array_scores: ndarray + __init__(..., array_scores) + } + BaseDataModel *-- LLDFDataModel + BaseDataModel *-- PCADataModel ``` -This allows all the classifiers to use the `LLDF` data, or any other type of data as long as it follows the `BaseDataModel` template. \ No newline at end of file +This allows all the classifiers to use the `LLDF` data, dimension-reduced `PCA` data, or any other type of data as long as it follows the `BaseDataModel` template. \ No newline at end of file diff --git a/docs/cookbook_versioned_docs/version-2.1.0/case-study-classifier.md b/docs/cookbook_versioned_docs/version-2.1.0/case-study-classifier.md deleted file mode 100644 index 8b1d3d3..0000000 --- a/docs/cookbook_versioned_docs/version-2.1.0/case-study-classifier.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -sidebar_position: 3 ---- - -# Case study: training a classifier from lab data - -:::note -This case study is still **under construction**. 
-::: \ No newline at end of file diff --git a/docs/cookbook_versioned_docs/version-2.1.0/case-study-hybrid.md b/docs/cookbook_versioned_docs/version-2.1.0/case-study-hybrid.md deleted file mode 100644 index a7c4c5d..0000000 --- a/docs/cookbook_versioned_docs/version-2.1.0/case-study-hybrid.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -sidebar_position: 4 ---- - -# Case study: hybrid workflow - -:::note -This case study is still **under construction**. -::: \ No newline at end of file diff --git a/docs/cookbook_versioned_docs/version-2.1.0/case-study-realtime.md b/docs/cookbook_versioned_docs/version-2.1.0/case-study-realtime.md deleted file mode 100644 index 8725eac..0000000 --- a/docs/cookbook_versioned_docs/version-2.1.0/case-study-realtime.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -sidebar_position: 5 ---- - -# Case study: real-time data classification - -:::note -This case study is still **under construction**. -::: \ No newline at end of file diff --git a/docs/cookbook_versioned_docs/version-2.1.0/introduction.md b/docs/cookbook_versioned_docs/version-2.1.0/introduction.md deleted file mode 100644 index 3a4d2ef..0000000 --- a/docs/cookbook_versioned_docs/version-2.1.0/introduction.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -sidebar_position: 1 ---- - -# The ChemFuseKit Cookbook: an introduction - -*What is a cookbook, exactly?* - -> A cookbook is a comprehensive collection of recipes that guide users through -the process of learning and mastering the use of a specific library or -programming technique, by providing step-by-step instructions, explanations and -examples. - -## What you'll learn - -In this cookbook you will learn the basic principles of operation of `ChemFuseKit` through practical examples and case studies. You will be shown that all modules follow a basic structure, and once you've learned it for one module, you will be able to reapply that knowledge for all modules. 
- -You will be shown how to use the library on its own, and also how to use it as a part of a bigger pipeline. - -## Cookbook sectioning - -Here we go: - -- first of all, you will be shown the basic principles and structure; -- then, you will be shown three case studies; -- finally, you'll receive instructions on how to modify and expand this library for your own purposes. \ No newline at end of file diff --git a/docs/cookbook_versioned_docs/version-2.1.0/structure.md b/docs/cookbook_versioned_docs/version-2.1.0/structure.md deleted file mode 100644 index a1ec5d5..0000000 --- a/docs/cookbook_versioned_docs/version-2.1.0/structure.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -sidebar_position: 2 ---- - -# Project structure - -In this cookbook page, you will be shown how the project is structured, and the purpose of each module. - -## Project Hierarchy - -``` -chemfusekit - | - |_lda - | |_LDASettings - | |_LDA - | - |_lr - | |_LRSettings - | |_LR - | - |_plsda - | |_PLSDASettings - | |_PLSDA - | - |_pca - | |_PCASettings - | |_PCA - | - |_lldf - | |_LLDFSettings - | |_LLDF - | |_LLDFModel - | - |_svm - | |_SVMSettings - | |_SVM - | - |_knn - |_KNNSettings - |_KNN -``` - -As you can see, each module contains a class with the same name of the module, and a settings class. That's because this project tries to be as modular and as regular as possible, for clarity and interoperability. \ No newline at end of file diff --git a/docs/cookbook_versioned_sidebars/version-2.1.0-sidebars.json b/docs/cookbook_versioned_sidebars/version-2.1.0-sidebars.json deleted file mode 100644 index caea0c0..0000000 --- a/docs/cookbook_versioned_sidebars/version-2.1.0-sidebars.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "tutorialSidebar": [ - { - "type": "autogenerated", - "dirName": "." 
- } - ] -} diff --git a/docs/cookbook_versions.json b/docs/cookbook_versions.json index dc236fe..3aea034 100644 --- a/docs/cookbook_versions.json +++ b/docs/cookbook_versions.json @@ -1,4 +1,3 @@ [ - "2.1.0", "2.0.0" ] diff --git a/docs/versioned_docs/version-2.0.0/base/baseclassifier.md b/docs/versioned_docs/version-2.0.0/base/baseclassifier.md index d633359..9c80bcc 100644 --- a/docs/versioned_docs/version-2.0.0/base/baseclassifier.md +++ b/docs/versioned_docs/version-2.0.0/base/baseclassifier.md @@ -2,7 +2,7 @@ sidebar_position: 1 --- -# KNN class +# BaseClassifier class A base class from which all classifiers inherit. @@ -21,13 +21,13 @@ BaseClassifier(settings: BaseSettings, data: BaseDataModel) ## Fields - `settings`: object of type [`KNNSettings`](/tesi/docs/knn/knnsettings). Contains the settings for - the `KNN` object. + the `BaseClassifier` object. - `data`: object of type [`BaseDataModel`](../base/basedatamodel.md). Contains the data to be analyzed. - `model`: a `sklearn` model from `scikit-learn`. Defaults to `None`. ## Methods -- `knn(self)`: trains the k-Neighbors Analysis model -- `predict(self, x_data)`: performs LDA prediction once the model is trained. +- `import_model(import_path: str)`: loads a model from file +- `export_model(export_path: str)`: exports a model to file - *raises*: - - `RuntimeError("The kNN model is not trained yet!")` if the `KNN` model hasn't been trained yet + - `RuntimeError("You haven't trained the model yet! 
You cannot export it now.")` when trying to export an untrained model diff --git a/docs/versioned_docs/version-2.0.0/lldf/index.mdx b/docs/versioned_docs/version-2.0.0/lldf/index.mdx index 4e0508d..01c1d16 100644 --- a/docs/versioned_docs/version-2.0.0/lldf/index.mdx +++ b/docs/versioned_docs/version-2.0.0/lldf/index.mdx @@ -27,14 +27,14 @@ classDiagram class LLDF { +LLDFSettings settings +Table[] tables - +LLDFModel | None fused_data + +LLDFDataModel | None fused_data lldf() -_snv() +export_data(export_path: str) __init__(settings, tables[]) } - class LLDFModel { + class LLDFDataModel { +x_data: pd.DataFrame +x_train: pd.DataFrame +y: np.ndarray diff --git a/docs/versioned_docs/version-2.0.0/lldf/lldfmodel.md b/docs/versioned_docs/version-2.0.0/lldf/lldfmodel.md index 91bf654..7232264 100644 --- a/docs/versioned_docs/version-2.0.0/lldf/lldfmodel.md +++ b/docs/versioned_docs/version-2.0.0/lldf/lldfmodel.md @@ -11,7 +11,7 @@ It inherits from the [`BaseDataModel`](../base/basedatamodel.md). 
## Syntax ```python -LLDFModel(x_data: pd.DataFrame, x_train: pd.DataFrame, y: pd.DataFrame) +LLDFDataModel(x_data: pd.DataFrame, x_train: pd.DataFrame, y: np.ndarray) ``` ## Fields and constructor parameters @@ -19,6 +19,7 @@ LLDFModel(x_data: pd.DataFrame, x_train: pd.DataFrame, y: pd.DataFrame) The first two are `Pandas` `DataFrame` objects: - `x_data` - `x_train` + The last is a `NumPy` `ndarray`: - `y` diff --git a/docs/versioned_docs/version-2.1.0/pca/pcadatamodel.md b/docs/versioned_docs/version-2.0.0/pca/pcadatamodel.md similarity index 100% rename from docs/versioned_docs/version-2.1.0/pca/pcadatamodel.md rename to docs/versioned_docs/version-2.0.0/pca/pcadatamodel.md diff --git a/docs/versioned_docs/version-2.1.0/base/_category_.json b/docs/versioned_docs/version-2.1.0/base/_category_.json deleted file mode 100644 index ac3f2d9..0000000 --- a/docs/versioned_docs/version-2.1.0/base/_category_.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "label": "Base module", - "position": 9, - "link": { - "type": "generated-index", - "description": "A module containing base classes for all the other modules." - } -} diff --git a/docs/versioned_docs/version-2.1.0/base/baseclassifier.md b/docs/versioned_docs/version-2.1.0/base/baseclassifier.md deleted file mode 100644 index 9c80bcc..0000000 --- a/docs/versioned_docs/version-2.1.0/base/baseclassifier.md +++ /dev/null @@ -1,33 +0,0 @@ ---- -sidebar_position: 1 ---- - -# BaseClassifier class - -A base class from which all classifiers inherit. - -## Syntax - -```python -BaseClassifier(settings: BaseSettings, data: BaseDataModel) -``` - -## Constructor parameters - -- `settings`: object of type [`BaseSettings`](./basesettings.md). Contains the settings for - the `BaseClassifier` object. -- `data`: object of type [`BaseDataModel`](../base/basedatamodel.md). Contains the data to be analyzed. - -## Fields - -- `settings`: object of type [`KNNSettings`](/tesi/docs/knn/knnsettings). Contains the settings for - the `BaseClassifier` object. 
-- `data`: object of type [`BaseDataModel`](../base/basedatamodel.md). Contains the data to be analyzed. -- `model`: a `sklearn` model from `scikit-learn`. Defaults to `None`. - -## Methods - -- `import_model(import_path: str)`: loads a model from file -- `export_model(export_path: str)`: exports a model to file - - *raises*: - - `RuntimeError("You haven't trained the model yet! You cannot export it now.")` when trying to export an untrained model diff --git a/docs/versioned_docs/version-2.1.0/base/basedatamodel.md b/docs/versioned_docs/version-2.1.0/base/basedatamodel.md deleted file mode 100644 index 727c520..0000000 --- a/docs/versioned_docs/version-2.1.0/base/basedatamodel.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -sidebar_position: 3 ---- - -# BaseDataModel class - -This class models the output data for all data-outputting operations (currently, the [`LLDF`](../lldf/lldf-class.md) operation and the [`PCA`](../pca/pca.md) operation). - -## Syntax - -```python -BaseDataModel(x_data: pd.DataFrame, x_train: pd.DataFrame, y: pd.DataFrame) -``` - -## Fields and constructor parameters - -The first two are `Pandas` `DataFrame` objects: -- `x_data` -- `x_train` -The last is a `NumPy` `ndarray`: -- `y` diff --git a/docs/versioned_docs/version-2.1.0/base/basesettings.md b/docs/versioned_docs/version-2.1.0/base/basesettings.md deleted file mode 100644 index 740581b..0000000 --- a/docs/versioned_docs/version-2.1.0/base/basesettings.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -sidebar-position: 1 ---- - -# BaseSettings class - -Holds the settings for all classifier object. It's not meant for direct usage, only for inheritance. - - -## Syntax - -```python -BaseSettings(output: GraphMode, test_split: false) -``` - -## Fields and constructor parameters -- `output`: toggles graph output mode. Defaults to [`GraphMode.NONE`](../utils/graphmode.md). -- `test_split`: toggles the training split test phase. Defaults to `False`. Requires `output` to be set to `True` to work. 
- -The constructor raises: -- `Warning("You selected test_split but it won't run because you disabled the output.")` if `test_split` is run with `output` set to false (split tests only produce graphical output, and are useless when run with disabled output). diff --git a/docs/versioned_docs/version-2.1.0/complete-workflow.md b/docs/versioned_docs/version-2.1.0/complete-workflow.md deleted file mode 100644 index f607520..0000000 --- a/docs/versioned_docs/version-2.1.0/complete-workflow.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -sidebar-position: 7 ---- - -# Complete workflow - -Here's a sequence diagram to represent an example workflow, from the raw data -tables to classification, including data fusion, PCA and training. - -```plantuml -actor User -participant LLDF -participant PCA -participant Classifier - -User -> LLDF : Upload training tables -User -> LLDF : Set parameters -User -> Classifier : (optional) Upload model - -LLDF -> PCA : Pass preprocessed / fused tables -LLDF --> User : Download fused tables -LLDF -> Classifier : Pass preprocessed / fused tables \nRun classification -PCA -> Classifier : (optional) Set number of components - -Classifier --> User : classification results, graphs -PCA --> User : classification results, graphs -Classifier --> User : (optional) download trained model - -User -> Classifier : pass data to classify -Classifier --> User : classification results -``` diff --git a/docs/versioned_docs/version-2.1.0/knn/_category_.json b/docs/versioned_docs/version-2.1.0/knn/_category_.json deleted file mode 100644 index e670264..0000000 --- a/docs/versioned_docs/version-2.1.0/knn/_category_.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "label": "kNN module", - "position": 6, - "link": { - "type": "generated-index", - "description": "A module for k-nearest neighbors analysis." 
- } -} diff --git a/docs/versioned_docs/version-2.1.0/knn/knn.md b/docs/versioned_docs/version-2.1.0/knn/knn.md deleted file mode 100644 index 2cecad5..0000000 --- a/docs/versioned_docs/version-2.1.0/knn/knn.md +++ /dev/null @@ -1,47 +0,0 @@ ---- -sidebar_position: 1 ---- - -# KNN class - -A class to store the data, methods and artifacts for _k-Nearest Neighbors Analysis_. - -## Syntax - -```python -KNN(settings: KNNSettings, data: LLDFModel) -``` - -## Constructor parameters - -- `settings`: object of type [`KNNSettings`](knnsettings.md). Contains the settings for - the `KNN` object. -- `data`: object of type [`BaseDataModel`](../base/basedatamodel.md). Contains the data to be analyzed. - -## Fields - -- `settings`: object of type [`KNNSettings`](/tesi/docs/knn/knnsettings). Contains the settings for - the `KNN` object. -- `fused_data`: onject of type ['LLDFModel`](/tesi/docs/lldf/lldfmodel). Contains the - artifacts from the data fusion process. -- `model`: a `KNeighborsClassifier` model from `scikit-learn`. Defaults to `None`. - -## Methods - -- `knn(self)`: trains the k-Neighbors Analysis model -- `predict(self, x_data)`: performs LDA prediction once the model is trained. - - *raises*: - - `RuntimeError("The kNN model is not trained yet!")` if the `KNN` model hasn't been trained yet - -## Example - -```python -from chemfusekit.knn import KNN - -# Initialize and run the LDA class -knn = KNN(settings, lldf.fused_data) -knn.knn() - -# Run predictions -knn.predict(x_data) -``` \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/knn/knnsettings.md b/docs/versioned_docs/version-2.1.0/knn/knnsettings.md deleted file mode 100644 index 81f9486..0000000 --- a/docs/versioned_docs/version-2.1.0/knn/knnsettings.md +++ /dev/null @@ -1,64 +0,0 @@ ---- -sidebar-position: 1 ---- - -# KNNSettings class - -Holds the settings for the [`KNN`](knn.md) object. - -Inherits from [`BaseSettings`](../base/basesettings.md). 
- -## Syntax - -```python -KNNSettings( - n_neighbors: int, - metric: str | Callable, - weights: str | Callable, - algorithm: str, - output: GraphMode, - test_split: false -) -``` - -## Fields and constructor parameters -- `n_neighbors`: the amount of components to be used in the `KNN` model. Defaults to 15. -- `metric`: the distance metric for the model. It can take one of the following values: - - `minkwoski` - - `precomputed` - - `euclidean` - or be a callable object. -- `weights`: the weight metric for the model. It can take one of the following values: - - `uniform` - - `distance` - or be a callable object. -- `algorithm`: the algorithm for the model. It can take one of the following values: - - `auto` - - `ball_tree` - - `kd_tree` - - `brute` - or be a callable object. -- `output`: toggles graph output mode. Defaults to [`GraphMode.NONE`](../utils/graphmode.md). -- `test_split`: toggles the training split test phase. Defaults to `False`. Requires `output` to be set to `True` to work. - -The constructor raises: -- `ValueError("Invalid n_neighbors number: should be a positive integer.")` if the number of components is not valid. -- `ValueError("Invalid metric: should be 'minkwoski', 'precomputed', 'euclidean' or a callable.")` if the chosen metric is neither available nor a callable function. -- `ValueError("Invalid weight: should be 'uniform', 'distance' or a callable")` if the chosen weight is neither available nor a callable function. -- `ValueError("Invalid algorithm: should be 'auto', 'ball_tree', 'kd_tree' or 'brute'.")` if the chosen algotithm does not exist. -- `Warning("You selected test_split but it won't run because you disabled the output.")` if `test_split` is run with `output` set to false (split tests only produce graphical output, and are useless when run with disabled output). 
- -## Example - -```python -from chemfusekit.knn import KNNSettings, GraphMode - -settings = KNNSettings( - n_neighbors=20, # pick 20 neighbors - metric='minkowski', # choose the metric - weights='distance', # choose the weight metric - algorithm='auto', # the best algorithm gets chosen automatically - output=GraphMode.GRAPHIC, # graph output is enabled - test_split=True # the model will be split-tested at the end of the training -) -``` \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/lda/_category_.json b/docs/versioned_docs/version-2.1.0/lda/_category_.json deleted file mode 100644 index 8d1f6af..0000000 --- a/docs/versioned_docs/version-2.1.0/lda/_category_.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "label": "LDA module", - "position": 4, - "link": { - "type": "generated-index", - "description": "A module for linear discriminant analysis." - } -} diff --git a/docs/versioned_docs/version-2.1.0/lda/lda.md b/docs/versioned_docs/version-2.1.0/lda/lda.md deleted file mode 100644 index 2b8f19e..0000000 --- a/docs/versioned_docs/version-2.1.0/lda/lda.md +++ /dev/null @@ -1,48 +0,0 @@ ---- -sidebar_position: 1 ---- - -# LDA class - -A class to store the data, methods and artifacts for _Linear Discriminant Analysis_. - -## Syntax - -```python -LDA(settings: LDASettings, data: BaseDataModel) -``` - -## Constructor parameters - -- `settings`: object of type [`LDASettings`](./ldasettings.md). Contains the settings for - the `LDA` object. -- `data`: object of type [`BaseDataModel`](../base/basedatamodel.md). Contains the data to be analyzed. - -## Fields - -- `settings`: object of type [`LDASettings`](./ldasettings.md). Contains the settings for - the `LDA` object. -- Fused data fields: - - `x_data` - - `x_train` - - `y` -- `model`: a `LinearDiscriminantAnalysis` model from `scikit-learn`. Defaults to `None`. 
- -## Methods - -- `lda(self)`: performs Linear Discriminant Analysis -- `__print_prediction_graphs(self, y_test, y_pred)`: helper function to print - graphs and stats about LDA predictions -- `predict(self, x_data)`: performs LDA prediction once the model is trained. - - *raises*: - - `RuntimeError("The LDA model is not trained yet!")` if the LDA model hasn't been trained yet - -## Example - -```python -from chemfusekit.lda import LDA - -# Initialize and run the LDA class -lda = LDA(lldf.fused_data, settings) -lda.lda() -``` \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/lda/ldasettings.md b/docs/versioned_docs/version-2.1.0/lda/ldasettings.md deleted file mode 100644 index c9c850e..0000000 --- a/docs/versioned_docs/version-2.1.0/lda/ldasettings.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -sidebar_position: 2 ---- - -# LDASettings class - -Holds the settings for the [`LDA`](./lda.md) object. - -Inherits from [`BaseSettings`](../base/basesettings.md). - -## Syntax - -```python -LDASettings(components: int, output: GraphMode, split_test: bool) -``` - -## Fields and constructor parameters - -- `components`: the amount of components to be used in the LDA model. Defaults to 3. -- `output`: toggles graph output. Defaults to [`GraphMode.NONE`](../utils/graphmode.md). -- `test_split`: toggles split testing. Defaults to `False`. - - -The constructor raises: -- `ValueError("Invalid component number: must be a > 1 integer.")` if the number of - components is not valid. 
-- `Warning("You selected test_split but it won't run because you disabled the output.")` if split tests are run with `output` disabled - -## Example - -```python -from chemfusekit.lda import LDASettings, GraphMode - -settings = LDASettings( - components=(pca.components - 1), # one less component than the number determined by PCA - output=GraphMode.GRAPHIC, # graphs will be printed - test_split=True # split testing is enabled -) -``` \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/lldf/_category_.json b/docs/versioned_docs/version-2.1.0/lldf/_category_.json deleted file mode 100644 index 096ed0c..0000000 --- a/docs/versioned_docs/version-2.1.0/lldf/_category_.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "label": "LLDF Module", - "position": 2, - "link": { - "type": "doc", - "id": "index" - } -} diff --git a/docs/versioned_docs/version-2.1.0/lldf/index.mdx b/docs/versioned_docs/version-2.1.0/lldf/index.mdx deleted file mode 100644 index 01c1d16..0000000 --- a/docs/versioned_docs/version-2.1.0/lldf/index.mdx +++ /dev/null @@ -1,47 +0,0 @@ -import DocCardList from '@theme/DocCardList'; - -# LLDF Module - -A module for low-level data fusion. 
- -# Members - - - -# UML - -```mermaid -classDiagram - class Table { - +str file_path - +str sheet_name - +str preprocessing - __init__(file_path, sheet_name, preprocessing) - } - - class LLDFSettings { - +GraphOutput output - __init__(output) - } - - class LLDF { - +LLDFSettings settings - +Table[] tables - +LLDFDataModel | None fused_data - lldf() - -_snv() - +export_data(export_path: str) - __init__(settings, tables[]) - } - - class LLDFDataModel { - +x_data: pd.DataFrame - +x_train: pd.DataFrame - +y: np.ndarray - __init__(x_data, x_train, y) - } - - LLDF *-- LLDFModel - LLDF *-- Table - LLDF *-- LLDFSettings -``` \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/lldf/lldf-class.md b/docs/versioned_docs/version-2.1.0/lldf/lldf-class.md deleted file mode 100644 index 1a0be38..0000000 --- a/docs/versioned_docs/version-2.1.0/lldf/lldf-class.md +++ /dev/null @@ -1,67 +0,0 @@ ---- -sidebar_position: 1 ---- - -# LLDF class - -The `LLDF` class is used for _low-level data fusion_. - -## Syntax - -```python -LLDF(tables: List[Table], lldf_settings: LLDFSettings) -``` - -## Constructor parameters - -- `tables`: `List[`[`Table`](./table.md)`]` - - A list of `Table` objects containing info about the files to import - -- `lldf_settings`: [`LLDFSettings`](./lldfsettings) - - The settings for the LLDF object. - -## Fields - -- `settings`: [`LLDFSettings`](./lldfsettings) - - The settings for the LLDF object. - -- `tables`: `List[`[`Table`](./table.md)`]` - - A list of `Table` objects containing info about the files to import - -- `fused_data`: [`LLDFModel`](./lldfmodel.md) - - The resulting model containing the data fusion artifacts. 
- -## Methods - -- `_snv(self, input_data)`: static method to rescale input arrays -- `lldf(self)`: performs low-level data fusion on the data passed in the settings - - *raises*: - - `FileNotFoundError("Error opening the selected files.")` - if the files specified in the settings are not valid - - `SyntaxError("LLDF: this type of preprocessing does not exist")` - if the preprocessing method specified in the settings is not valid -- `export_data(self, export_path)`: exports the data fusion artifacts to an Excel file - - *raises*: - - `RuntimeError("Cannot export data before data fusion.")` if export is - attempted before fusing the data - - `RuntimeError("Could not export data to the selected path.")` if any error - happens during the export phase - - -## Example - -```python -from chemfusekit.lldf import LLDF - -# Initialize and run low-level data fusion -lldf = LLDF(tables, lldf_settings) -lldf.lldf() - -# Export the LLDF data to an Excel file -lldf.export_data('output_file.xlsx') -``` \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/lldf/lldfmodel.md b/docs/versioned_docs/version-2.1.0/lldf/lldfmodel.md deleted file mode 100644 index 7232264..0000000 --- a/docs/versioned_docs/version-2.1.0/lldf/lldfmodel.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -sidebar_position: 3 ---- - -# LLDFDataModel class - -This class models the output data from the [`LLDF`](./lldf-class.md) operation. - -It inherits from the [`BaseDataModel`](../base/basedatamodel.md). 
- -## Syntax - -```python -LLDFModel(x_data: pd.DataFrame, x_train: pd.DataFrame, y: np.ndarray) -``` - -## Fields and constructor parameters - -The first two are `Pandas` `DataFrame` objects: -- `x_data` -- `x_train` - -The last is a `NumPy` `ndarray`: -- `y` - diff --git a/docs/versioned_docs/version-2.1.0/lldf/lldfsettings.md b/docs/versioned_docs/version-2.1.0/lldf/lldfsettings.md deleted file mode 100644 index 30f985f..0000000 --- a/docs/versioned_docs/version-2.1.0/lldf/lldfsettings.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -sidebar_position: 2 ---- - -# LLDFSettings class - -Holds the settings for the [`LLDF`](./lldf-class.md) object. - -## Syntax - -```python -LLDFSettings(output: GraphMode) -``` - -## Fields and constructor parameters -- `output`: toggles graph output. Defaults to [`GraphMode.NONE`](../utils/graphmode.md). - -## Example - -```python -from chemfusekit.lldf import LLDFSettings - -# Initialize the settings for low-level data fusion -lldf_settings = LLDFSettings(output=GraphMode.TEXT) -``` \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/lldf/table.md b/docs/versioned_docs/version-2.1.0/lldf/table.md deleted file mode 100644 index 782da64..0000000 --- a/docs/versioned_docs/version-2.1.0/lldf/table.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -sidebar_position: 4 ---- - -# Table class - -Holds the information for a single table to import. - -The [`LLDF`](./lldf-class.md) object takes a list of `Table` as a parameter. - -## Syntax - -```python -Table( - file_path: str - sheet_name: str - preprocessing: str -) -``` - -## Fields and constructor parameters - -- `file_path`: a `str` containing the path to the Excel datasheet -- `sheet_name`: a `str` containing the name of the sheet to select within the Excel file -- `preprocessing`: a `str` with the name of the preprocessing to be applied to the table. - Available options: `snv` (normalization), `savgol` (Savitski-Golay smoothing), `savgol+snv` (both), `none` (no processing). 
- -## Example - -```python -from chemfusekit.lldf import Table - -# Create a table -table1 = Table( - file_path='tests/qepas.xlsx', - sheet_name='Sheet1', - preprocessing='snv' # normalization preprocessing; other options: savgol, both or none -) -``` \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/lr/_category_.json b/docs/versioned_docs/version-2.1.0/lr/_category_.json deleted file mode 100644 index c9a9071..0000000 --- a/docs/versioned_docs/version-2.1.0/lr/_category_.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "label": "LR module", - "position": 6, - "link": { - "type": "generated-index", - "description": "A module for logistic regression." - } -} diff --git a/docs/versioned_docs/version-2.1.0/lr/lr.md b/docs/versioned_docs/version-2.1.0/lr/lr.md deleted file mode 100644 index f7ded6f..0000000 --- a/docs/versioned_docs/version-2.1.0/lr/lr.md +++ /dev/null @@ -1,51 +0,0 @@ ---- -sidebar_position: 1 ---- - -# LR class - -A class to store the data, methods and artifacts for _Logistic Regression_. - -## Syntax - -```python -LR(settings: LRSettings, array_scores: np.ndarray, y: np.ndarray): -``` - -## Constructor parameters - -- `settings`: object of type [`LRSettings`](./lrsettings.md). Contains the settings for - the `LR` object. -- `array_scores`: `np.ndarray`, product of [`PCA` analysis](../pca/). -- `y`: `np.ndarray`, product of [`PCA` analysis](../pca/). - -## Fields - -- `settings`: object of type [`LRSettings`](./lrsettings.md). Contains the settings for - the `LR` object. -- `array_scores`: product of [`PCA` analysis](../pca/). -- `y`: product of [`PCA` analysis](../pca/). -- `model`: A `LR` model from `scikit-learn`. Defaults to `None`. - -## Methods - -- `lr(self)`: performs Logistic Regression. -- `predict(self, x_sample)`: performs LR-based classification on input data. 
- - *raises*: - - `RuntimeError("The LR model is not trained yet!")` if prediction is - started without training the model first; - - `raise TypeError("X data for LDA prediction must be non-empty.")` if - the data passed as argument is null. - -## Example - -```python -from chemfusekit.lr import LR - -# Initialize and train the LR class -lr = LR(settings, array_scores, y) -lr.lr() - -# Perform prediction -lr.predict(x_sample) -``` \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/lr/lrsettings.md b/docs/versioned_docs/version-2.1.0/lr/lrsettings.md deleted file mode 100644 index 847c002..0000000 --- a/docs/versioned_docs/version-2.1.0/lr/lrsettings.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -sidebar_position: 2 ---- - -# LRSettings class - -Holds the settings for the [`LR`](./lr.md) object. - -Inherits from [`BaseSettings`](../base/basesettings.md). - -## Syntax - -```python -LRSettings(algorithm: str, output: GraphMode, test_split: bool) -``` - -## Fields and constructor parameters - -- `algorithm`: the amount of components to be used in the LDA model. Defaults to - `liblinear`. Other available options: - - `lbfgs` - - `newton-cg` - - `newton-cholesky` - - `sag` - - `saga` -- `output`: toggles graph output. Defaults to [`GraphMode.NONE`](../utils/graphmode.md). -- `test_split`: toggles split testing. Defaults to `False`. - -The constructor raises: -- `ValueError("This algorithm does not exist.")` if the selected `algorithm` - is not a valid option. 
-- `Warning("You selected test_split but it won't run because you disabled the output.")` if split tests are run with `output` disabled - -## Example - -```python -from chemfusekit.lr import LRSettings, GraphMode - -settings = LRSettings( - algorithm='newton-cg', - output=GraphMode.GRAPHIC, # graphs will be printed - test_split=True # split testing is enabled -) -``` \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/pca/_category_.json b/docs/versioned_docs/version-2.1.0/pca/_category_.json deleted file mode 100644 index 266d36f..0000000 --- a/docs/versioned_docs/version-2.1.0/pca/_category_.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "label": "PCA module", - "position": 3, - "link": { - "type": "generated-index", - "description": "A module for principal component analysis." - } -} diff --git a/docs/versioned_docs/version-2.1.0/pca/pca.md b/docs/versioned_docs/version-2.1.0/pca/pca.md deleted file mode 100644 index 1a07127..0000000 --- a/docs/versioned_docs/version-2.1.0/pca/pca.md +++ /dev/null @@ -1,46 +0,0 @@ ---- -sidebar_position: 1 ---- - -# PCA class - -A class to store the data, methods and artifacts for _Principal Component Analysis_. - -## Syntax - -```python -PCA(settings: PCASettings, data: BaseDataModel) -``` - -## Constructor parameters - -- `settings`: object of type [`PCASettings`](./pcasettings.md). Contains the settings for - the `PCA` object. -- `data`: object of type [`BaseDataModel`](../base/basedatamodel.md). Contains the data to be analyzed. - -## Fields - -- `fused_data`: object of type [`LLDF`](../lldf/lldf-class.md). Contains the data to be analyzed. -- `components`: Number of components for the PCA analysis. Defaults to 0. -- `pca_model`: A `PCA` model from `scikit-learn`. Defaults to `None`. -- `settings`: object of type [`PCASettings`](./pcasettings.md). Contains the settings for - the `PCA` object. 
- -## Methods - -- `pca(self)`: performs Principal Component Analysis -- `pca_stats(self)` produces PCA-related statistics and graphs. - -## Example - -```python -from chemfusekit.pca import PCA - -# Initialize and run the PCA class -pca = PCA(lldf.fused_data, pca_settings) -pca.pca() - -# Print the number of components and the statistics -print(pca.components) -pca.pca_stats() -``` \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/pca/pcasettings.md b/docs/versioned_docs/version-2.1.0/pca/pcasettings.md deleted file mode 100644 index ca533ea..0000000 --- a/docs/versioned_docs/version-2.1.0/pca/pcasettings.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -sidebar_position: 2 ---- - -# PCASettings class - -Holds the settings for the [`PCA`](./pca.md) object. - -## Syntax - -```python -PCASettings( - target_variance: float, - confidence_level: float, - initial_components: int, - output: GraphMode -) -``` - -## Fields and constructor parameters - -- `target_variance`: the minimum cumulative explained variance to reach in the analysis. - Defaults to 0.95. -- `confidence_level`: the confidence level for statistical tests. Defaults to 0.05. -- `initial_components`: the minimum amount of components to be used in the PCA model. - Defaults to 10. -- `output`: toggles graph output. Defaults to [`GraphMode.NONE`](../utils/graphmode.md). 
- -## Example - -```python -from chemfusekit.pca import PCASettings, GraphMode - -# Initialize the settings for Principal Component Analysis -pca_settings = PCASettings( - target_variance=0.99, - confidence_level=0.05, - initial_components=10, - output=GraphMode.GRAPHIC # graphs will be printed -) -``` \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/plsda/_category_.json b/docs/versioned_docs/version-2.1.0/plsda/_category_.json deleted file mode 100644 index aaf0cbf..0000000 --- a/docs/versioned_docs/version-2.1.0/plsda/_category_.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "label": "PLSDA module", - "position": 7, - "link": { - "type": "generated-index", - "description": "A module for partial least squares discriminant analysis." - } -} diff --git a/docs/versioned_docs/version-2.1.0/plsda/plsda.md b/docs/versioned_docs/version-2.1.0/plsda/plsda.md deleted file mode 100644 index 3287fd6..0000000 --- a/docs/versioned_docs/version-2.1.0/plsda/plsda.md +++ /dev/null @@ -1,47 +0,0 @@ ---- -sidebar_position: 1 ---- - -# PLSDA class - -A class to store the data, methods and artifacts for _Partial Least Squares Discriminant Analysis_. - -## Syntax - -```python -PLSDA(settings: PLSDASettings, data: BaseDataModel) -``` - -## Constructor parameters - -- `settings`: object of type [`PLSDASettings`](plsdasettings.md). Contains the settings for - the `PLSDA` object. -- `data`: object of type [`BaseDataModel`](../base/basedatamodel.md). Contains the data to be analyzed. - -## Fields - -- `settings`: object of type [`PLSDASettings`](./plsdasettings.md). Contains the settings for - the `PLSDA` object. -- `fused_data`: onject of type ['LLDFModel`](../lldf/lldfmodel.md). Contains the - artifacts from the data fusion process. -- `model`: a `PLSRegression` model from `scikit-learn`. Defaults to `None`. - -## Methods - -- `plsda(self)`: trains the Partial Least Squares Discriminant Analysis model. 
-- `predict(self, x_data)`: performs PLSDA prediction once the model is trained. - - *raises*: - - `RuntimeError("The PLSDA model is not trained yet!")` if the `PLSDA` model hasn't been trained yet - -## Example - -```python -from chemfusekit.knn import PLSDA - -# Initialize and run the LDA class -plsda = PLSDA(settings, lldf.fused_data) -plsda.plsda() - -# Run predictions -plsda.predict(x_data) -``` \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/plsda/plsdasettings.md b/docs/versioned_docs/version-2.1.0/plsda/plsdasettings.md deleted file mode 100644 index 7b547a7..0000000 --- a/docs/versioned_docs/version-2.1.0/plsda/plsdasettings.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -sidebar_position: 2 ---- - -# PLSDASettings class - -Holds the settings for the [`PLSDA`](./plsda.md) object. - -Inherits from [`BaseSettings`](../base/basesettings.md). - -## Syntax - -```python -PLSDASettings(n_components: int, output: GraphMode, test_split: bool) -``` - -## Fields and constructor parameters - -- `n_components`: number of components for the PLSDA analysis. Defaults to 3. -- `output`: toggles graph output. Defaults to [`GraphMode.NONE`](../utils/graphmode.md). -- `test_split`: toggles the training split test phase. Defaults to `False`. Requires `output` to not be set to `GraphMode.NONE` to work. - -The constructor raises: -- `ValueError("Invalid n_components number: should be a positive integer.")` if the number of components is below 1. -- `Warning("You selected test_split but it won't run because you disabled the output.")` if `test_split` is run with `output` set to `GraphOutput.NONE` (split tests only produce graphical output, and are useless when run with disabled output). 
- -## Example - -```python -from chemfusekit.plsda import PLSDASettings, GraphMode - -# Initialize the settings for Partial Least Squares Discriminant Analysis -plsda_settings = PLSDASettings( - n_components=5, - output=GraphMode.GRAPHIC, # graphs will be printed - test_split=False # no split testing -) -``` \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/svm/_category_.json b/docs/versioned_docs/version-2.1.0/svm/_category_.json deleted file mode 100644 index f0ebda8..0000000 --- a/docs/versioned_docs/version-2.1.0/svm/_category_.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "label": "SVM module", - "position": 5, - "link": { - "type": "generated-index", - "description": "A module for support vector machine analysis." - } -} diff --git a/docs/versioned_docs/version-2.1.0/svm/svm.md b/docs/versioned_docs/version-2.1.0/svm/svm.md deleted file mode 100644 index b984ff7..0000000 --- a/docs/versioned_docs/version-2.1.0/svm/svm.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -sidebar_position: 1 ---- - -# SVM class - -A class to store the data, methods and artifacts for _Support Vector Machine Analysis_. - -## Syntax - -```python -SVM(settings: SVMSettings, data: BaseDataModel) -``` - -## Constructor parameters - -- `settings`: object of type [`SVMSettings`](./svmsettings.md). Contains the settings for - the `SVM` object. -- `data`: object of type [`BaseDataModel`](../base/basedatamodel.md). Contains the data to be analyzed. - -The constructor raises: -- `ValueError("Fused data input cannot be empty.")` if the input data is null -- `valueError("Settings cannot be empty.")` if the settings are null - -## Fields - -- `fused_data`: object of type [`LLDFModel`](../lldf/lldfmodel.md). Contains the data to be analyzed. -- `settings`: object of type [`SVMSettings`](./svmsettings.md). Contains the settings for - the `PCA` object. -- `pca_model`: an `SVM` model from `scikit-learn`. Defaults to `None`. 
- -## Methods - -- `svm(self)`: performs Support Vector Machine analysis. - - *raises*: - - `ValueError(SVM: this type of kernel does not exist.")` if the kernel type is invalid -- `predict(self, x_data)`: performs classification based on SVM - - *raises*: - - `RuntimeError("The model hasn't been trained yet!")` if the model is null - -## Example - -```python -from chemfusekit.svm import SVM - -# Initialize and run the SVM class -svm = LDA(lldf.fused_data, settings) -svm.svm() -``` \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/svm/svmsettings.md b/docs/versioned_docs/version-2.1.0/svm/svmsettings.md deleted file mode 100644 index 7ebf852..0000000 --- a/docs/versioned_docs/version-2.1.0/svm/svmsettings.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -sidebar_position: 2 ---- - -# SVMSettings class - -Holds the settings for the [`SVM`](./svm.md) object. - -Inherits from [`BaseSettings`](../base/basesettings.md). - -## Syntax - -```python -SVMSettings(kernel: str, output: GraphMode, test_split: bool) -``` - -## Fields and constructor parameters - -- `kernel`: the type of kernel to use in the SVM analysis. Available options: - - `linear` - - `poly` - - `gaussian` - - `sigmoid` - Defaults to `linear`. -- `output`: toggles graph output. Defaults to [`GraphMode.NONE`](../utils/graphmode.md). -- `test_split`: toggles split testing. Defaults to `False`. 
- -The constructor raises: -- `ValueError("Invalid type: must be linear, poly, gaussian or sigmoid")` if the selected kernel is not one of the available -- `Warning("You selected test_split but it won't run because you disabled the output.")` if split tests are run with `output` disabled - -## Example - -```python -from chemfusekit.svm import SVMSettings, GraphMode - -# Initialize the settings for Support Vector Machine -svm_settings = SVMSettings( - type='linear', - output=GraphMode.GRAPHIC, # graphs will be printed - test_split=True # split testing is enabled -) -``` \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/tutorial.md b/docs/versioned_docs/version-2.1.0/tutorial.md deleted file mode 100644 index 7945b21..0000000 --- a/docs/versioned_docs/version-2.1.0/tutorial.md +++ /dev/null @@ -1,109 +0,0 @@ ---- -sidebar_position: 1 ---- - -# Tutorial - -Let's discover **Data Fusion**. - -As a simple example, we will train an LDA model and use it for classification. - -First of all, let's install the package: -```bash -pip install chemfusekit -``` - -## First step: data fusion - -We will load the `LLDFSettings` with the paths to some Excel datasheets, containing -respectively the data from a QEPAS spectrometer and a GC chromatographer. - -We will pick normalization as the preprocessing technique for the data. - -The `LLDF` class will take these settings and perform low-level data fusion on the -two Excel tables we picked. 
- -```python -from chemfusekit.lldf import LLDFSettings, LLDF - -# Initialize the settings for low-level data fusion -lldf_settings = LLDFSettings( - qepas_path='tests/qepas.xlsx', - qepas_sheet='Sheet1', - rt_path='tests/rt.xlsx', - rt_sheet='Sheet1', - preprocessing='snv' # normalization preprocessing; other options: savgol or both -) - -# Initialize and run low-level data fusion -lldf = LLDF(lldf_settings) -lldf.lldf() -``` - -Optionally, we can export the fused data into a new, single Excel datasheet: - -```python -# (optional) export the LLDF data to an Excel file -lldf.export_data('output_file.xlsx') -``` - -## Second step: PCA - -A run of Principal Component Analysis (`PCA`) will help us pick the right number -of components for the subsequent `LDA` analysis step. - -As in the previous case, we will set it up with the help of the `PCASettings` class. - -```python -from chemfusekit.pca import PCASettings, PCA - -# Initialize the settings for Principal Component Analysis -pca_settings = PCASettings( - target_variance=0.99, # the minimum acceptable level of cumulative explained covariance - confidence_level=0.05, # the desired level of confidence - initial_components=10, # the initial amount of components for the iterative analysis - output=GraphMode.GRAPHIC # graphs will be printed -) - -# Initialize and run the PCA class -pca = PCA(lldf.fused_data, pca_settings) -pca.pca() - -# Print the number of components and the statistics -print(pca.components) -pca.pca_stats() -``` - -## Third step: LDA training - -In this step we will set up the `LDASettings` and then run the `LDA` analysis with one less -component than what we figured out from the `PCA` analysis of the previous step. 
- -```python -from chemfusekit.lda import LDASettings, LDA - -settings = LDASettings( - components=(pca.components - 1), # one less component than the number determined by PCA - output=GraphMode.GRAPHIC, # graphs will be printed - test_split=True # Split testing is enabled -) - -# Initialize and run the LDA class -lda = LDA(lldf.fused_data, settings) -lda.lda() -``` - -## Fourth step: prediction - -We will pick a random sample from the dataset and see whether the trained `LDA` model -can identify it correctly. - -```python -# Let's pick a random sample from the dataset and see if it gets recognized correctly: -x_data_sample = lldf.fused_data.x_train.iloc[119] # should be DMMP -x_data_sample = x_data_sample.iloc[1:].to_frame().transpose() - -# Let's run the prediction: -predictions = lda.predict(x_data_sample) -print(predictions) -``` \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/utils/_category_.json b/docs/versioned_docs/version-2.1.0/utils/_category_.json deleted file mode 100644 index 322950e..0000000 --- a/docs/versioned_docs/version-2.1.0/utils/_category_.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "label": "Utilities module", - "position": 8, - "link": { - "type": "generated-index", - "description": "A module containing helper functions." - } -} diff --git a/docs/versioned_docs/version-2.1.0/utils/graphmode.md b/docs/versioned_docs/version-2.1.0/utils/graphmode.md deleted file mode 100644 index bdfaa0a..0000000 --- a/docs/versioned_docs/version-2.1.0/utils/graphmode.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -sidebar-position: 1 ---- - -# GraphMode enum - -The `GraphMode` enum defines three possible values that the output of other classes can take: - -- `GRAPHIC`: graphs, tables and stats will be rendered with `Plotly`, `MatPlotLib` or `Seaborn`. Best used with `Jupyter Notebook`; -- `TEXT`: output will be rendered as plain text. The best option for offline batch processing; -- `NONE`: output will be suppressed completely. 
\ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/utils/graphoutput.md b/docs/versioned_docs/version-2.1.0/utils/graphoutput.md deleted file mode 100644 index 362bd7f..0000000 --- a/docs/versioned_docs/version-2.1.0/utils/graphoutput.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -sidebar-position: 2 ---- - -# `graph_output` function - -A (partially) reusable graphing function shared by different classes. Not meant for direct usage. - -## Syntax - -```python -graph_output(scores, model, name: str, mode: GraphMode) -``` - -## Parameters - -- `scores`: the scores that are output by the model fitting function -- `model`: a `scikit-learn` classification model -- `name`: a `str` representing the name of the analysis technique -- `mode`: a [`GraphMode`](./graphmode.md) enum that acts as an output selector diff --git a/docs/versioned_docs/version-2.1.0/utils/printconfusionmatrix.md b/docs/versioned_docs/version-2.1.0/utils/printconfusionmatrix.md deleted file mode 100644 index 826c2f2..0000000 --- a/docs/versioned_docs/version-2.1.0/utils/printconfusionmatrix.md +++ /dev/null @@ -1,19 +0,0 @@ ---- -sidebar-position: 5 ---- - -# `print_confuson_matrix` function - -A multimodal confusion matrix and classification report printer utility. Not meant for direct usage. 
- -## Syntax - -```python -print_confusion_matrix(y1, y2, title: str, mode: GraphMode) -``` - -## Parameters - -- `y1` and `y2`: the true and predicted values -- `title`: a `str` representing the title for the confusion matrix and classification report -- `mode`: a [`GraphMode`](./graphmode.md) enum that acts as an output selector \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/utils/printtable.md b/docs/versioned_docs/version-2.1.0/utils/printtable.md deleted file mode 100644 index a07206e..0000000 --- a/docs/versioned_docs/version-2.1.0/utils/printtable.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -sidebar-position: 3 ---- - -# `print_table` function - -A multimodal table printing utility. It can output tables as `Plotly` plots or as plain text. Not meant for direct usage. - -## Syntax - -```python -print_table(header_values, cell_values, title: str, mode: GraphMode) -``` - -## Parameters - -- `header_values`: the column titles -- `cell_values`: a row array of column arrays -- `title`: a `str` containing the title for the table -- `mode`: a [`GraphMode`](./graphmode.md) enum that acts as an output selector \ No newline at end of file diff --git a/docs/versioned_docs/version-2.1.0/utils/runsplittests.md b/docs/versioned_docs/version-2.1.0/utils/runsplittests.md deleted file mode 100644 index 410eb65..0000000 --- a/docs/versioned_docs/version-2.1.0/utils/runsplittests.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -sidebar-position: 4 ---- - -# `run_split_tests` function - -A reusable function for split testing a generic model. Not meant for direct usage. 
- -## Syntax - -```python -run_split_test(x, y, model, extended=False, mode: GraphMode) -``` - -## Parameters - -- `x` and `y`: the regressor and target arrays -- `model`: a `scikit-learn` classifier -- `extended`: a `bool` that selects whether a longer split analysis will be carried out -- `mode`: a [`GraphMode`](./graphmode.md) enum that acts as an output selector diff --git a/docs/versioned_sidebars/version-2.1.0-sidebars.json b/docs/versioned_sidebars/version-2.1.0-sidebars.json deleted file mode 100644 index caea0c0..0000000 --- a/docs/versioned_sidebars/version-2.1.0-sidebars.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "tutorialSidebar": [ - { - "type": "autogenerated", - "dirName": "." - } - ] -} diff --git a/docs/versions.json b/docs/versions.json index b619bc6..77b48ba 100644 --- a/docs/versions.json +++ b/docs/versions.json @@ -1,5 +1,4 @@ [ - "2.1.0", "2.0.0", "1.2.0", "1.1.3"