Skip to content

Commit

Permalink
feat!: substitute GraphMode enum with string in API
Browse files Browse the repository at this point in the history
  • Loading branch information
f-aguzzi committed Jun 20, 2024
1 parent f827b2d commit 34068a4
Show file tree
Hide file tree
Showing 34 changed files with 229 additions and 225 deletions.
15 changes: 11 additions & 4 deletions chemfusekit/__base.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,15 @@ def __getitem__(self, index):
class BaseSettings:
"""Holds the settings for all objects with settings."""

def __init__(self, output: GraphMode = GraphMode.NONE):
self.output = output
def __init__(self, output: str = 'none'):
if output == 'none':
self.output = GraphMode.NONE
elif output == 'graphical':
self.output = GraphMode.GRAPHIC
elif output == 'text':
self.output = GraphMode.TEXT
else:
raise ValueError("The output mode should be 'none', 'graphical' or 'text'.")


class BaseActionClass(ABC):
Expand Down Expand Up @@ -145,9 +152,9 @@ def export_model(self, export_path: str):
class BaseClassifierSettings(BaseSettings):
"""Holds the settings for the BaseClassifier object."""

def __init__(self, output: GraphMode = GraphMode.NONE, test_split: bool = False):
def __init__(self, output: str = 'none', test_split: bool = False):
super().__init__(output)
if test_split is True and output is GraphMode.NONE:
if test_split is True and self.output is GraphMode.NONE:
raise Warning(
"You selected test_split but it won't run because you disabled the output."
)
Expand Down
5 changes: 3 additions & 2 deletions chemfusekit/df.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@

import matplotlib.pyplot as plt

from .__base import GraphMode, BaseDataModel, BaseSettings
from .__base import BaseDataModel, BaseSettings
from .__utils import GraphMode
from .pca import PCASettings, PCA
from .plsda import PLSDASettings, PLSDA

Expand Down Expand Up @@ -37,7 +38,7 @@ def __init__(self, x_data: pd.DataFrame, x_train: pd.DataFrame, y: np.ndarray,
class DFSettings(BaseSettings):
"""Holds the settings for the DF object."""

def __init__(self, output: GraphMode = GraphMode.NONE, method: str = 'concat'):
def __init__(self, output: str = 'none', method: str = 'concat'):
super().__init__(output)
if method not in ['concat', 'outer']:
raise ValueError("DF: invalid method: must be 'concat' or 'outer'")
Expand Down
4 changes: 2 additions & 2 deletions chemfusekit/knn.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
class KNNSettings(BaseClassifierSettings):
"""Holds the settings for the kNN object."""
def __init__(self, n_neighbors: int = 15, metric: str | Callable = 'euclidean', weights: str | Callable = 'uniform',
algorithm: str = 'auto', output: GraphMode = GraphMode.NONE, test_split: bool = False):
algorithm: str = 'auto', output: str = 'none', test_split: bool = False):

super().__init__(output, test_split)

Expand Down Expand Up @@ -68,7 +68,7 @@ def train(self):
self.settings.output
)

if self.settings.test_split and self.settings.output:
if self.settings.test_split and self.settings.output is not GraphMode.NONE:
knn_split = KNeighborsClassifier(
n_neighbors=self.settings.n_neighbors,
metric=self.settings.metric,
Expand Down
6 changes: 3 additions & 3 deletions chemfusekit/lda.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LD
from sklearn.model_selection import cross_val_score

from chemfusekit.__utils import graph_output, run_split_test
from chemfusekit.__utils import print_confusion_matrix, print_table, GraphMode
from chemfusekit.__utils import graph_output, run_split_test, GraphMode
from chemfusekit.__utils import print_confusion_matrix, print_table
from .__base import BaseDataModel, BaseClassifier, BaseClassifierSettings, BaseReducer, ReducerDataModel


Expand All @@ -25,7 +25,7 @@ def __init__(self, x_data: pd.DataFrame, x_train: pd.DataFrame, y: np.ndarray, c
class LDASettings(BaseClassifierSettings):
"""Holds the settings for the LDA object."""

def __init__(self, components: int | None = None, output: GraphMode = GraphMode.NONE, test_split: bool = False):
def __init__(self, components: int | None = None, output: str = 'none', test_split: bool = False):
super().__init__(output, test_split)
if components is not None and components <= 2:
raise ValueError("Invalid component number: must be a > 1 integer.")
Expand Down
4 changes: 2 additions & 2 deletions chemfusekit/lr.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
import pandas as pd
from sklearn.linear_model import LogisticRegression

from chemfusekit.__utils import run_split_test, print_confusion_matrix, print_table, GraphMode
from chemfusekit.__utils import run_split_test, print_confusion_matrix, print_table
from .__base import BaseClassifierSettings, BaseDataModel, BaseClassifier, ReducerDataModel


class LRSettings(BaseClassifierSettings):
"""Holds the settings for the LR object."""
def __init__(self, algorithm: str = 'liblinear', output: GraphMode = GraphMode.NONE, test_split: bool = False):
def __init__(self, algorithm: str = 'liblinear', output: str = 'none', test_split: bool = False):
super().__init__(output, test_split)
if algorithm not in [
'lbfgs',
Expand Down
5 changes: 2 additions & 3 deletions chemfusekit/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@
class PCASettings(BaseSettings):
"""Holds the settings for the PCA object."""

def __init__(self, target_variance: float = 0.95,
confidence_level: float = 0.05,
initial_components: int = 10, output: GraphMode = GraphMode.NONE):
def __init__(self, target_variance: float = 0.95, confidence_level: float = 0.05, initial_components: int = 10,
output: str = 'none'):
super().__init__(output)
if target_variance < 0:
raise ValueError("Target variance should be positive or null.")
Expand Down
6 changes: 3 additions & 3 deletions chemfusekit/plsda.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@
from sklearn.cross_decomposition import PLSRegression as PLSR
from sklearn.model_selection import cross_val_score

from chemfusekit.__utils import GraphMode, print_table, print_confusion_matrix, run_split_test
from chemfusekit.__utils import print_table, print_confusion_matrix, run_split_test, GraphMode
from .__base import BaseClassifierSettings, BaseDataModel, BaseClassifier, BaseReducer, ReducerDataModel


class PLSDASettings(BaseClassifierSettings):
"""Holds the settings for the PLSDA object."""

def __init__(self, components: int | None = None, output: GraphMode = GraphMode.NONE, test_split: bool = False):
def __init__(self, components: int | None = None, output: str = 'none', test_split: bool = False):
super().__init__(output, test_split)
if components is not None and components < 1:
raise ValueError("Invalid n_components number: should be a positive integer.")
Expand Down Expand Up @@ -136,7 +136,7 @@ def train(self):
if self.settings.output and self.settings.test_split:
x = self.data.x_data
y = self.data.x_train.Substance.astype('category').cat.codes
run_split_test(x, y, PLSR(self.settings.components), mode=self.settings.output)
run_split_test(x, y, PLSR(self.components), mode=self.settings.output)

def _select_feature_number(self, x, y):
# Auto-select the number of components
Expand Down
4 changes: 2 additions & 2 deletions chemfusekit/svm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@

from sklearn.svm import SVC

from chemfusekit.__utils import GraphMode, run_split_test, print_confusion_matrix
from chemfusekit.__utils import run_split_test, print_confusion_matrix
from .__base import BaseClassifierSettings, BaseClassifier, BaseDataModel


class SVMSettings(BaseClassifierSettings):
"""Holds the settings for the SVM object."""
def __init__(self, kernel: str = 'linear', output: GraphMode = GraphMode.NONE, test_split: bool = False):
def __init__(self, kernel: str = 'linear', output: str = 'none', test_split: bool = False):
super().__init__(output, test_split)
if kernel not in ['linear', 'poly', 'gaussian', 'sigmoid']:
raise ValueError("Invalid type: must be linear, poly, gaussian or sigmoid")
Expand Down
4 changes: 2 additions & 2 deletions docs/docs/base/basesettings.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ Holds the settings for all classifier object. It's not meant for direct usage, o
## Syntax

```python
BaseSettings(output: GraphMode, test_split: false)
BaseSettings(output: str, test_split: false)
```

## Fields and constructor parameters
- `output`: toggles graph output mode. Defaults to [`GraphMode.NONE`](../utils/graphmode.md).
- `output`: toggles graph output. Defaults to `'none'` (other options: `'graphical'`, `'text'`). Gets implicitly converted to a [`GraphMode` enum](../utils/graphmode.md).
- `test_split`: toggles the training split test phase. Defaults to `False`. Requires `output` to be set to a value other than `'none'` to work.

The constructor raises:
Expand Down
4 changes: 2 additions & 2 deletions docs/docs/base/reducerdatamodel.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ sidebar-position: 6

# ReducerDataModel class

This class models the output data for all dimensionality reduction operations (currently, the [`LDA`](../lda/lda), the [`PLSDA`](../plsda/plsda.md) and the [`PCA`](../pca/pca.md) operations).
This class models the output data for all dimensionality reduction operations (currently, the [`LDA`](../lda/lda.md), the [`PLSDA`](../plsda/plsda.md) and the [`PCA`](../pca/pca.md) operations).

It inherits from [`BaseDataModel`](../base/basedatamodel.md).
It inherits from [`BaseDataModel`](./basedatamodel.md).

## Syntax

Expand Down
20 changes: 10 additions & 10 deletions docs/docs/complete-workflow.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,25 @@ sidebar-position: 7
# Complete workflow

Here's a sequence diagram to represent an example workflow, from the raw data
tables to classification, including data fusion, PCA and training.
tables to classification, including data fusion, dimensionality reduction and training.

```plantuml
actor User
participant LLDF
participant PCA
participant DF
participant Reducer
participant Classifier
User -> LLDF : Upload training tables
User -> LLDF : Set parameters
User -> DF : Upload training tables
User -> DF : Set parameters
User -> Classifier : (optional) Upload model
LLDF -> PCA : Pass preprocessed / fused tables
LLDF --> User : Download fused tables
LLDF -> Classifier : Pass preprocessed / fused tables \nRun classification
PCA -> Classifier : (optional) Set number of components
DF -> Reducer : Pass preprocessed / fused tables
DF --> User : Download fused tables
DF -> Classifier : Pass preprocessed / fused tables \nRun classification
Reducer -> Classifier : (optional) Set number of components
Classifier --> User : classification results, graphs
PCA --> User : classification results, graphs
Reducer --> User : classification results, graphs
Classifier --> User : (optional) download trained model
User -> Classifier : pass data to classify
Expand Down
8 changes: 4 additions & 4 deletions docs/docs/df/dfsettings.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,18 @@
sidebar_position: 2
---

# LLDFSettings class
# DFSettings class

Holds the settings for the [`DF`](./df-class.md) object.

## Syntax

```python
DFSettings(output: GraphMode, method: str)
DFSettings(output: str, method: str)
```

## Fields and constructor parameters
- `output`: toggles graph output. Defaults to [`GraphMode.NONE`](../utils/graphmode.md).
- `output`: toggles graph output. Defaults to `'none'` (other options: `'graphical'`, `'text'`). Gets implicitly converted to a [`GraphMode` enum](../utils/graphmode.md).
- `method`: a choice between `concat` (concatenation-based data fusion) and `outer` (outer matrix multiplication-based data fusion)

The constructor raises a `ValueError("DF: invalid method: must be 'concat' or 'outer'")` if an invalid parameter is provided to the `method` field.
Expand All @@ -24,5 +24,5 @@ The constructor raises a `ValueError("DF: invalid method: must be 'concat' or 'o
from chemfusekit.df import DFSettings

# Initialize the settings for low-level data fusion
df_settings = DFSettings(output=GraphMode.TEXT)
df_settings = DFSettings(output='text')
```
4 changes: 2 additions & 2 deletions docs/docs/knn/knn.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ KNN(settings: KNNSettings, data: LLDFModel)

## Fields

- `settings`: object of type [`KNNSettings`](/tesi/docs/knn/knnsettings). Contains the settings for
- `settings`: object of type [`KNNSettings`](./knnsettings.md). Contains the settings for
the `KNN` object.
- `data`: onject of type ['DFModel`](/tesi/docs/df/dfmodel). Contains the
- `data`: object of type [`DFModel`](../df/dfmodel.md). Contains the
artifacts from the data fusion process.
- `model`: a `KNeighborsClassifier` model from `scikit-learn`. Defaults to `None`.

Expand Down
14 changes: 7 additions & 7 deletions docs/docs/knn/knnsettings.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ KNNSettings(
metric: str | Callable,
weights: str | Callable,
algorithm: str,
output: GraphMode,
output: str,
test_split: false
)
```
Expand All @@ -38,7 +38,7 @@ KNNSettings(
- `kd_tree`
- `brute`
or be a callable object.
- `output`: toggles graph output mode. Defaults to [`GraphMode.NONE`](../utils/graphmode.md).
- `output`: toggles graph output. Defaults to `'none'` (other options: `'graphical'`, `'text'`). Gets implicitly converted to a [`GraphMode` enum](../utils/graphmode.md).
- `test_split`: toggles the training split test phase. Defaults to `False`. Requires `output` to be set to a value other than `'none'` to work.

The constructor raises:
Expand All @@ -51,14 +51,14 @@ The constructor raises:
## Example

```python
from chemfusekit.knn import KNNSettings, GraphMode
from chemfusekit.knn import KNNSettings

settings = KNNSettings(
n_neighbors=20, # pick 20 neighbors
metric='minkowski', # choose the metric
weights='distance', # choose the weight metric
metric='minkowski', # choose the metric
weights='distance', # choose the weight metric
algorithm='auto', # the best algorithm gets chosen automatically
output=GraphMode.GRAPHIC, # graph output is enabled
test_split=True # the model will be split-tested at the end of the training
output='graphical', # graph output is enabled
test_split=True # the model will be split-tested at the end of the training
)
```
10 changes: 5 additions & 5 deletions docs/docs/lda/ldasettings.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@ Inherits from [`BaseSettings`](../base/basesettings.md).
## Syntax

```python
LDASettings(components: int, output: GraphMode, split_test: bool)
LDASettings(components: int, output: str, test_split: bool)
```

## Fields and constructor parameters

- `components`: the amount of components to be used in the LDA model. Defaults to 3.
- `output`: toggles graph output. Defaults to [`GraphMode.NONE`](../utils/graphmode.md).
- `output`: toggles graph output. Defaults to `'none'` (other options: `'graphical'`, `'text'`). Gets implicitly converted to a [`GraphMode` enum](../utils/graphmode.md).
- `test_split`: toggles split testing. Defaults to `False`.


Expand All @@ -29,11 +29,11 @@ The constructor raises:
## Example

```python
from chemfusekit.lda import LDASettings, GraphMode
from chemfusekit.lda import LDASettings

settings = LDASettings(
components=(pca.components - 1), # one less component than the number determined by PCA
output=GraphMode.GRAPHIC, # graphs will be printed
test_split=True # split testing is enabled
output='graphical', # graphs will be printed
test_split=True # split testing is enabled
)
```
10 changes: 5 additions & 5 deletions docs/docs/lr/lrsettings.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Inherits from [`BaseSettings`](../base/basesettings.md).
## Syntax

```python
LRSettings(algorithm: str, output: GraphMode, test_split: bool)
LRSettings(algorithm: str, output: str, test_split: bool)
```

## Fields and constructor parameters
Expand All @@ -23,7 +23,7 @@ LRSettings(algorithm: str, output: GraphMode, test_split: bool)
- `newton-cholesky`
- `sag`
- `saga`
- `output`: toggles graph output. Defaults to [`GraphMode.NONE`](../utils/graphmode.md).
- `output`: toggles graph output. Defaults to `'none'` (other options: `'graphical'`, `'text'`). Gets implicitly converted to a [`GraphMode` enum](../utils/graphmode.md).
- `test_split`: toggles split testing. Defaults to `False`.

The constructor raises:
Expand All @@ -34,11 +34,11 @@ The constructor raises:
## Example

```python
from chemfusekit.lr import LRSettings, GraphMode
from chemfusekit.lr import LRSettings

settings = LRSettings(
algorithm='newton-cg',
output=GraphMode.GRAPHIC, # graphs will be printed
test_split=True # split testing is enabled
output='graphical', # graphs will be printed
test_split=True # split testing is enabled
)
```
2 changes: 1 addition & 1 deletion docs/docs/pca/pca.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ PCA(settings: PCASettings, data: BaseDataModel)

## Fields

- `data`: object of type [`BaseDataModel`](../base/basedatamodel.md.md). Contains the data to be analyzed.
- `data`: object of type [`BaseDataModel`](../base/basedatamodel.md). Contains the data to be analyzed.
- `components`: Number of components for the PCA analysis. Defaults to 0.
- `model`: A `PCA` model from `scikit-learn`. Defaults to `None`.
- `settings`: object of type [`PCASettings`](./pcasettings.md). Contains the settings for
Expand Down
Loading

0 comments on commit 34068a4

Please sign in to comment.