diff --git a/chemfusekit/__base.py b/chemfusekit/__base.py index 0d1ff93..e1c63a6 100644 --- a/chemfusekit/__base.py +++ b/chemfusekit/__base.py @@ -114,6 +114,11 @@ def __init__(self, settings: BaseSettings, data: BaseDataModel): self.data = data self.model: BaseEstimator | None = None + @abstractmethod + def train(self): + """Trains the estimator model.""" + pass + @classmethod def from_file(cls, settings, model_path): """Creates a classifier instance from file""" diff --git a/chemfusekit/lldf.py b/chemfusekit/df.py similarity index 93% rename from chemfusekit/lldf.py rename to chemfusekit/df.py index 6abc178..98c1ae5 100644 --- a/chemfusekit/lldf.py +++ b/chemfusekit/df.py @@ -21,14 +21,14 @@ def __init__(self, file_path: str, sheet_name: str, preprocessing: str, class_co self.index_column = index_column -class LLDFDataModel(BaseDataModel): - """Models the output data from the LLDF operation""" +class DFDataModel(BaseDataModel): + """Models the output data from the DF operation""" def __init__(self, x_data: pd.DataFrame, x_train: pd.DataFrame, y: np.ndarray): super().__init__(x_data, x_train, y) -class LLDFSettings(BaseSettings): - """Holds the settings for the LLDF object.""" +class DFSettings(BaseSettings): + """Holds the settings for the DF object.""" def __init__(self, output: GraphMode = GraphMode.NONE): super().__init__(output) @@ -47,15 +47,15 @@ def _snv(input_data: np.ndarray): return output_data -class LLDF: +class DF: """Holds together all the data, methods and artifacts of the LLDF operation""" - def __init__(self, settings: LLDFSettings, tables: List[Table]): + def __init__(self, settings: DFSettings, tables: List[Table]): self.settings = settings self.tables = tables - self.fused_data: Optional[LLDFDataModel] = None + self.fused_data: Optional[DFDataModel] = None - def lldf(self): - """Performs low-level data fusion""" + def fuse(self): + """Performs data fusion""" x_vector = [] for table in self.tables: try: @@ -114,7 +114,7 @@ def lldf(self): 
preprocessed_x = x else: raise SyntaxError( - f"LLDF: this type of preprocessing does not exist ({table.preprocessing=})" + f"DF: this type of preprocessing does not exist ({table.preprocessing=})" ) if self.settings.output is GraphMode.GRAPHIC: @@ -182,7 +182,7 @@ def lldf(self): axis=1 ) - self.fused_data = LLDFDataModel(x_data, x_train, y) + self.fused_data = DFDataModel(x_data, x_train, y) def export_data(self, export_path: str, sheet_name: str = 'Sheet1'): """Exports the data fusion artifacts to a file""" diff --git a/chemfusekit/knn.py b/chemfusekit/knn.py index eb1b06d..b74deaa 100644 --- a/chemfusekit/knn.py +++ b/chemfusekit/knn.py @@ -38,7 +38,7 @@ class KNN(BaseClassifier): def __init__(self, settings: KNNSettings, data: BaseDataModel): super().__init__(settings, data) - def knn(self): + def train(self): """Performs k-Nearest Neighbors Analysis""" # Prepare and train the kNN model knn = KNeighborsClassifier( diff --git a/chemfusekit/lda.py b/chemfusekit/lda.py index 586d5a9..f573604 100644 --- a/chemfusekit/lda.py +++ b/chemfusekit/lda.py @@ -42,7 +42,7 @@ def __init__(self, settings: LDASettings, data: BaseDataModel): if isinstance(data, PCADataModel): self.settings.components = data.components - 1 - def lda(self): + def train(self): """Performs Linear Discriminant Analysis""" lda = LD(n_components=self.settings.components) # N-1 where N are the classes @@ -144,7 +144,7 @@ def rescaled_data(self) -> BaseDataModel: settings_backup = copy(self.settings) self.settings.output = GraphMode.NONE self.settings.test_split = False - self.lda() + self.train() self.settings = settings_backup x_data = pd.DataFrame(self.model.transform(self.data.x_data)) diff --git a/chemfusekit/lr.py b/chemfusekit/lr.py index 2de8b39..6c8997e 100644 --- a/chemfusekit/lr.py +++ b/chemfusekit/lr.py @@ -35,7 +35,7 @@ def __init__(self, settings: LRSettings, data: BaseDataModel): else: self.array_scores = data.x_train.drop('Substance', axis=1).values - def lr(self): + def train(self): 
"""Performs Logistic Regression""" # Let's build our model on the training set diff --git a/chemfusekit/pca.py b/chemfusekit/pca.py index 01f36f6..ed0a223 100644 --- a/chemfusekit/pca.py +++ b/chemfusekit/pca.py @@ -52,7 +52,7 @@ def __init__(self, settings: PCASettings, data: BaseDataModel): self.model: Optional[PC] = None self.array_scores: Optional[np.ndarray] = None - def pca(self): + def train(self): """Performs Principal Component Analysis.""" # Read from the data fusion object @@ -290,7 +290,7 @@ def rescaled_data(self) -> PCADataModel: if self.model is None: settings_backup = copy(self.settings) self.settings.output = GraphMode.NONE - self.pca() + self.train() self.pca_stats() self.settings = settings_backup diff --git a/chemfusekit/plsda.py b/chemfusekit/plsda.py index 7a1b436..174481f 100644 --- a/chemfusekit/plsda.py +++ b/chemfusekit/plsda.py @@ -29,7 +29,7 @@ class PLSDA(BaseClassifier): def __init__(self, settings: PLSDASettings, data: BaseDataModel): super().__init__(settings, data) - def plsda(self): + def train(self): """Performs Partial Least Squares Discriminant Analysis""" x = self.data.x_data y = self.data.x_train.Substance.astype('category').cat.codes diff --git a/chemfusekit/svm.py b/chemfusekit/svm.py index 9d29c82..6114eec 100644 --- a/chemfusekit/svm.py +++ b/chemfusekit/svm.py @@ -21,7 +21,7 @@ class SVM(BaseClassifier): def __init__(self, settings: SVMSettings, data: BaseDataModel): super().__init__(settings, data) - def svm(self): + def train(self): """Performs Support Vector Machine analysis""" # Linear kernel diff --git a/docs/cookbook/case-study-data-fusion.md b/docs/cookbook/case-study-data-fusion.md index a1dd2a9..be4b25b 100644 --- a/docs/cookbook/case-study-data-fusion.md +++ b/docs/cookbook/case-study-data-fusion.md @@ -90,10 +90,10 @@ On the other hand, the GC data does not require any preprocessing. The data obta The most significant aspect of data preprocessing in this case study is data fusion. 
The three tables contained in the Excel datasheet are concatenated row-wise to form a single table that contains the data from the IMS and QEPAS spectrometers, as well as the GC retention times. ```python -from chemfusekit.lldf import LLDFSettings, LLDF, GraphMode, Table +from chemfusekit.df import DFSettings, DF, GraphMode, Table # Initialize the settings to produce graphical output for the operation -settings = LLDFSettings(output=GraphMode.GRAPHIC) +settings = DFSettings(output=GraphMode.GRAPHIC) # Set up the import settings for the first table (IMS spectral data) table1 = Table( @@ -126,7 +126,7 @@ table3 = Table( tables = [table1, table2, table3] # Let's pass the settings and the tables to the LLDF constructor -lldf = LLDF(settings, tables) +lldf = DF(settings, tables) # Let's finally perform data fusion with the lldf() method! lldf.lldf() @@ -156,7 +156,7 @@ pca_settings = PCASettings( # Initialize and run PCA on the fused dataset pca = PCA(pca_settings, fused_data) -pca.pca() +pca.train() # Run the tests and statistics pca.pca_stats() @@ -182,7 +182,7 @@ lr_settings = LRSettings(output=GraphMode.GRAPHIC, test_split=True) # Initialize and train LR lr = LR(lr_settings, reduced_dataset) -lr.lr() +lr.train() ``` ### Model evaluation @@ -206,12 +206,12 @@ pca.export_model("DMMP_acetone_pca.sklearn") In the future, when we need to classify DMMP and acetone on a new dataset, we can simply import the new dataset, perform the necessary data fusion, reduce the dimensionality through PCA, import the pre-trained `LR` model, and use it to classify the data. This streamlined process allows for efficient and consistent classification of DMMP and acetone samples. 
```python -from chemfusekit.lldf import LLDFSettings, LLDF, GraphMode, Table +from chemfusekit.df import DFSettings, DF, GraphMode, Table from chemfusekit.pca import PCASettings, PCA from chemfusekit.lr import LRSettings, LR # Data fusion -lldf_settings = LLDFSettings(output=GraphMode.GRAPHIC) +lldf_settings = DFSettings(output=GraphMode.GRAPHIC) table1 = Table( file_path='new_dataset.xlsx', sheet_name='IMS', @@ -234,7 +234,7 @@ table3 = Table( index_column='Sample_id' ) tables = [table1, table2, table3] -lldf = LLDF(lldf_settings, tables) +lldf = DF(lldf_settings, tables) lldf.lldf() fused_data = lldf.fused_data diff --git a/docs/cookbook/data-operations.md b/docs/cookbook/data-operations.md index 6d56f0c..8092eb0 100644 --- a/docs/cookbook/data-operations.md +++ b/docs/cookbook/data-operations.md @@ -148,9 +148,9 @@ Even though the header names are slightly different, the content of the first tw The `LLDF` module allows us to join these two tables (the current and the one from the previous examples) to form a single dataset that contains both spectral data and retention times. Let's see how. ```python -from chemfusekit.lldf import LLDFSettings, LLDF, GraphMode, Table +from chemfusekit.df import DFSettings, DF, GraphMode, Table -settings = LLDFSettings() # Initialize the default settings +settings = DFSettings() # Initialize the default settings # Set up the import settings for the first table (spectral data) table1 = Table( @@ -173,7 +173,7 @@ table2 = Table( tables = [Table1, Table2] # Let's pass the settings and the tables to the LLDF constructor -lldf = LLDF(settings, tables) +lldf = DF(settings, tables) # Let's finally perform data fusion with the lldf() method! lldf.lldf() diff --git a/docs/cookbook/structure.md b/docs/cookbook/structure.md index 2cab077..5e31cca 100644 --- a/docs/cookbook/structure.md +++ b/docs/cookbook/structure.md @@ -72,7 +72,7 @@ Let's assume your new data is called `new_data`. 
Knowing that the training data, ```python knn.data = new_data -knn.knn() +knn.train() ``` The training method is always called like its container class, but in lower case. To train a `KNN` model, like in this case, you just have to call `.knn()` on it. Same goes for `.lda()` on `LDA`, `.lldf()` on `LLDF`, and so on. @@ -124,65 +124,86 @@ The classifiers themselves all inherit from a base class called [`BaseClassifier ```mermaid classDiagram - - <> BaseActionClass - class BaseActionClass { - __init__(settings, data) - +settings: BaseSettings - +data: BaseDataModel - +model: sklearn model - import_model(import_path: str) - export_model(export_path: str) - } + BaseDataModel <|-- LLDFDataModel + BaseDataModel <|-- ComponentDataModel + ComponentDataModel <|.. PCADataModel + ComponentDataModel <|.. LDADataModel - <> BaseClassifier - class BaseClassifier { - +settings: BaseSettings - __init__(settings, data) - predict(x_data: pd.DataFrame) + class BaseDataModel { + x_data + x_train + y + load_from_file() + export_to_file() } - <> BaseReducer - class BaseReducer { - +export_data() - +reduce() + <> ComponentDataModel + class ComponentDataModel { + n_components } - class KNN { - ... + class PCADataModel { + array_scores } - class LDA { - ... + class LDADataModel { + } - class LR { - ... + class LLDFDataModel { + tables } - class PLSDA { - ... + BaseActionClass <|.. BaseReducer + BaseActionClass <|.. BaseClassifier + + BaseDataModel *-- BaseActionClass + + <> BaseActionClass + class BaseActionClass { + train() + settings + data ~BaseDataModel~ + model + from_file() + import_model() + export_model() } - class SVM { - ... + BaseReducer <|.. PCA + BaseReducer <|.. LDA + BaseReducer <|.. PLSDA + + <> BaseReducer + class BaseReducer { + components + rescaled_data + export_data() + reduce() } + <> BaseClassifier + class BaseClassifier { + predict() + } + + BaseClassifier <| .. LDA + BaseClassifier <| .. LR + BaseClassifier <| .. SVM + BaseClassifier <| .. KNN + BaseClassifier <| .. 
PLSDA + class PCA { pca_stats() } - BaseClassifier <|.. KNN - BaseClassifier <|.. LDA - BaseClassifier <|.. LR - BaseClassifier <|.. PLSDA - BaseClassifier <|.. SVM - - BaseReducer <|.. PCA - BaseReducer <|.. LDA + class LLDF { + fuse_data() + } - BaseActionClass <|.. BaseReducer - BaseActionClass <|.. BaseClassifier + class LR { + array_scores + } ``` @@ -216,6 +237,7 @@ classDiagram This allows all the classifiers to use the `LLDF` data, dimension-reduced `PCA` data, or any other type of data as long as it follows the `BaseDataModel` template. + ## File import and export All the data models (`BaseDataModel`, and its derived, `LLDFDataModel` and `PCADataModel`) can export their content to Excel tables. diff --git a/docs/docs/lldf/_category_.json b/docs/docs/df/_category_.json similarity index 100% rename from docs/docs/lldf/_category_.json rename to docs/docs/df/_category_.json diff --git a/docs/docs/lldf/lldf-class.md b/docs/docs/df/df-class.md similarity index 69% rename from docs/docs/lldf/lldf-class.md rename to docs/docs/df/df-class.md index 7cd653e..7999a5b 100644 --- a/docs/docs/lldf/lldf-class.md +++ b/docs/docs/df/df-class.md @@ -2,19 +2,19 @@ sidebar_position: 1 --- -# LLDF class +# DF class -The `LLDF` class is used for _low-level data fusion_. +The `DF` class is used for _data fusion_ (low-level or mid-level). ## Syntax ```python -LLDF(lldf_settings: LLDFSettings, tables: List[Table]) +DF(df_settings: DFSettings, tables: List[Table]) ``` ## Constructor parameters -- `lldf_settings`: [`LLDFSettings`](./lldfsettings) +- `df_settings`: [`DFSettings`](./dfsettings) The settings for the LLDF object. @@ -24,7 +24,7 @@ LLDF(lldf_settings: LLDFSettings, tables: List[Table]) ## Fields -- `settings`: [`LLDFSettings`](./lldfsettings) +- `settings`: [`DFSettings`](./dfsettings) The settings for the LLDF object. 
@@ -32,18 +32,18 @@ LLDF(lldf_settings: LLDFSettings, tables: List[Table]) A list of `Table` objects containing info about the files to import -- `fused_data`: [`LLDFModel`](./lldfmodel.md) +- `fused_data`: [`DFModel`](./dfmodel.md) The resulting model containing the data fusion artifacts. ## Methods - `_snv(self, input_data)`: static method to rescale input arrays -- `lldf(self)`: performs low-level data fusion on the data passed in the settings +- `fuse(self)`: performs data fusion on the data passed in the settings - *raises*: - `FileNotFoundError("Error opening the selected files.")` if the files specified in the settings are not valid - - `SyntaxError("LLDF: this type of preprocessing does not exist")` + - `SyntaxError("DF: this type of preprocessing does not exist")` if the preprocessing method specified in the settings is not valid - `export_data(self, export_path)`: exports the data fusion artifacts to an Excel file - *raises*: @@ -56,12 +56,12 @@ LLDF(lldf_settings: LLDFSettings, tables: List[Table]) ## Example ```python -from chemfusekit.lldf import LLDF +from chemfusekit.df import DF # Initialize and run low-level data fusion -lldf = LLDF(tables, lldf_settings) -lldf.lldf() +df = DF(df_settings, tables) +df.fuse() # Export the LLDF data to an Excel file -lldf.export_data('output_file.xlsx') +df.export_data('output_file.xlsx') ``` \ No newline at end of file diff --git a/docs/docs/lldf/lldfmodel.md b/docs/docs/df/dfmodel.md similarity index 81% rename from docs/docs/lldf/lldfmodel.md rename to docs/docs/df/dfmodel.md index 60efd09..0c0c159 100644 --- a/docs/docs/lldf/lldfmodel.md +++ b/docs/docs/df/dfmodel.md @@ -2,16 +2,16 @@ sidebar_position: 3 --- -# LLDFDataModel class +# DFDataModel class -This class models the output data from the [`LLDF`](./lldf-class.md) operation. +This class models the output data from the [`DF`](./df-class.md) operation. It inherits from the [`BaseDataModel`](../base/basedatamodel.md). 
## Syntax ```python -LLDFModel(x_data: pd.DataFrame, x_train: pd.DataFrame, y: np.ndarray) +DFModel(x_data: pd.DataFrame, x_train: pd.DataFrame, y: np.ndarray) ``` ## Fields and constructor parameters diff --git a/docs/docs/lldf/lldfsettings.md b/docs/docs/df/dfsettings.md similarity index 60% rename from docs/docs/lldf/lldfsettings.md rename to docs/docs/df/dfsettings.md index 30f985f..20e923e 100644 --- a/docs/docs/lldf/lldfsettings.md +++ b/docs/docs/df/dfsettings.md @@ -4,12 +4,12 @@ sidebar_position: 2 # LLDFSettings class -Holds the settings for the [`LLDF`](./lldf-class.md) object. +Holds the settings for the [`DF`](./df-class.md) object. ## Syntax ```python -LLDFSettings(output: GraphMode) +DFSettings(output: GraphMode) ``` ## Fields and constructor parameters @@ -18,8 +18,8 @@ LLDFSettings(output: GraphMode) ## Example ```python -from chemfusekit.lldf import LLDFSettings +from chemfusekit.df import DFSettings # Initialize the settings for low-level data fusion -lldf_settings = LLDFSettings(output=GraphMode.TEXT) +df_settings = DFSettings(output=GraphMode.TEXT) ``` \ No newline at end of file diff --git a/docs/docs/lldf/index.mdx b/docs/docs/df/index.mdx similarity index 71% rename from docs/docs/lldf/index.mdx rename to docs/docs/df/index.mdx index 01c1d16..eb05ab7 100644 --- a/docs/docs/lldf/index.mdx +++ b/docs/docs/df/index.mdx @@ -2,7 +2,7 @@ import DocCardList from '@theme/DocCardList'; # LLDF Module -A module for low-level data fusion. +A module for data fusion (low-level or mid-level). 
# Members @@ -19,29 +19,29 @@ classDiagram __init__(file_path, sheet_name, preprocessing) } - class LLDFSettings { + class DFSettings { +GraphOutput output __init__(output) } - class LLDF { - +LLDFSettings settings + class DF { + +DFSettings settings +Table[] tables - +LLDFDataModel | None fused_data + +DFDataModel | None fused_data lldf() -_snv() +export_data(export_path: str) __init__(settings, tables[]) } - class LLDFDataModel { + class DFDataModel { +x_data: pd.DataFrame +x_train: pd.DataFrame +y: np.ndarray __init__(x_data, x_train, y) } - LLDF *-- LLDFModel - LLDF *-- Table - LLDF *-- LLDFSettings + DF *-- DFDataModel + DF *-- Table + DF *-- DFSettings ``` \ No newline at end of file diff --git a/docs/docs/lldf/table.md b/docs/docs/df/table.md similarity index 79% rename from docs/docs/lldf/table.md rename to docs/docs/df/table.md index 200181e..d16c82b 100644 --- a/docs/docs/lldf/table.md +++ b/docs/docs/df/table.md @@ -6,7 +6,7 @@ sidebar_position: 4 Holds the information for a single table to import. -The [`LLDF`](./lldf-class.md) object takes a list of `Table` as a parameter. +The [`DF`](./df-class.md) object takes a list of `Table` as a parameter. 
## Syntax @@ -32,14 +32,14 @@ Table( ## Example ```python -from chemfusekit.lldf import Table +from chemfusekit.df import Table # Create a table table1 = Table( file_path='tests/qepas.xlsx', sheet_name='Sheet1', - preprocessing='snv', # normalization preprocessing; other options: savgol, both or none + preprocessing='snv', # normalization preprocessing; other options: savgol, both or none class_column: 'substance', # The column called 'substance' in the datase will be treated as the class column - index_column: 'sample' # The column named 'index' in the dataset will be treated as the index column +index_column: 'sample' # The column named 'index' in the dataset will be treated as the index column ) ``` \ No newline at end of file diff --git a/docs/docs/knn/knn.md b/docs/docs/knn/knn.md index 2cecad5..01dafcf 100644 --- a/docs/docs/knn/knn.md +++ b/docs/docs/knn/knn.md @@ -22,7 +22,7 @@ KNN(settings: KNNSettings, data: LLDFModel) - `settings`: object of type [`KNNSettings`](/tesi/docs/knn/knnsettings). Contains the settings for the `KNN` object. -- `fused_data`: onject of type ['LLDFModel`](/tesi/docs/lldf/lldfmodel). Contains the +- `data`: object of type [`DFModel`](/tesi/docs/df/dfmodel). Contains the artifacts from the data fusion process. - `model`: a `KNeighborsClassifier` model from `scikit-learn`. Defaults to `None`. 
@@ -40,7 +40,7 @@ from chemfusekit.knn import KNN # Initialize and run the LDA class knn = KNN(settings, lldf.fused_data) -knn.knn() +knn.train() # Run predictions knn.predict(x_data) diff --git a/docs/docs/lda/lda.md b/docs/docs/lda/lda.md index bf958e6..9d88792 100644 --- a/docs/docs/lda/lda.md +++ b/docs/docs/lda/lda.md @@ -31,7 +31,7 @@ LDA(settings: LDASettings, data: BaseDataModel) ## Methods -- `lda(self)`: performs Linear Discriminant Analysis +- `train(self)`: performs Linear Discriminant Analysis - `__print_prediction_graphs(self, y_test, y_pred)`: helper function to print graphs and stats about LDA predictions - `predict(self, x_data)`: performs LDA prediction once the model is trained. @@ -45,5 +45,5 @@ from chemfusekit.lda import LDA # Initialize and run the LDA class lda = LDA(lldf.fused_data, settings) -lda.lda() +lda.train() ``` \ No newline at end of file diff --git a/docs/docs/lr/lr.md b/docs/docs/lr/lr.md index f7ded6f..1a9db3e 100644 --- a/docs/docs/lr/lr.md +++ b/docs/docs/lr/lr.md @@ -44,7 +44,7 @@ from chemfusekit.lr import LR # Initialize and train the LR class lr = LR(settings, array_scores, y) -lr.lr() +lr.train() # Perform prediction lr.predict(x_sample) diff --git a/docs/docs/pca/pca.md b/docs/docs/pca/pca.md index 69aa3ac..b3d427e 100644 --- a/docs/docs/pca/pca.md +++ b/docs/docs/pca/pca.md @@ -21,7 +21,7 @@ PCA(settings: PCASettings, data: BaseDataModel) ## Fields -- `fused_data`: object of type [`LLDF`](../lldf/lldf-class.md). Contains the data to be analyzed. +- `data`: object of type [`BaseDataModel`](../base/basedatamodel.md). Contains the data to be analyzed. - `components`: Number of components for the PCA analysis. Defaults to 0. - `model`: A `PCA` model from `scikit-learn`. Defaults to `None`. - `settings`: object of type [`PCASettings`](./pcasettings.md). 
Contains the settings for @@ -29,7 +29,7 @@ PCA(settings: PCASettings, data: BaseDataModel) ## Methods -- `pca(self)`: performs Principal Component Analysis +- `train(self)`: performs Principal Component Analysis - `pca_stats(self)`: produces PCA-related statistics and graphs. - `export_data(self) -> PCADataModel`: exports a [`PCADataModel`](./pcadatamodel.md) with rescaled `array_scores` while maintaining the original `x_data`, `x_train` and `y` inherited by the constructor. - `@classmethod from_file(cls, settings: PCASettings, model_path: str)`: creates a PCA instance from a file containing its sklearn core model. @@ -53,7 +53,7 @@ from chemfusekit.pca import PCA # Initialize and run the PCA class pca = PCA(lldf.fused_data, pca_settings) -pca.pca() +pca.train() # Print the number of components and the statistics print(pca.components) diff --git a/docs/docs/plsda/plsda.md b/docs/docs/plsda/plsda.md index 3287fd6..a7d8d76 100644 --- a/docs/docs/plsda/plsda.md +++ b/docs/docs/plsda/plsda.md @@ -22,13 +22,12 @@ PLSDA(settings: PLSDASettings, data: BaseDataModel) - `settings`: object of type [`PLSDASettings`](./plsdasettings.md). Contains the settings for the `PLSDA` object. -- `fused_data`: onject of type ['LLDFModel`](../lldf/lldfmodel.md). Contains the - artifacts from the data fusion process. +- `data`: object of type [`BaseDataModel`](../base/basedatamodel.md). Contains the data to be analyzed. - `model`: a `PLSRegression` model from `scikit-learn`. Defaults to `None`. ## Methods -- `plsda(self)`: trains the Partial Least Squares Discriminant Analysis model. +- `train(self)`: trains the Partial Least Squares Discriminant Analysis model. - `predict(self, x_data)`: performs PLSDA prediction once the model is trained. 
- *raises*: - `RuntimeError("The PLSDA model is not trained yet!")` if the `PLSDA` model hasn't been trained yet @@ -40,7 +39,7 @@ from chemfusekit.knn import PLSDA # Initialize and run the LDA class plsda = PLSDA(settings, lldf.fused_data) -plsda.plsda() +plsda.train() # Run predictions plsda.predict(x_data) diff --git a/docs/docs/svm/svm.md b/docs/docs/svm/svm.md index b984ff7..a13cd3a 100644 --- a/docs/docs/svm/svm.md +++ b/docs/docs/svm/svm.md @@ -24,14 +24,14 @@ The constructor raises: ## Fields -- `fused_data`: object of type [`LLDFModel`](../lldf/lldfmodel.md). Contains the data to be analyzed. +- `data`: object of type [`BaseDataModel`](../base/basedatamodel.md). Contains the data to be analyzed. - `settings`: object of type [`SVMSettings`](./svmsettings.md). Contains the settings for - the `PCA` object. + the `SVM` object. - `pca_model`: an `SVM` model from `scikit-learn`. Defaults to `None`. ## Methods -- `svm(self)`: performs Support Vector Machine analysis. +- `train(self)`: performs Support Vector Machine analysis. - *raises*: - `ValueError(SVM: this type of kernel does not exist.")` if the kernel type is invalid - `predict(self, x_data)`: performs classification based on SVM @@ -44,6 +44,6 @@ The constructor raises: from chemfusekit.svm import SVM # Initialize and run the SVM class -svm = LDA(lldf.fused_data, settings) -svm.svm() +svm = SVM(settings, lldf.fused_data) +svm.train() ``` \ No newline at end of file diff --git a/docs/docs/tutorial.md b/docs/docs/tutorial.md index 7945b21..163fa6a 100644 --- a/docs/docs/tutorial.md +++ b/docs/docs/tutorial.md @@ -24,10 +24,10 @@ The `LLDF` class will take these settings and perform low-level data fusion on t two Excel tables we picked. 
```python -from chemfusekit.lldf import LLDFSettings, LLDF +from chemfusekit.df import DFSettings, DF # Initialize the settings for low-level data fusion -lldf_settings = LLDFSettings( +lldf_settings = DFSettings( qepas_path='tests/qepas.xlsx', qepas_sheet='Sheet1', rt_path='tests/rt.xlsx', @@ -36,7 +36,7 @@ lldf_settings = LLDFSettings( ) # Initialize and run low-level data fusion -lldf = LLDF(lldf_settings) +lldf = DF(lldf_settings) lldf.lldf() ``` @@ -59,15 +59,15 @@ from chemfusekit.pca import PCASettings, PCA # Initialize the settings for Principal Component Analysis pca_settings = PCASettings( - target_variance=0.99, # the minimum acceptable level of cumulative explained covariance + target_variance=0.99, # the minimum acceptable level of cumulative explained covariance confidence_level=0.05, # the desired level of confidence initial_components=10, # the initial amount of components for the iterative analysis - output=GraphMode.GRAPHIC # graphs will be printed + output=GraphMode.GRAPHIC # graphs will be printed ) # Initialize and run the PCA class pca = PCA(lldf.fused_data, pca_settings) -pca.pca() +pca.train() # Print the number of components and the statistics print(pca.components) @@ -83,14 +83,14 @@ component than what we figured out from the `PCA` analysis of the previous step. 
from chemfusekit.lda import LDASettings, LDA settings = LDASettings( - components=(pca.components - 1), # one less component than the number determined by PCA - output=GraphMode.GRAPHIC, # graphs will be printed - test_split=True # Split testing is enabled + components=(pca.components - 1), # one less component than the number determined by PCA + output=GraphMode.GRAPHIC, # graphs will be printed + test_split=True # Split testing is enabled ) # Initialize and run the LDA class lda = LDA(lldf.fused_data, settings) -lda.lda() +lda.train() ``` ## Fourth step: prediction diff --git a/tests/test_base.py b/tests/test_base.py index 38bd2b7..a4fef99 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -5,7 +5,7 @@ import numpy as np -from chemfusekit.lldf import LLDFSettings, LLDF, Table +from chemfusekit.df import DFSettings, DF, Table from chemfusekit.__base import BaseDataModel from chemfusekit.lda import LDASettings, LDA @@ -15,7 +15,7 @@ def test_import_export(self): """Test case for table import and export.""" # Import and fuse data from tables - lldf_settings = LLDFSettings() + df_settings = DFSettings() table1 = Table( file_path="tests/qepas.xlsx", sheet_name="Sheet1", @@ -27,11 +27,11 @@ def test_import_export(self): preprocessing="none" ) tables = [table1, table2] - lldf = LLDF(lldf_settings, tables) - lldf.lldf() + df = DF(df_settings, tables) + df.fuse() # Export the fused dataset to file - lldf.export_data("export_test.xlsx") + df.export_data("export_test.xlsx") # Import the fused dataset from file imported_data = BaseDataModel.load_from_file("export_test.xlsx", "Sheet1") @@ -41,11 +41,11 @@ def test_import_export(self): tolerance = 1e-6 self.assertTrue( # Compare the DataFrames using numpy.allclose(): true if they match within tolerance - np.allclose(lldf.fused_data.x_data.values, imported_data.x_data.values, atol=tolerance) + np.allclose(df.fused_data.x_data.values, imported_data.x_data.values, atol=tolerance) ) self.assertTrue( # The comparison 
between ndarrays returns an array of booleans, collapsed by "all()" - (lldf.fused_data.y == imported_data.y).all() + (df.fused_data.y == imported_data.y).all() ) # Second phase: re-export and re-import from BaseDataModel @@ -53,8 +53,8 @@ def test_import_export(self): reimported_data = BaseDataModel.load_from_file("export_test_2.xlsx") # Assert the equality between the re-exported data and the re-reimported data - self.assertTrue(np.allclose(lldf.fused_data.x_data.values, imported_data.x_data.values, atol=tolerance)) - self.assertTrue((lldf.fused_data.y == reimported_data.y).all()) + self.assertTrue(np.allclose(df.fused_data.x_data.values, imported_data.x_data.values, atol=tolerance)) + self.assertTrue((df.fused_data.y == reimported_data.y).all()) # Clean up os.remove("export_test.xlsx") @@ -63,7 +63,7 @@ def test_import_export(self): def test_model_import(self): """Integration test for model dumping and reloading.""" # Let's start by creating and training an LDA model - lldf_settings = LLDFSettings() + lldf_settings = DFSettings() table1 = Table( file_path="tests/qepas.xlsx", sheet_name="Sheet1", @@ -75,17 +75,17 @@ def test_model_import(self): preprocessing="none" ) tables = [table1, table2] - lldf = LLDF(lldf_settings, tables) - lldf.lldf() + df = DF(lldf_settings, tables) + df.fuse() lda_settings = LDASettings() - lda = LDA(lda_settings, lldf.fused_data) - lda.lda() + lda = LDA(lda_settings, df.fused_data) + lda.train() # Dump the model to file lda.export_model("modelfile.sklearn") # Reload the model - lda2 = LDA(lda_settings, lldf.fused_data) + lda2 = LDA(lda_settings, df.fused_data) lda2.import_model("modelfile.sklearn") # Check whether the imported model is the same as the exported model @@ -98,7 +98,7 @@ def test_from_file(self): """Test case for classifier import from file""" '''Integration test for model dumping and reloading.''' # Let's start by creating and training an LDA model - lldf_settings = LLDFSettings() + df_settings = DFSettings() table1 = 
Table( file_path="tests/qepas.xlsx", sheet_name="Sheet1", @@ -110,11 +110,11 @@ def test_from_file(self): preprocessing="none" ) tables = [table1, table2] - lldf = LLDF(lldf_settings, tables) - lldf.lldf() + df = DF(df_settings, tables) + df.fuse() lda_settings = LDASettings() - lda = LDA(lda_settings, lldf.fused_data) - lda.lda() + lda = LDA(lda_settings, df.fused_data) + lda.train() # Dump the model to file lda.export_model("modelfile.sklearn") diff --git a/tests/test_lldf.py b/tests/test_df.py similarity index 71% rename from tests/test_lldf.py rename to tests/test_df.py index 7d787ae..91a2380 100644 --- a/tests/test_lldf.py +++ b/tests/test_df.py @@ -1,6 +1,6 @@ """This module contains the test cases for the LLDF module.""" import unittest -from chemfusekit.lldf import LLDFSettings, LLDF, GraphMode, Table +from chemfusekit.df import DFSettings, DF, GraphMode, Table class TestLLDF(unittest.TestCase): @@ -9,7 +9,7 @@ class TestLLDF(unittest.TestCase): def test_file_loading(self): """Test case against file loading errors.""" # load a non-existent file on purpose - settings = LLDFSettings( + settings = DFSettings( output=GraphMode.NONE ) @@ -20,13 +20,13 @@ def test_file_loading(self): ) files = [table1] - lldf = LLDF(settings=settings, tables=files) - self.assertRaises(FileNotFoundError, lldf.lldf) + df = DF(settings=settings, tables=files) + self.assertRaises(FileNotFoundError, df.fuse) def test_preprocessing_techniques(self): """Test case against wrong preprocessing user input.""" with self.assertRaises(SyntaxError): - settings = LLDFSettings( + settings = DFSettings( output=GraphMode.NONE ) @@ -36,39 +36,39 @@ def test_preprocessing_techniques(self): preprocessing='qpl' ) - lldf = LLDF(settings, [table1]) - lldf.lldf() + df = DF(settings, [table1]) + df.fuse() # Now a correct value: - settings = LLDFSettings(output=GraphMode.NONE) + settings = DFSettings(output=GraphMode.NONE) table1 = Table( file_path='tests/qepas.xlsx', sheet_name='Sheet1', 
preprocessing='snv' ) - lldf = LLDF(settings, [table1]) - lldf.lldf() + df = DF(settings, [table1]) + df.fuse() def test_export(self): """Test case against wrong export settings.""" - settings = LLDFSettings(output=GraphMode.NONE) + settings = DFSettings(output=GraphMode.NONE) table1 = Table( file_path='tests/qepas.xlsx', sheet_name='Sheet1', preprocessing='snv' ) - lldf = LLDF(settings, [table1]) + df = DF(settings, [table1]) # Try exporting data before data fusion with self.assertRaises(RuntimeError): - lldf.export_data('path') + df.export_data('path') # Perform data fusion - lldf.lldf() + df.fuse() # Try exporting data to an invalid path with self.assertRaises(ValueError): - lldf.export_data('$£=0\//|') + df.export_data('$£=0\//|') if __name__ == '__main__': diff --git a/tests/test_knn.py b/tests/test_knn.py index 32fdf26..6cbe2bd 100644 --- a/tests/test_knn.py +++ b/tests/test_knn.py @@ -5,7 +5,7 @@ import numpy as np from chemfusekit.knn import KNNSettings, KNN, GraphMode -from chemfusekit.lldf import LLDFSettings, LLDF, LLDFDataModel, Table +from chemfusekit.df import DFSettings, DF, DFDataModel, Table class TestKNN(unittest.TestCase): @@ -66,7 +66,7 @@ def test_knn_settings(self): def test_knn_constructor(self): """Test case against constructor errors.""" # Perform preliminary data fusion - lldf_settings = LLDFSettings(output=GraphMode.NONE) + df_settings = DFSettings(output=GraphMode.NONE) table1 = Table( file_path="tests/qepas.xlsx", sheet_name="Sheet1", @@ -77,24 +77,24 @@ def test_knn_constructor(self): sheet_name="Sheet1", preprocessing="none" ) - lldf = LLDF(lldf_settings, [table1, table2]) - lldf.lldf() + df = DF(df_settings, [table1, table2]) + df.fuse() # settings parameter - wrong_settings = LLDFDataModel(pd.DataFrame([1]), pd.DataFrame([1]), np.asarray([1])) + wrong_settings = DFDataModel(pd.DataFrame([1]), pd.DataFrame([1]), np.asarray([1])) with self.assertRaises(TypeError): - KNN(wrong_settings, lldf.fused_data) # pass an object of the wrong 
class as settings + KNN(wrong_settings, df.fused_data) # pass an object of the wrong class as settings # fused_data parameter knn_settings = KNNSettings() - wrong_fused_data = lldf_settings + wrong_fused_data = df_settings with self.assertRaises(TypeError): KNN(knn_settings, wrong_fused_data) # pass an object of the wrong class as fused_data def test_knn(self): """Integration test case for the training function.""" # Perform preliminary data fusion - lldf_settings = LLDFSettings(output=GraphMode.NONE) + df_settings = DFSettings(output=GraphMode.NONE) table1 = Table( file_path="tests/qepas.xlsx", sheet_name="Sheet1", @@ -105,28 +105,28 @@ def test_knn(self): sheet_name="Sheet1", preprocessing="none" ) - lldf = LLDF(lldf_settings, [table1, table2]) - lldf.lldf() + df = DF(df_settings, [table1, table2]) + df.fuse() # Set up and run KNN (no output) knn_settings = KNNSettings() - knn = KNN(knn_settings, lldf.fused_data) - knn.knn() + knn = KNN(knn_settings, df.fused_data) + knn.train() # With graph output knn_settings = KNNSettings(output=GraphMode.GRAPHIC) - knn = KNN(knn_settings, lldf.fused_data) - knn.knn() + knn = KNN(knn_settings, df.fused_data) + knn.train() # With text output knn_settings = KNNSettings(output=GraphMode.TEXT) - knn = KNN(knn_settings, lldf.fused_data) - knn.knn() + knn = KNN(knn_settings, df.fused_data) + knn.train() def test_prediction(self): """Test case against prediction parameter issues.""" # Perform preliminary data fusion - lldf_settings = LLDFSettings(output=GraphMode.NONE) + df_settings = DFSettings(output=GraphMode.NONE) table1 = Table( file_path="tests/qepas.xlsx", sheet_name="Sheet1", @@ -137,15 +137,15 @@ def test_prediction(self): sheet_name="Sheet1", preprocessing="none" ) - lldf = LLDF(lldf_settings, [table1, table2]) - lldf.lldf() + df = DF(df_settings, [table1, table2]) + df.fuse() # Set up KNN without training it knn_settings = KNNSettings() - knn = KNN(knn_settings, lldf.fused_data) + knn = KNN(knn_settings, df.fused_data) # 
Pick a random sample for prediction - x_data_sample = lldf.fused_data.x_train.iloc[119] # should be DMMP + x_data_sample = df.fused_data.x_train.iloc[119] # should be DMMP x_data_sample = x_data_sample.iloc[1:].to_frame().transpose() # Run prediction with untrained model (should throw exception) @@ -153,7 +153,7 @@ def test_prediction(self): knn.predict(x_data_sample) # Run training - knn.knn() + knn.train() # Run prediction with empty data (should throw exception) with self.assertRaises(TypeError): diff --git a/tests/test_lda.py b/tests/test_lda.py index 9e1d9ec..828465d 100644 --- a/tests/test_lda.py +++ b/tests/test_lda.py @@ -1,7 +1,7 @@ """This module contains the test cases for the LDA module.""" import unittest from chemfusekit.lda import LDASettings, LDA, GraphMode -from chemfusekit.lldf import LLDFSettings, LLDF, Table +from chemfusekit.df import DFSettings, DF, Table class TestLDA(unittest.TestCase): @@ -20,7 +20,7 @@ def test_lda_constructor(self): """Test case against constructor parameter issues.""" # Perform preliminary data fusion - lldf_settings = LLDFSettings(output=GraphMode.NONE) + df_settings = DFSettings(output=GraphMode.NONE) table1 = Table( file_path="tests/qepas.xlsx", sheet_name="Sheet1", @@ -31,8 +31,8 @@ def test_lda_constructor(self): sheet_name="Sheet1", preprocessing="none" ) - lldf = LLDF(lldf_settings, [table1, table2]) - lldf.lldf() + df = DF(df_settings, [table1, table2]) + df.fuse() lda_settings = LDASettings() @@ -42,19 +42,19 @@ def test_lda_constructor(self): # Then, construct the object with null settings: with self.assertRaises(TypeError): - LDA(None, lldf.fused_data) + LDA(None, df.fused_data) # Now, with both null: with self.assertRaises(TypeError): LDA(None, None) # Finally, with proper values: - LDA(lda_settings, lldf.fused_data) + LDA(lda_settings, df.fused_data) def test_lda(self): """Integration test case.""" # Perform preliminary data fusion - lldf_settings = LLDFSettings(output=GraphMode.NONE) + df_settings = 
DFSettings(output=GraphMode.NONE) table1 = Table( file_path="tests/qepas.xlsx", sheet_name="Sheet1", @@ -65,34 +65,34 @@ def test_lda(self): sheet_name="Sheet1", preprocessing="none" ) - lldf = LLDF(lldf_settings, [table1, table2]) - lldf.lldf() + df = DF(df_settings, [table1, table2]) + df.fuse() # Create an LDA object and train it, with graphical output lda_settings = LDASettings(output=GraphMode.GRAPHIC) - lda = LDA(lda_settings, lldf.fused_data) - lda.lda() + lda = LDA(lda_settings, df.fused_data) + lda.train() # Create an LDA object and train it, with text output lda_settings = LDASettings(output=GraphMode.TEXT) - lda = LDA(lda_settings, lldf.fused_data) - lda.lda() + lda = LDA(lda_settings, df.fused_data) + lda.train() # Create an LDA object and train it, with no output lda_settings = LDASettings(output=GraphMode.NONE) - lda = LDA(lda_settings, lldf.fused_data) - lda.lda() + lda = LDA(lda_settings, df.fused_data) + lda.train() # Create an LDA object and train it, with true output and split tests lda_settings = LDASettings(output=GraphMode.TEXT, test_split=True) - lda = LDA(lda_settings, lldf.fused_data) - lda.lda() + lda = LDA(lda_settings, df.fused_data) + lda.train() def test_lda_predict(self): """Test case against prediction parameter issues.""" # Perform preliminary data fusion - lldf_settings = LLDFSettings(output=GraphMode.NONE) + df_settings = DFSettings(output=GraphMode.NONE) table1 = Table( file_path="tests/qepas.xlsx", sheet_name="Sheet1", @@ -103,15 +103,15 @@ def test_lda_predict(self): sheet_name="Sheet1", preprocessing="none" ) - lldf = LLDF(lldf_settings, [table1, table2]) - lldf.lldf() + df = DF(df_settings, [table1, table2]) + df.fuse() # Create an LDA object without training it lda_settings = LDASettings() - lda = LDA(lda_settings, lldf.fused_data) + lda = LDA(lda_settings, df.fused_data) # Pick a random sample for prediction - x_data_sample = lldf.fused_data.x_train.iloc[119] # should be DMMP + x_data_sample = 
df.fused_data.x_train.iloc[119] # should be DMMP x_data_sample = x_data_sample.iloc[1:].to_frame().transpose() # Run prediction with untrained model (should throw exception) @@ -119,7 +119,7 @@ def test_lda_predict(self): lda.predict(x_data_sample) # Train the LDA object - lda.lda() + lda.train() # Run prediction with empty data (should throw exception) with self.assertRaises(TypeError): diff --git a/tests/test_lr.py b/tests/test_lr.py index 688fe7c..869cd62 100644 --- a/tests/test_lr.py +++ b/tests/test_lr.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd -from chemfusekit.lldf import LLDFSettings, LLDF, Table +from chemfusekit.df import DFSettings, DF, Table from chemfusekit.pca import PCASettings, PCA, PCADataModel from chemfusekit.lr import LRSettings, LR, GraphMode @@ -82,7 +82,7 @@ def test_lr_constructor(self): def test_lr(self): """Integration test case for LR training.""" # Perform preliminary data fusion - lldf_settings = LLDFSettings(output=GraphMode.NONE) + df_settings = DFSettings(output=GraphMode.NONE) table1 = Table( file_path="tests/qepas.xlsx", sheet_name="Sheet1", @@ -93,12 +93,12 @@ def test_lr(self): sheet_name="Sheet1", preprocessing="none" ) - lldf = LLDF(lldf_settings, [table1, table2]) - lldf.lldf() + df = DF(df_settings, [table1, table2]) + df.fuse() pca_settings = PCASettings() - pca = PCA(pca_settings, lldf.fused_data) - pca.pca() + pca = PCA(pca_settings, df.fused_data) + pca.train() pca.pca_stats() pca_data = pca.export_data() @@ -106,33 +106,33 @@ def test_lr(self): # With no output lr_settings = LRSettings() lr = LR(lr_settings, pca_data) - lr.lr() + lr.train() # With text output lr_settings = LRSettings(output=GraphMode.TEXT) lr = LR(lr_settings, pca_data) - lr.lr() + lr.train() # With graph output # With text output lr_settings = LRSettings(output=GraphMode.GRAPHIC) lr = LR(lr_settings, pca_data) - lr.lr() + lr.train() # With text output and split tests lr_settings = LRSettings(output=GraphMode.TEXT, test_split=True) lr = 
LR(lr_settings, pca_data) - lr.lr() + lr.train() # With graph output and split tests lr_settings = LRSettings(output=GraphMode.GRAPHIC, test_split=True) lr = LR(lr_settings, pca_data) - lr.lr() + lr.train() - # A final test with just the LLDF data: + # A final test with just the df data: lr_settings = LRSettings() - lr = LR(lr_settings, lldf.fused_data) - lr.lr() + lr = LR(lr_settings, df.fused_data) + lr.train() def test_lr_predict(self): """Test case against prediction input errors.""" diff --git a/tests/test_pca.py b/tests/test_pca.py index d57bb71..aa35e4c 100644 --- a/tests/test_pca.py +++ b/tests/test_pca.py @@ -2,7 +2,7 @@ import unittest import copy from chemfusekit.pca import PCASettings, PCA, GraphMode -from chemfusekit.lldf import LLDFSettings, LLDF, Table +from chemfusekit.df import DFSettings, DF, Table from chemfusekit.lr import LRSettings, LR @@ -35,7 +35,7 @@ def test_pca_constructor(self): """Test case against constructor parameter issues.""" # Perform preliminary data fusion - lldf_settings = LLDFSettings(output=GraphMode.NONE) + df_settings = DFSettings(output=GraphMode.NONE) table1 = Table( file_path="tests/qepas.xlsx", sheet_name="Sheet1", @@ -46,8 +46,8 @@ def test_pca_constructor(self): sheet_name="Sheet1", preprocessing="none" ) - lldf = LLDF(lldf_settings, [table1, table2]) - lldf.lldf() + df = DF(df_settings, [table1, table2]) + df.fuse() lda_settings = PCASettings() @@ -57,14 +57,14 @@ def test_pca_constructor(self): # Then, construct the object with null settings: with self.assertRaises(TypeError): - PCA(None, lldf.fused_data) + PCA(None, df.fused_data) # Now, with both null: with self.assertRaises(TypeError): PCA(None, None) # Finally, with proper values: - PCA(lda_settings, lldf.fused_data) + PCA(lda_settings, df.fused_data) def test_pca(self): """ @@ -72,7 +72,7 @@ def test_pca(self): whether the output is set to true or false """ # Perform preliminary data fusion - lldf_settings = LLDFSettings(output=GraphMode.NONE) + df_settings = 
DFSettings(output=GraphMode.NONE) table1 = Table( file_path="tests/qepas.xlsx", sheet_name="Sheet1", @@ -83,13 +83,13 @@ def test_pca(self): sheet_name="Sheet1", preprocessing="none" ) - lldf = LLDF(lldf_settings, [table1, table2]) - lldf.lldf() + df = DF(df_settings, [table1, table2]) + df.fuse() # Set up and execute PCA (graph output) pca_settings = PCASettings(output=GraphMode.GRAPHIC) - pca = PCA(pca_settings, lldf.fused_data) - pca.pca() + pca = PCA(pca_settings, df.fused_data) + pca.train() # Save the results result_true_components = copy.deepcopy(pca.components) @@ -97,8 +97,8 @@ def test_pca(self): # Set up and execute PCA (again) pca_settings = PCASettings(output=GraphMode.NONE) - pca = PCA(pca_settings, lldf.fused_data) - pca.pca() + pca = PCA(pca_settings, df.fused_data) + pca.train() # Save the results result_false_components = pca.components @@ -109,8 +109,8 @@ def test_pca(self): # Set up and execute PCA (text output) pca_settings = PCASettings(output=GraphMode.GRAPHIC) - pca = PCA(pca_settings, lldf.fused_data) - pca.pca() + pca = PCA(pca_settings, df.fused_data) + pca.train() # Save the results result_true_components = copy.deepcopy(pca.components) @@ -118,8 +118,8 @@ def test_pca(self): # Set up and execute PCA (again) pca_settings = PCASettings(output=GraphMode.NONE) - pca = PCA(pca_settings, lldf.fused_data) - pca.pca() + pca = PCA(pca_settings, df.fused_data) + pca.train() # Save the results result_false_components = pca.components @@ -132,7 +132,7 @@ def test_pca_integration_lr(self): """Integration test for PCA+LR""" # Perform preliminary data fusion - lldf_settings = LLDFSettings(output=GraphMode.NONE) + df_settings = DFSettings(output=GraphMode.NONE) table1 = Table( file_path="tests/qepas.xlsx", sheet_name="Sheet1", @@ -143,23 +143,23 @@ def test_pca_integration_lr(self): sheet_name="Sheet1", preprocessing="none" ) - lldf = LLDF(lldf_settings, [table1, table2]) - lldf.lldf() + df = DF(df_settings, [table1, table2]) + df.fuse() # Set up PCA 
and get the rescaled_data property directly pca_settings = PCASettings() - pca = PCA(pca_settings, lldf.fused_data) + pca = PCA(pca_settings, df.fused_data) rescaled_data = pca.rescaled_data # Set up and execute LR lr_settings = LRSettings() lr = LR(lr_settings, rescaled_data) - lr.lr() + lr.train() def test_pca_import_export(self): """Test case for the import and export of PCA models.""" # Perform preliminary data fusion - lldf_settings = LLDFSettings(output=GraphMode.NONE) + df_settings = DFSettings(output=GraphMode.NONE) table1 = Table( file_path="tests/qepas.xlsx", sheet_name="Sheet1", @@ -170,19 +170,19 @@ def test_pca_import_export(self): sheet_name="Sheet1", preprocessing="none" ) - lldf = LLDF(lldf_settings, [table1, table2]) - lldf.lldf() + df = DF(df_settings, [table1, table2]) + df.fuse() # Set up PCA pca_settings = PCASettings() - pca = PCA(pca_settings, lldf.fused_data) + pca = PCA(pca_settings, df.fused_data) # Try exporting the model before executing pca() with self.assertRaises(RuntimeError): pca.export_model('pca_model.sklearn') # Execute PCA and retry exporting - pca.pca() + pca.train() pca.export_model('pca_model.sklearn') # Try creating a new PCA object from the wrong type of file @@ -198,7 +198,7 @@ def test_pca_import_export(self): def test_pca_reduce(self): """Test case for data dimensionality reduction.""" # Perform preliminary data fusion - lldf_settings = LLDFSettings(output=GraphMode.NONE) + df_settings = DFSettings(output=GraphMode.NONE) table1 = Table( file_path="tests/qepas.xlsx", sheet_name="Sheet1", @@ -209,20 +209,20 @@ def test_pca_reduce(self): sheet_name="Sheet1", preprocessing="none" ) - lldf = LLDF(lldf_settings, [table1, table2]) - lldf.lldf() + df = DF(df_settings, [table1, table2]) + df.fuse() # Set up PCA pca_settings = PCASettings() - pca = PCA(pca_settings, lldf.fused_data) + pca = PCA(pca_settings, df.fused_data) # Try rescaling data before training the model with self.assertRaises(RuntimeError): - 
pca.reduce(lldf.fused_data) + pca.reduce(df.fused_data) # Execute PCA and then rescale - pca.pca() - reduced_data = pca.reduce(lldf.fused_data) + pca.train() + reduced_data = pca.reduce(df.fused_data) # Check that the dimensionality (number of columns) is reduced - self.assertLess(reduced_data.x_data.shape[1], lldf.fused_data.x_data.shape[1]) + self.assertLess(reduced_data.x_data.shape[1], df.fused_data.x_data.shape[1]) diff --git a/tests/test_plsda.py b/tests/test_plsda.py index 2b27319..763cce5 100644 --- a/tests/test_plsda.py +++ b/tests/test_plsda.py @@ -5,7 +5,7 @@ import pandas as pd from chemfusekit.plsda import PLSDASettings, PLSDA, GraphMode -from chemfusekit.lldf import LLDFSettings, LLDF, LLDFDataModel, Table +from chemfusekit.df import DFSettings, DF, DFDataModel, Table class TestPLSDA(unittest.TestCase): @@ -36,7 +36,7 @@ def test_plsda_settings(self): def test_plsda_constructor(self): """Test case against constructor errors.""" # Perform preliminary data fusion - lldf_settings = LLDFSettings(output=GraphMode.NONE) + df_settings = DFSettings(output=GraphMode.NONE) table1 = Table( file_path="tests/qepas.xlsx", sheet_name="Sheet1", @@ -47,24 +47,24 @@ def test_plsda_constructor(self): sheet_name="Sheet1", preprocessing="none" ) - lldf = LLDF(lldf_settings, [table1, table2]) - lldf.lldf() + df = DF(df_settings, [table1, table2]) + df.fuse() # settings parameter - wrong_settings = LLDFDataModel(pd.DataFrame([1]), pd.DataFrame([1]), np.asarray([1])) + wrong_settings = DFDataModel(pd.DataFrame([1]), pd.DataFrame([1]), np.asarray([1])) with self.assertRaises(TypeError): - PLSDA(wrong_settings, lldf.fused_data) # pass an object of the wrong class as settings + PLSDA(wrong_settings, df.fused_data) # pass an object of the wrong class as settings # fused_data parameter knn_settings = PLSDASettings() - wrong_fused_data = lldf_settings + wrong_fused_data = df_settings with self.assertRaises(TypeError): PLSDA(knn_settings, wrong_fused_data) # pass an object of the 
wrong class as fused_data def test_plsda(self): """Integration test case for the training function.""" # Perform preliminary data fusion - lldf_settings = LLDFSettings(output=GraphMode.NONE) + df_settings = DFSettings(output=GraphMode.NONE) table1 = Table( file_path="tests/qepas.xlsx", sheet_name="Sheet1", @@ -75,38 +75,38 @@ def test_plsda(self): sheet_name="Sheet1", preprocessing="none" ) - lldf = LLDF(lldf_settings, [table1, table2]) - lldf.lldf() + df = DF(df_settings, [table1, table2]) + df.fuse() # Set up and run PLSDA (no output) plsda_settings = PLSDASettings() - plsda = PLSDA(plsda_settings, lldf.fused_data) - plsda.plsda() + plsda = PLSDA(plsda_settings, df.fused_data) + plsda.train() # Set up and run PLSDA with text output plsda_settings = PLSDASettings(output=GraphMode.TEXT) - plsda = PLSDA(plsda_settings, lldf.fused_data) - plsda.plsda() + plsda = PLSDA(plsda_settings, df.fused_data) + plsda.train() # Set up and run PLSDA with graphical output plsda_settings = PLSDASettings(output=GraphMode.GRAPHIC) - plsda = PLSDA(plsda_settings, lldf.fused_data) - plsda.plsda() + plsda = PLSDA(plsda_settings, df.fused_data) + plsda.train() # Run with text output and split testing plsda_settings = PLSDASettings(output=GraphMode.TEXT, test_split=True) - plsda = PLSDA(plsda_settings, lldf.fused_data) - plsda.plsda() + plsda = PLSDA(plsda_settings, df.fused_data) + plsda.train() # Run with graphical output and split testing plsda_settings = PLSDASettings(output=GraphMode.GRAPHIC, test_split=True) - plsda = PLSDA(plsda_settings, lldf.fused_data) - plsda.plsda() + plsda = PLSDA(plsda_settings, df.fused_data) + plsda.train() def test_prediction(self): """Test case against prediction parameter issues.""" # Perform preliminary data fusion - lldf_settings = LLDFSettings(output=GraphMode.NONE) + df_settings = DFSettings(output=GraphMode.NONE) table1 = Table( file_path="tests/qepas.xlsx", sheet_name="Sheet1", @@ -117,15 +117,15 @@ def test_prediction(self): sheet_name="Sheet1", 
preprocessing="none" ) - lldf = LLDF(lldf_settings, [table1, table2]) - lldf.lldf() + df = DF(df_settings, [table1, table2]) + df.fuse() # Set up KNN without training it plsda_settings = PLSDASettings() - plsda = PLSDA(plsda_settings, lldf.fused_data) + plsda = PLSDA(plsda_settings, df.fused_data) # Pick a random sample for prediction - x_data_sample = lldf.fused_data.x_train.iloc[119] # should be DMMP + x_data_sample = df.fused_data.x_train.iloc[119] # should be DMMP x_data_sample = x_data_sample.iloc[1:].to_frame().transpose() # Run prediction with untrained model (should throw exception) @@ -133,7 +133,7 @@ def test_prediction(self): plsda.predict(x_data_sample) # Run training - plsda.plsda() + plsda.train() # Run prediction with empty data (should throw exception) with self.assertRaises(TypeError): diff --git a/tests/test_svm.py b/tests/test_svm.py index 46d7a8c..f1d9b5d 100644 --- a/tests/test_svm.py +++ b/tests/test_svm.py @@ -1,7 +1,7 @@ """This module contains the test cases for the SVM module.""" import unittest from chemfusekit.svm import SVMSettings, SVM, GraphMode -from chemfusekit.lldf import LLDFSettings, LLDF, Table +from chemfusekit.df import DFSettings, DF, Table class TestSVM(unittest.TestCase): @@ -33,7 +33,7 @@ def test_svm_constructor(self): """Test case against constructor parameter issues.""" # Perform preliminary data fusion - lldf_settings = LLDFSettings(output=GraphMode.NONE) + df_settings = DFSettings(output=GraphMode.NONE) table1 = Table( file_path="tests/qepas.xlsx", sheet_name="Sheet1", @@ -44,8 +44,8 @@ def test_svm_constructor(self): sheet_name="Sheet1", preprocessing="none" ) - lldf = LLDF(lldf_settings, [table1, table2]) - lldf.lldf() + df = DF(df_settings, [table1, table2]) + df.fuse() svm_settings = SVMSettings() @@ -55,20 +55,20 @@ def test_svm_constructor(self): # Then, construct the object with null settings: with self.assertRaises(TypeError): - SVM(None, lldf.fused_data) + SVM(None, df.fused_data) # Now, with both null: with 
self.assertRaises(TypeError): SVM(None, None) # Finally, with proper values: - SVM(svm_settings, lldf.fused_data) + SVM(svm_settings, df.fused_data) def test_svm(self): """Integration test case.""" # Perform preliminary data fusion - lldf_settings = LLDFSettings(output=GraphMode.NONE) + df_settings = DFSettings(output=GraphMode.NONE) table1 = Table( file_path="tests/qepas.xlsx", sheet_name="Sheet1", @@ -79,29 +79,29 @@ def test_svm(self): sheet_name="Sheet1", preprocessing="none" ) - lldf = LLDF(lldf_settings, [table1, table2]) - lldf.lldf() + df = DF(df_settings, [table1, table2]) + df.fuse() # Create an SVM object and train it, with no output svm_settings = SVMSettings(output=GraphMode.NONE) - svm = SVM(svm_settings, lldf.fused_data) - svm.svm() + svm = SVM(svm_settings, df.fused_data) + svm.train() # Create an SVM object and train it, with graphical output svm_settings = SVMSettings(output=GraphMode.GRAPHIC) - svm = SVM(svm_settings, lldf.fused_data) - svm.svm() + svm = SVM(svm_settings, df.fused_data) + svm.train() # Create an SVM object and train it, with text output svm_settings = SVMSettings(output=GraphMode.TEXT) - svm = SVM(svm_settings, lldf.fused_data) - svm.svm() + svm = SVM(svm_settings, df.fused_data) + svm.train() def test_svm_predict(self): """Test case against prediction parameter issues.""" # Perform preliminary data fusion - lldf_settings = LLDFSettings(output=GraphMode.NONE) + df_settings = DFSettings(output=GraphMode.NONE) table1 = Table( file_path="tests/qepas.xlsx", sheet_name="Sheet1", @@ -112,15 +112,15 @@ def test_svm_predict(self): sheet_name="Sheet1", preprocessing="none" ) - lldf = LLDF(lldf_settings, [table1, table2]) - lldf.lldf() + df = DF(df_settings, [table1, table2]) + df.fuse() # Create an SVM object without training it svm_settings = SVMSettings() - svm = SVM(svm_settings, lldf.fused_data) + svm = SVM(svm_settings, df.fused_data) # Pick a random sample for prediction - x_data_sample = lldf.fused_data.x_train.iloc[119] # should 
be DMMP + x_data_sample = df.fused_data.x_train.iloc[119] # should be DMMP x_data_sample = x_data_sample.iloc[1:].to_frame().transpose() # Run prediction with untrained model (should throw exception) @@ -128,7 +128,7 @@ def test_svm_predict(self): svm.predict(x_data_sample) # Train the SVM object - svm.svm() + svm.train() # Run prediction with empty data (should throw exception) with self.assertRaises(TypeError):