Skip to content

Commit

Permalink
fix(LDA, BaseReducer): components bug
Browse files (browse the repository at this point in the history)
  • Loading branch information
f-aguzzi committed Jun 20, 2024
1 parent 35cf415 commit 0f87962
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 28 deletions.
1 change: 1 addition & 0 deletions chemfusekit/__base.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -184,6 +184,7 @@ class BaseReducer(BaseActionClass):
def __init__(self, settings: BaseSettings, data: BaseDataModel):
super().__init__(settings, data)
self.array_scores: Optional[np.ndarray] = None
self.components: Optional[int] = None

@abstractmethod
def export_data(self) -> BaseDataModel:
Expand Down
36 changes: 19 additions & 17 deletions chemfusekit/__utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -49,23 +49,25 @@ def graph_output(scores, model, name: str, mode: GraphMode = GraphMode.GRAPHIC):
f"{name} Scores"
)

# Scores plot
fig = px.scatter(scores, x="LV1", y="LV2", color="Substance", hover_data=['Substance'])
fig.update_xaxes(zeroline=True, zerolinewidth=1, zerolinecolor='Black')
fig.update_yaxes(zeroline=True, zerolinewidth=1, zerolinecolor='Black')
fig.update_layout(
height=600,
width=800,
title_text='Scores Plot')
fig.show()

# Plot 3D scores
fig = px.scatter_3d(scores, x='LV1', y='LV2', z='LV3',
color='Substance', hover_data=['Substance'],
hover_name=scores.index
)
fig.update_layout(title_text=f"3D colored by Substance for {name}")
fig.show()
if model.n_components >= 2:
# Scores plot
fig = px.scatter(scores, x="LV1", y="LV2", color="Substance", hover_data=['Substance'])
fig.update_xaxes(zeroline=True, zerolinewidth=1, zerolinecolor='Black')
fig.update_yaxes(zeroline=True, zerolinewidth=1, zerolinecolor='Black')
fig.update_layout(
height=600,
width=800,
title_text='Scores Plot')
fig.show()

if model.n_components >= 3:
# Plot 3D scores
fig = px.scatter_3d(scores, x='LV1', y='LV2', z='LV3',
color='Substance', hover_data=['Substance'],
hover_name=scores.index
)
fig.update_layout(title_text=f"3D colored by Substance for {name}")
fig.show()


def print_table(header_values, cell_values, title: str, mode: GraphMode = GraphMode.GRAPHIC):
Expand Down
24 changes: 13 additions & 11 deletions chemfusekit/lda.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -27,7 +27,7 @@ class LDASettings(BaseClassifierSettings):

def __init__(self, components: int | None = None, output: GraphMode = GraphMode.NONE, test_split: bool = False):
super().__init__(output, test_split)
if components <= 2:
if components is not None and components <= 2:
raise ValueError("Invalid component number: must be a > 1 integer.")
self.components = components

Expand All @@ -41,17 +41,17 @@ def __init__(self, settings: LDASettings, data: BaseDataModel):
self.data = data
# Self-detect components if the data is from PCA
if isinstance(data, ReducerDataModel):
self.settings.components = data.components - 1
self.components = data.components - 1
self.array_scores: Optional[np.ndarray] = None

def train(self):
"""Performs Linear Discriminant Analysis"""

# Auto-selection of the number of components if not specified
if self.components is None:
if self.settings.components is None and self.components is None:
self._select_feature_number(self.data.x_data, self.data.y)

lda = LD(n_components=self.settings.components)
lda = LD(n_components=self.components)
self.array_scores = lda.fit_transform(self.data.x_data, self.data.y)
pred = lda.predict(self.data.x_data)

Expand Down Expand Up @@ -79,8 +79,8 @@ def train(self):
)
print_table(
["Component"] + [f"Feature {i + 1}" for i in range(lda.coef_.shape[0])],
[[f"Component {i + 1}" for i in range(min(self.settings.components, lda.coef_.shape[1]))]] + [
list(lda.coef_[:self.settings.components, i]) for i in range(lda.coef_.shape[0])],
[[f"Component {i + 1}" for i in range(min(self.components, lda.coef_.shape[1]))]] + [
list(lda.coef_[:self.components, i]) for i in range(lda.coef_.shape[1])],
"LDA Coefficients",
self.settings.output
)
Expand All @@ -100,7 +100,7 @@ def train(self):
mode=self.settings.output
)

lv_cols = [f'LV{i + 1}' for i in range(self.settings.components)]
lv_cols = [f'LV{i + 1}' for i in range(self.components)]
scores = pd.DataFrame(data=self.array_scores, columns=lv_cols) # latent variables
scores.index = self.data.x_data.index
y_dataframe = pd.DataFrame(self.data.y, columns=['Substance'])
Expand All @@ -123,7 +123,7 @@ def train(self):
run_split_test(
scores.drop('Substance', axis=1).values,
self.data.y,
LD(n_components=self.settings.components),
LD(n_components=self.components),
mode=self.settings.output
)

Expand All @@ -134,14 +134,15 @@ def import_model(self, import_path: str):
self.model = model_backup
raise ImportError("The file you tried to import is not a LinearDiscriminantAnalysis classifier.")
self.settings.components = self.model.n_components
self.components = self.model.n_components

def export_data(self) -> LDADataModel:
"""Export the data to an object."""
return LDADataModel(
x_data=self.data.x_data,
x_train=self.data.x_train,
y=self.data.y,
components=self.settings.components
components=self.components
)

@cached_property
Expand All @@ -168,8 +169,9 @@ def rescaled_data(self) -> BaseDataModel:

def _select_feature_number(self, x, y):
# Auto-select the number of components
max_comps = min(self.data.x_data.shape[1], self.settings.components)
n_components = np.arange(1, max_comps + 1)
max_comps = min(self.data.x_data.shape[1], 20, len(np.unique(y)))
n_components = np.arange(1, max_comps)
print(n_components)
cv_scores = []
for n in n_components:
lda = LD(n_components=n)
Expand Down

0 comments on commit 0f87962

Please sign in to comment.