Skip to content

Commit

Permalink
feat(DF): add file-like input capabilities
Browse files Browse the repository at this point in the history
  • Loading branch information
f-aguzzi committed Jun 29, 2024
1 parent 5573f1a commit e5c707b
Showing 1 changed file with 45 additions and 33 deletions.
78 changes: 45 additions & 33 deletions src/chemfusekit/df.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Performs low-level data fusion on input arrays, outputs the results"""
from typing import Optional, List
from typing import Optional, List, IO

import numpy as np
import pandas as pd
Expand All @@ -16,7 +16,7 @@
class Table:
"""Holds the path, preprocessing choice and sheet name for a single Excel table."""

def __init__(self, file_path: str, sheet_name: str, preprocessing: str, feature_selection: str | None = None,
def __init__(self, file_path: str | IO, sheet_name: str, preprocessing: str, feature_selection: str | None = None,
class_column: str = 'Substance', index_column: str | None = None):
self.file_path = file_path
self.sheet_name = sheet_name
Expand Down Expand Up @@ -133,21 +133,35 @@ def fuse(self):
ax1.set_title(f'Original data')
ax2.plot(wl, preprocessed_x.T)
ax2.set_title(f'Processed table with {table.preprocessing}')
if table.file_path.endswith('.xlsx'):
fig.suptitle(f'Imported table: {table.sheet_name} from {table.file_path}')
if isinstance(table.file_path, str):
if table.file_path.endswith('.xlsx'):
fig.suptitle(f'Imported table: {table.sheet_name} from {table.file_path}')
else:
fig.suptitle(f'Imported table: {table.file_path}')
else:
fig.suptitle(f'Imported table: {table.file_path}')
file_path = table.file_path.name
if file_path.endswith('.xlsx'):
fig.suptitle(f'Imported table: {table.sheet_name} from {file_path}')
else:
fig.suptitle(f'Imported table: {file_path}')
else:
# Let's plot the different datasets we preprocessed
fig, ax1 = plt.subplots(1, figsize=(15, 15))
if x.shape[1] == 1:
ax1.plot(x)
else:
ax1.plot(wl, x.T)
if table.file_path.endswith('.xlsx'):
fig.suptitle(f'Imported table: {table.sheet_name} from {table.file_path} (no preprocessing)')
if isinstance(table.file_path, str):
if table.file_path.endswith('.xlsx'):
fig.suptitle(f'Imported table: {table.sheet_name} from {table.file_path} (no preprocessing')
else:
fig.suptitle(f'Imported table: {table.file_path} (no preprocessing)')
else:
fig.suptitle(f'Imported table: {table.file_path} (no preprocessing)')
file_path = table.file_path.name
if file_path.endswith('.xlsx'):
fig.suptitle(f'Imported table: {table.sheet_name} from {file_path}')
else:
fig.suptitle(f'Imported table: {file_path}')

# Create a new DataFrame with the processed numerical attributes
processed_dataframe_x = pd.DataFrame(
Expand Down Expand Up @@ -230,31 +244,29 @@ def _perform_feature_selection(table: Table, data_model: BaseDataModel) -> BaseD

@staticmethod
def _import_table(file_path, sheet_name) -> pd.DataFrame:
    """Import a table from a file path or from an open file-like object.

    Args:
        file_path: Either a path string ending in ``.xlsx``, ``.csv`` or
            ``.json``, or a file-like object (anything exposing ``read``),
            which is always parsed as an Excel workbook.
        sheet_name: Name of the sheet to load; only used for Excel input.

    Returns:
        The imported table. Excel and CSV inputs use the first column as
        the index and the first row as the header; JSON input is read with
        ``orient='table'``.

    Raises:
        ValueError: If a file-like object cannot be read as an Excel file.
        FileNotFoundError: If a path cannot be opened or parsed, or if its
            extension is not one of the supported formats.
        TypeError: If ``file_path`` is neither a string nor file-like.
    """
    if isinstance(file_path, str):
        # Handle file paths
        try:
            # Autodetect the format based on the file extension
            if file_path.endswith('.xlsx'):
                table_data = pd.read_excel(file_path, sheet_name=sheet_name, index_col=0, header=0)
            elif file_path.endswith('.csv'):
                table_data = pd.read_csv(file_path, index_col=0, header=0)
            elif file_path.endswith('.json'):
                table_data = pd.read_json(file_path, orient='table')
            else:
                raise ValueError(f"Unsupported file format: {file_path}")
        except Exception as exc:
            # Every failure on the path branch (including the unsupported
            # extension above) is reported as FileNotFoundError; kept as-is
            # because callers may already catch that type.
            raise FileNotFoundError("Error opening the selected files.") from exc
    elif hasattr(file_path, 'read'):
        # Handle file-like objects. The check is duck-typed on purpose:
        # typing.IO is an annotation-only class that real streams
        # (open(), io.BytesIO, upload buffers) do not inherit from, so
        # isinstance(obj, IO) would reject every actual file object.
        try:
            table_data = pd.read_excel(file_path, sheet_name=sheet_name, index_col=0, header=0)
        except Exception as exc:
            raise ValueError("Error reading the file-like object.") from exc
    else:
        raise TypeError("Unsupported file type. Expected str or IO.")

    return table_data

Expand Down

0 comments on commit e5c707b

Please sign in to comment.