Skip to content

Commit

Permalink
fix(pylint): added requirements, increased scores
Browse files Browse the repository at this point in the history
  • Loading branch information
f-aguzzi committed Apr 23, 2024
1 parent 234b200 commit bef1cdf
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 56 deletions.
5 changes: 5 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
numpy>=1.24.4
scipy>=1.12.0
matplotlib>=3.8.2
pandas>=2.2.1
seaborn>=0.13.2
97 changes: 52 additions & 45 deletions src/lldf.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,39 @@
'''Performs low-level data fusion on input arrays, outputs the results'''
import numpy as np
import pandas as pd
from scipy.signal import savgol_filter

# TODO: choose the preprocessing method, test the export

def snv(input_data):
    '''Applies row-wise Standard Normal Variate (SNV) scaling to a 2-D array.

    Each row is centred on its own mean and divided by its own (population)
    standard deviation.

    Args:
        input_data: 2-D array-like, one sample per row.

    Returns:
        A new floating-point array with the same shape as the input.
    '''
    data = np.asarray(input_data)

    # Integer (or boolean) input must be promoted first, otherwise the scaled
    # values would be truncated when stored back in the input's dtype.
    if not np.issubdtype(data.dtype, np.inexact):
        data = data.astype(float)

    # keepdims keeps the per-row statistics as column vectors so that they
    # broadcast against every element of their row.
    row_mean = data.mean(axis=1, keepdims=True)
    row_std = data.std(axis=1, keepdims=True)

    return (data - row_mean) / row_std

class LLDF_Model:
    '''Plain container for the four artifacts produced by the data fusion.'''
    def __init__(self, X_data, Y_dataframe, X_train, Y):
        # Every argument is stored unchanged under an attribute of the same name.
        self.X_data, self.Y_dataframe = X_data, Y_dataframe
        self.X_train, self.Y = X_train, Y
class LLDFModel:
    '''Models the output data from the LLDF operation.

    Attributes:
        x_data: the fused feature table (no label column).
        y_dataframe: the class labels wrapped in a dataframe.
        x_dataframe: alias of y_dataframe, see the note in __init__.
        x_train: labels and fused features joined in a single table.
        y: the raw class labels.
    '''
    def __init__(self, x_data, y_dataframe, x_train, y):
        self.x_data = x_data
        self.y_dataframe = y_dataframe
        # The label dataframe used to be stored only under the misleading name
        # `x_dataframe`; the alias is kept so existing readers do not break.
        self.x_dataframe = y_dataframe
        self.x_train = x_train
        self.y = y

class LLDF:
'''Holds together all the data, methods and artifacts of the LLDF operation'''
def __init__(self, preprocessing='snv'):
self.preprocessing = preprocessing
self.fused_data = None

def _snv(self, input_data):
'''Applies normalization to an input array'''
# Define a new array and populate it with the corrected data
output_data = np.zeros_like(input_data)
for i in range(input_data.shape[0]):

# Apply correction
output_data[i,:] = (
(input_data[i,:] - np.mean(input_data[i,:])) / np.std(input_data[i,:])
)

return output_data

def lldf(self):
'''Performs low-level data fusion'''
path = input("Insert the QEPAS file path:")
sheet = input("Insert the QEPAS sheet name:")
spectra = pd.read_excel(path, sheet_name=sheet, index_col=0, header=0)
Expand All @@ -34,57 +42,56 @@ def lldf(self):
ret_time = pd.read_excel(path, sheet_name=sheet, index_col=0, header=0)

# select only numerical attributes
X_spectra = spectra.iloc[:, 1:]
X_time = ret_time.iloc[:, 1:]
x_spectra = spectra.iloc[:, 1:]
x_time = ret_time.iloc[:, 1:]

#Selection of classes from the spectra database
Y = spectra.loc[:, 'Substance'].values
y = spectra.loc[:, 'Substance'].values

Y_dataframe = pd.DataFrame(Y, columns=['Substance'])
y_dataframe = pd.DataFrame(y, columns=['Substance'])

# It is necessary to convert the column names as string to select them
spectra.columns = spectra.columns.astype(str) # to make the colnames as text

#wavelenghts for plots (variables)
# Your string with numbers separated by spaces
numbers_string = "8 8,0126 8,0253 8,038 8,0507 8,0635 8,0763 8,0892 8,1021 8,115 8,128 8,141 8,1541 8,1672 8,1804 8,1935 8,2068 8,2201 8,2334 8,2468 8,2602 8,2736 8,2871 8,3007 8,3142 8,3279 8,3415 8,3553 8,369 8,3828 8,3967 8,4106 8,4245 8,4385 8,4526 8,4667 8,4808 8,495 8,5092 8,5235 8,5378 8,5522 8,5666 8,5811 8,5956 8,6102 8,6248 8,6395 8,6542 8,6689 8,6838 8,6986 8,7136 8,7285 8,7435 8,7586 8,7737 8,7889 8,8042 8,8194 8,8348 8,8502 8,8656 8,8811 8,8967 8,9123 8,9279 8,9437 8,9594 8,9753 8,9912 9,0071 9,0231 9,0391 9,0553 9,0714 9,0877 9,1039 9,1203 9,1367 9,1532 9,1697 9,1863 9,2029 9,2196 9,2364 9,2532 9,2701 9,287 9,304 9,3211 9,3382 9,3554 9,3727 9,39 9,4074 9,4249 9,4424 9,46 9,4776 9,4953 9,5131 9,531 9,5489 9,5669 9,5849 9,603 9,6212 9,6395 9,6578 9,6762 9,6947 9,7132 9,7318 9,7505 9,7692 9,7881 9,8069 9,8259 9,845 9,8641 9,8833 9,9025 9,9219 9,9413 9,9608 9,9804 10"

# Replace commas with points and join the numbers with a space
wl = np.array(list(map(lambda x: float(x.replace(',', '.')), numbers_string.split())))

# Preprocessing
if self.preprocessing == 'snv':
# Compute the SNV on spectra
preprocessed_spectra = snv(X_spectra.values)
preprocessed_spectra = self._snv(x_spectra.values)
elif self.preprocessing == 'savgol':
# Preprocessing with Savitzki-Golay - smoothing, defining the window, the order and the use of derivatives
from scipy.signal import savgol_filter
preprocessed_spectra = savgol_filter(X_spectra, 7, polyorder = 2, deriv=0)
# Preprocessing with Savitzki-Golay
# smoothing, defining the window, the order and the use of derivatives
preprocessed_spectra = savgol_filter(x_spectra, 7, polyorder = 2, deriv=0)
elif self.preprocessing == 'savgol+snv':
# We can also combine the preprocessing strategies together: Savitzki-Golay - smoothing + SNV
X_savgol = savgol_filter(X_spectra, 7, polyorder = 2, deriv=0)
preprocessed_spectra = snv(X_savgol)
# We can also combine the preprocessing strategies together:
# Savitzki-Golay - smoothing + SNV
preprocessed_spectra = self._snv(savgol_filter(x_spectra, 7, polyorder = 2, deriv=0))
else:
raise Exception(f"LLDF: this type of preprocessing does not exist ({self.preprocessing=})")
raise Exception(
f"LLDF: this type of preprocessing does not exist ({self.preprocessing=})"
)


# Create a new DataFrame with the processed numerical attributes
processed_dataframe_spectra = pd.DataFrame(preprocessed_spectra, columns=spectra.columns[1:])
processed_dataframe_spectra.head()

X_RT_array = X_time.values
processed_dataframe_spectra = pd.DataFrame(
preprocessed_spectra,
columns=spectra.columns[1:]
)

# Create a new DataFrame with the processed numerical attributes
processed_dataframe_rt = pd.DataFrame(X_RT_array, columns=ret_time.columns[1:])
processed_dataframe_rt = pd.DataFrame(x_time.values, columns=ret_time.columns[1:])

# X training set
X_train = pd.concat([Y_dataframe, processed_dataframe_spectra, processed_dataframe_rt], axis = 1)
x_train = pd.concat(
[y_dataframe, processed_dataframe_spectra, processed_dataframe_rt],
axis = 1
)

# select only numerical attributes
X_data = X_train.iloc[:, 1:]
x_data = x_train.iloc[:, 1:]

self.fused_data = LLDF_Model(X_data, Y_dataframe, X_train, Y)
self.fused_data = LLDFModel(x_data, y_dataframe, x_train, y)

def export_data(self):
'''Exports the data fusion artifacts to a file'''
path = input("Insert the output file path: ")
pd.DataFrame(self.fused_data).to_excel(path)
pd.DataFrame(self.fused_data).to_excel(path)
42 changes: 31 additions & 11 deletions src/svm.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
'''Support Vector Machine module.'''
import matplotlib.pyplot as plt
# matplotlib inline

Expand All @@ -8,17 +9,26 @@
from sklearn.svm import SVC

class SVM:
'''Class for Support Vector Machine analysis of the data'''
def __init__(self, fused_data):
self.fused_data = fused_data
self.fused_data = fused_data
self.settings = None

def svm(self, type="linear"):
def svm(self, type='linear'):
'''Performs Support Vector Machine analysis'''
self.settings.type = type

X_data = self.fused_data.X_data
X_train = self.fused_data.X_train
Y = self.fused_data.Y
x_data = self.fused_data.x_data
x_train = self.fused_data.x_train
y = self.fused_data.y

X_train, X_test, y_train, y_test = train_test_split(X_data, Y, train_size=0.7, shuffle=True, stratify=Y)
x_train, x_test, y_train, y_test = train_test_split(
x_data,
y,
train_size=0.7,
shuffle=True,
stratify=y
)

# Linear kernel
if self.settings.type == "linear":
Expand All @@ -35,9 +45,9 @@ def svm(self, type="linear"):
svm_model = SVC(kernel='sigmoid')
else:
raise Exception(f"SVM: this type of kernel does not exist ({self.settings.type=})")
svm_model.fit(X_train, y_train)
y_pred = svm_model.predict(X_test)

svm_model.fit(x_train, y_train)
y_pred = svm_model.predict(x_test)

# Assuming 'y_true' and 'y_pred' are your true and predicted labels
cm = confusion_matrix(y_test, y_pred)
Expand All @@ -46,12 +56,22 @@ def svm(self, type="linear"):
class_labels = sorted(set(y_test))

# Plot the confusion matrix using seaborn with custom colormap (Blues)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_labels, yticklabels=class_labels, cbar=False, vmin=0, vmax=cm.max())
sns.heatmap(
cm,
annot=True,
fmt='d',
cmap='Blues',
xticklabels=class_labels,
yticklabels=class_labels,
cbar=False,
vmin=0,
vmax=cm.max()
)

plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix based on evaluation set')
plt.show()

# Print the classification report
print(classification_report(y_test, y_pred, digits=2))
print(classification_report(y_test, y_pred, digits=2))

0 comments on commit bef1cdf

Please sign in to comment.