fix(pylint): added requirements, increased scores

f-aguzzi · Apr 23, 2024 · bef1cdf · bef1cdf
1 parent 234b200
commit bef1cdf
Show file tree

Hide file tree

Showing 3 changed files with 88 additions and 56 deletions.
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,5 @@
+numpy>=1.24.4
+scipy>=1.12.0
+matplotlib>=3.8.2
+pandas>=2.2.1
+seaborn>=0.13.2
diff --git a/src/lldf.py b/src/lldf.py
@@ -1,31 +1,39 @@
+'''Performs low-level data fusion on input arrays, outputs the results'''
 import numpy as np
 import pandas as pd
+from scipy.signal import savgol_filter
 
 # TODO: choose the preprocessing, test the export
 
-def snv(input_data):
-
-    # Define a new array and populate it with the corrected data  
-    output_data = np.zeros_like(input_data)
-    for i in range(input_data.shape[0]):
-
-        # Apply correction
-        output_data[i,:] = (input_data[i,:] - np.mean(input_data[i,:])) / np.std(input_data[i,:])
-
-    return output_data
-
-class LLDF_Model:
-    def __init__(self, X_data, Y_dataframe, X_train, Y):
-        self.X_data = X_data
-        self.Y_dataframe = Y_dataframe
-        self.X_train = X_train
-        self.Y = Y
+class LLDFModel:
+    '''Models the output data from the LLDF operation.
+
+    Plain container for the four artifacts handed over by `LLDF.lldf()`:
+
+    Attributes:
+        x_data: fused table without its first (label) column
+        y_dataframe: class labels as a one-column DataFrame
+        x_train: full fused table (labels followed by the features)
+        y: class labels as a plain array
+    '''
+    def __init__(self, x_data, y_dataframe, x_train, y):
+        self.x_data = x_data
+        # Stored under the name of the argument (it was mistakenly saved as
+        # `x_dataframe`, which left the model without a `y_dataframe`)
+        self.y_dataframe = y_dataframe
+        self.x_train = x_train
+        self.y = y
 
 class LLDF:
+    '''Holds together all the data, methods and artifacts of the LLDF operation'''
     def __init__(self, preprocessing='snv'):
+        '''Stores the preprocessing choice; no fused data exists until lldf() runs'''
+        # Filled in by lldf() with the resulting model
+        self.fused_data = None
         self.preprocessing = preprocessing
+
+    def _snv(self, input_data):
+        '''Applies Standard Normal Variate normalization to an input array.
+
+        Every row is centred on its own mean and divided by its own
+        (population) standard deviation.
+
+        Args:
+            input_data: 2-D numeric array, one sample per row.
+
+        Returns:
+            A new floating-point array with the same shape as the input.
+            A row with zero standard deviation yields nan/inf values, since
+            the division is not guarded.
+        '''
+        data = np.asarray(input_data)
+        # Integer input must be promoted first: writing the result into an
+        # integer array would silently truncate the normalized values
+        if not np.issubdtype(data.dtype, np.floating):
+            data = data.astype(float)
+
+        # keepdims keeps the per-row statistics as columns so they broadcast
+        row_mean = data.mean(axis=1, keepdims=True)
+        row_std = data.std(axis=1, keepdims=True)
+
+        return (data - row_mean) / row_std
 
     def lldf(self):
+        '''Performs low-level data fusion'''
         path = input("Insert the QEPAS file path:")
         sheet = input("Insert the QEPAS sheet name:")
         spectra = pd.read_excel(path, sheet_name=sheet, index_col=0, header=0)
@@ -34,57 +42,56 @@ def lldf(self):
         ret_time = pd.read_excel(path, sheet_name=sheet, index_col=0, header=0)
 
         # select only numerical attributes
-        X_spectra = spectra.iloc[:, 1:]
-        X_time = ret_time.iloc[:, 1:]
+        x_spectra = spectra.iloc[:, 1:]
+        x_time = ret_time.iloc[:, 1:]
 
         #Selection of classes from the spectra database
-        Y = spectra.loc[:, 'Substance'].values
+        y = spectra.loc[:, 'Substance'].values
 
-        Y_dataframe = pd.DataFrame(Y, columns=['Substance'])
+        y_dataframe = pd.DataFrame(y, columns=['Substance'])
 
         # It is necessary to convert the column names as string to select them
         spectra.columns = spectra.columns.astype(str) # to make the colnames as text
 
-        #wavelenghts for plots (variables)
-        # Your string with numbers separated by spaces
-        numbers_string = "8 8,0126	8,0253 8,038	8,0507	8,0635	8,0763	8,0892	8,1021	8,115	8,128	8,141	8,1541	8,1672	8,1804	8,1935	8,2068	8,2201	8,2334	8,2468	8,2602	8,2736	8,2871	8,3007	8,3142	8,3279	8,3415	8,3553	8,369	8,3828	8,3967	8,4106	8,4245	8,4385	8,4526	8,4667	8,4808	8,495	8,5092	8,5235	8,5378	8,5522	8,5666	8,5811	8,5956	8,6102	8,6248	8,6395	8,6542	8,6689	8,6838	8,6986	8,7136	8,7285	8,7435	8,7586	8,7737	8,7889	8,8042	8,8194	8,8348	8,8502	8,8656	8,8811	8,8967	8,9123	8,9279	8,9437	8,9594	8,9753	8,9912	9,0071	9,0231	9,0391	9,0553	9,0714	9,0877	9,1039	9,1203	9,1367	9,1532	9,1697	9,1863	9,2029	9,2196	9,2364	9,2532	9,2701	9,287	9,304	9,3211	9,3382	9,3554	9,3727	9,39	9,4074	9,4249	9,4424	9,46	9,4776	9,4953	9,5131	9,531	9,5489	9,5669	9,5849	9,603	9,6212	9,6395	9,6578	9,6762	9,6947	9,7132	9,7318	9,7505	9,7692	9,7881	9,8069	9,8259	9,845	9,8641	9,8833	9,9025	9,9219	9,9413	9,9608	9,9804	10"
-
-        # Replace commas with points and join the numbers with a space
-        wl = np.array(list(map(lambda x: float(x.replace(',', '.')), numbers_string.split())))
-
         # Preprocessing
         if self.preprocessing == 'snv':
             # Compute the SNV on spectra
-            preprocessed_spectra = snv(X_spectra.values)
+            preprocessed_spectra = self._snv(x_spectra.values)
         elif self.preprocessing == 'savgol':
-            # Preprocessing with Savitzki-Golay - smoothing, defining the window, the order and the use of derivatives
-            from scipy.signal import savgol_filter
-            preprocessed_spectra = savgol_filter(X_spectra, 7, polyorder = 2, deriv=0)
+            # Preprocessing with Savitzky-Golay
+            # smoothing, defining the window, the order and the use of derivatives
+            preprocessed_spectra = savgol_filter(x_spectra, 7, polyorder = 2, deriv=0)
         elif self.preprocessing == 'savgol+snv':
-            # We can also combine the preprocessing strategies together: Savitzki-Golay - smoothing + SNV
-            X_savgol = savgol_filter(X_spectra, 7, polyorder = 2, deriv=0)
-            preprocessed_spectra = snv(X_savgol)
+            # We can also combine the preprocessing strategies together:
+            # Savitzky-Golay - smoothing + SNV
+            preprocessed_spectra = self._snv(savgol_filter(x_spectra, 7, polyorder = 2, deriv=0))
         else:
-            raise Exception(f"LLDF: this type of preprocessing does not exist ({self.preprocessing=})")
+            raise Exception(
+                f"LLDF: this type of preprocessing does not exist ({self.preprocessing=})"
+            )
 
 
         # Create a new DataFrame with the processed numerical attributes
-        processed_dataframe_spectra = pd.DataFrame(preprocessed_spectra, columns=spectra.columns[1:])
-        processed_dataframe_spectra.head()
-
-        X_RT_array = X_time.values
+        processed_dataframe_spectra = pd.DataFrame(
+            preprocessed_spectra,
+            columns=spectra.columns[1:]
+        )
 
         # Create a new DataFrame with the processed numerical attributes
-        processed_dataframe_rt = pd.DataFrame(X_RT_array, columns=ret_time.columns[1:])
+        processed_dataframe_rt = pd.DataFrame(x_time.values, columns=ret_time.columns[1:])
 
         # X training set
-        X_train = pd.concat([Y_dataframe, processed_dataframe_spectra, processed_dataframe_rt], axis = 1)
+        x_train = pd.concat(
+            [y_dataframe, processed_dataframe_spectra, processed_dataframe_rt],
+            axis = 1
+        )
 
         # select only numerical attributes
-        X_data = X_train.iloc[:, 1:]
+        x_data = x_train.iloc[:, 1:]
 
-        self.fused_data = LLDF_Model(X_data, Y_dataframe, X_train, Y)
+        self.fused_data = LLDFModel(x_data, y_dataframe, x_train, y)
 
     def export_data(self):
+        '''Exports the fused data table to an Excel file.
+
+        Prompts for the output path on standard input, then writes the fused
+        table (`fused_data.x_train`, labels followed by the features) there.
+
+        Raises:
+            RuntimeError: if no fused data is available yet.
+        '''
+        # Fail before prompting: there is nothing to write until lldf() has run
+        if self.fused_data is None:
+            raise RuntimeError("LLDF: no fused data to export, run lldf() first")
         path = input("Insert the output file path: ")
-        pd.DataFrame(self.fused_data).to_excel(path)
+        # `fused_data` is a model object, not tabular data, so the DataFrame
+        # constructor cannot wrap it: export the DataFrame it carries instead
+        self.fused_data.x_train.to_excel(path)
diff --git a/src/svm.py b/src/svm.py
@@ -1,3 +1,4 @@
+'''Support Vector Machine module.'''
 import matplotlib.pyplot as plt
 # matplotlib inline
 
@@ -8,17 +9,26 @@
 from sklearn.svm import SVC
 
 class SVM:
+    '''Class for Support Vector Machine analysis of the data'''
     def __init__(self, fused_data):
-            self.fused_data = fused_data
+        '''Stores the fused data and prepares an empty settings holder.
+
+        Args:
+            fused_data: object exposing `x_data`, `x_train` and `y`,
+                which are the attributes read by `svm()`.
+        '''
+        # Imported locally so this method does not depend on the module header
+        from types import SimpleNamespace
+
+        self.fused_data = fused_data
+        # `svm()` assigns `self.settings.type`, so the holder must accept
+        # attributes; with None that assignment raises AttributeError
+        self.settings = SimpleNamespace(type=None)
 
-    def svm(self, type="linear"):
+    def svm(self, type='linear'):
+        '''Performs Support Vector Machine analysis'''
         self.settings.type = type
 
-        X_data = self.fused_data.X_data
-        X_train = self.fused_data.X_train
-        Y = self.fused_data.Y
+        x_data = self.fused_data.x_data
+        x_train = self.fused_data.x_train
+        y = self.fused_data.y
 
-        X_train, X_test, y_train, y_test = train_test_split(X_data, Y, train_size=0.7, shuffle=True, stratify=Y)
+        x_train, x_test, y_train, y_test = train_test_split(
+            x_data,
+            y,
+            train_size=0.7,
+            shuffle=True,
+            stratify=y
+        )
 
         # Linear kernel
         if self.settings.type == "linear":
@@ -35,9 +45,9 @@ def svm(self, type="linear"):
             svm_model = SVC(kernel='sigmoid')
         else:
             raise Exception(f"SVM: this type of kernel does not exist ({self.settings.type=})")
-        
-        svm_model.fit(X_train, y_train)
-        y_pred = svm_model.predict(X_test)
+
+        svm_model.fit(x_train, y_train)
+        y_pred = svm_model.predict(x_test)
 
         # Build the confusion matrix from the true (y_test) and predicted (y_pred) labels
         cm = confusion_matrix(y_test, y_pred)
@@ -46,12 +56,22 @@ def svm(self, type="linear"):
         class_labels = sorted(set(y_test))
 
         # Plot the confusion matrix using seaborn with custom colormap (Blues)
-        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_labels, yticklabels=class_labels, cbar=False, vmin=0, vmax=cm.max())
+        sns.heatmap(
+            cm,
+            annot=True,
+            fmt='d',
+            cmap='Blues',
+            xticklabels=class_labels,
+            yticklabels=class_labels,
+            cbar=False,
+            vmin=0,
+            vmax=cm.max()
+        )
 
         plt.xlabel('Predicted')
         plt.ylabel('True')
         plt.title('Confusion Matrix based on evaluation set')
         plt.show()
 
         # Print the classification report
-        print(classification_report(y_test, y_pred, digits=2))
+        print(classification_report(y_test, y_pred, digits=2))