Merge branch 'main' of https://github.com/valeriocaporioniunipi/brainage

valeriocaporioniunipi · May 16, 2024 · 908a695 · 908a695
1 parent 7de4a9e
commit 908a695
Show file tree

Hide file tree

Showing 17 changed files with 302 additions and 95 deletions.
diff --git a/code/__pycache__/abspath.cpython-39.pyc b/code/__pycache__/abspath.cpython-39.pyc
diff --git a/code/__pycache__/csvreader.cpython-39.pyc b/code/__pycache__/csvreader.cpython-39.pyc
diff --git a/code/__pycache__/gaussian_reg.cpython-39.pyc b/code/__pycache__/gaussian_reg.cpython-39.pyc
diff --git a/code/__pycache__/linear_reg.cpython-39.pyc b/code/__pycache__/linear_reg.cpython-39.pyc
diff --git a/code/__pycache__/neural_net.cpython-39.pyc b/code/__pycache__/neural_net.cpython-39.pyc
diff --git a/code/csvreader.py b/code/csvreader.py
@@ -36,14 +36,15 @@ def get_data(filename, target_name, ex_cols = 0):
     """
     get_data obtains the features and target arrays
 
-    Arguments:
-    - filename (str): name of the file which data are read from
-    - target_name (str): name of the column of the csv file that contains targets
-    - ex_cols (int): optional, default = 0. Excluded columns
-
-    Return:
-    - features (ndarray): array of features
-    - targets (ndarray): array of targets
+    :param filename: path to the CSV file with the data
+    :type filename: str
+    :param target_name: name of the column of the csv file that contains targets
+    :type target_name: str
+    :param ex_cols: optional (default = 0): initial excluded columns
+    :type ex_cols: int
+    :return: numpy arrays of features and targets
+    :rtype: tuple(numpy.ndarray, numpy.ndarray)
+
     """
     logger.info(f'Reading data from file {os.path.basename(filename)}, with {target_name} as target column ')
     features = csv_reader(filename)[:, ex_cols:]
@@ -56,7 +57,33 @@ def get_data(filename, target_name, ex_cols = 0):
     return features, targets
 
 
-def main():
+def csv_reader_parsing():
+
+    """
+    csv_reader_parsing prints the data read from a csv file.
+    The parameters listed below are not parameters of the function but command-line arguments that have
+    to be passed when executing the program, as follows:
+
+    .. code::
+
+        Your_PC>python csvreader.py show/show_column  csvfile_path --column 
+
+    where the first two arguments are mandatory, while column is optional; when it has to be set,
+    it can be passed with the notation shown in this example:
+
+    .. code::
+
+        Your_PC>python csvreader.py show C:/users/.../file.csv --column 4  
+
+    :param command: can be 'show' or 'show_column'. It selects whether to print the entire dataset or a single column
+    :type command: str
+    :param filename: path to the CSV file
+    :type filename: str
+    :param column: optional: name of the column to display (required for 'show_column' command)
+    :type column: str
+    :return: None
+    """
+
     parser = argparse.ArgumentParser(description="CSV Reader - A tool to read CSV files with Pandas.")
 
     parser.add_argument("command", choices=["show", "show_column"], help="Choose the command to execute")
@@ -77,4 +104,4 @@ def main():
         logger.error("File not found", e)
 
 if __name__ == "__main__":
-    main()
+    csv_reader_parsing()
diff --git a/code/gaussian_reg.py b/code/gaussian_reg.py
@@ -13,15 +13,14 @@
 
 def gaussian_reg(filename, n_splits, ex_cols=0,  plot_flag=False):
     """
-    gaussian_reg performs a Gaussian regression with k-fold cross-validation on the given dataset
-    and prints evaluation metrics of the gaussian regression model.
+    gaussian_reg performs gaussian regression with k-fold cross-validation on the
+    given dataset and prints evaluation metrics of the gaussian regression model
+    such as MAE (mean absolute error), MSE (mean squared error) and R-squared.
 
     :param filename: path to the CSV file containing the dataset 
     :type filename: str
     :param n_splits: number of folds for cross-validation
     :type n_splits: int
-    :param ex_cols: optional (default = 0): number of folds for cross-validation
-    :type ex_cols: int
     :param plot_flag: optional (default = False): Whether to plot the actual vs. predicted values
     :type plot_flag: bool
     :return: None
@@ -97,7 +96,42 @@ def gaussian_reg(filename, n_splits, ex_cols=0,  plot_flag=False):
 
 def gaussian_reg_parsing():
     """
-    Parsing from terminal
+    Command-line entry point for gaussian_reg, executed when the .py file is run as a script.
+    It performs a gaussian regression with k-fold cross-validation
+    predicting the age of patients from magnetic resonance imaging and
+    prints evaluation metrics of the gaussian regression model
+    such as MAE (mean absolute error), MSE (mean squared error) and R-squared.
+    There are two ways to pass the csv file to this function: either
+    pass the absolute path of the dataset, or store the dataset in a sibling folder
+    of the one containing the code and pass the filename together with the name of its containing folder.
+    The parameters listed below are not parameters of the function but command-line arguments that have
+    to be passed when executing the program, as follows:
+
+    .. code::
+
+        $Your_PC>python gaussian_reg.py file.csv --target --location --folds --ex_cols --plot 
+
+    where file.csv is the only mandatory argument, while the others are optional and take default values;
+    to override one of them you can write, for example:
+
+    .. code::
+
+        $Your_PC>python gaussian_reg.py file.csv --folds 10  
+
+    :param filename: path to the CSV file containing the dataset or the name of the file if --location argument is passed 
+    :type filename: str
+    :param target: optional (default = AGE_AT_SCAN): Name of the column holding target values
+    :type target: str
+    :param location: optional: Location of the file, i.e. folder containing it 
+    :type location: str
+    :param folds: optional (>4, default 5): number of folds for cross-validation
+    :type folds: int
+    :param ex_cols: optional (default = 3): columns excluded when importing file
+    :type ex_cols: int
+    :param plot: optional (default = False): Show the plot of actual vs predicted brain age
+    :type plot: bool
+    :return: None
+
     """
     parser = argparse.ArgumentParser(description=
         'Gaussian regression predicting the age of patients from magnetic resonance imaging')

diff --git a/code/linear_reg.py b/code/linear_reg.py
@@ -7,31 +7,31 @@
 from sklearn.linear_model import LinearRegression
 from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
 from sklearn.preprocessing import StandardScaler
+from pathlib import Path
 
 from abspath import abs_path
 from csvreader import get_data
 
-def linear_reg(filename, n_splits, ex_cols=0, plot_flag=False):
+def linear_reg(features, target, n_splits, plot_flag=False):
 
     """
     linear_reg performs linear regression with k-fold cross-validation on the
-    given dataset and prints evaluation metrics of the linear regression model.
+    given dataset and prints evaluation metrics of the linear regression model
+    such as MAE (mean absolute error), MSE (mean squared error) and R-squared.
 
     :param features: array of features (e.g. as returned by csvreader.get_data)
     :param target: array of target values (e.g. as returned by csvreader.get_data)
     :param n_splits: number of folds for cross-validation
     :type n_splits: int
-    :param ex_cols: optional (default = 0): number of folds for cross-validation
-    :type ex_cols: int
     :param plot_flag: optional (default = False): Whether to plot the actual vs. predicted values
     :type plot_flag: bool
     :return: None
 
     """
     # Loading data...
     #Importing features excluded first three columns: FILE_ID, AGE_AT_SCAN, SEX
-    x = get_data(filename)[:, ex_cols:]
-    y = get_data(filename, "AGE_AT_SCAN")
+    x = features
+    y = target
 
     # Standardize features
     scaler = StandardScaler()
@@ -97,15 +97,52 @@ def linear_reg(filename, n_splits, ex_cols=0, plot_flag=False):
 
 def linear_reg_parsing():
     """
-    Parsing from terminal
+    Command-line entry point for linear_reg, executed when the .py file is run as a script.
+    It performs a linear regression with k-fold cross-validation
+    predicting the age of patients from magnetic resonance imaging and
+    prints evaluation metrics of the linear regression model
+    such as MAE (mean absolute error), MSE (mean squared error) and R-squared.
+    There are two ways to pass the csv file to this function: either
+    pass the absolute path of the dataset, or store the dataset in a sibling folder
+    of the one containing the code and pass the filename together with the name of its containing folder.
+    The parameters listed below are not parameters of the function but command-line arguments that have
+    to be passed when executing the program, as follows:
+
+    .. code::
+
+        $Your_PC>python linear_reg.py file.csv --target --location --folds --ex_cols --plot 
+
+    where file.csv is the only mandatory argument, while the others are optional and take default values;
+    to override one of them you can write, for example:
+
+    .. code::
+
+        $Your_PC>python linear_reg.py file.csv --folds 10  
+
+    :param filename: path to the CSV file containing the dataset or the name of the file if --location argument is passed 
+    :type filename: str
+    :param target: optional (default = AGE_AT_SCAN): Name of the column holding target values
+    :type target: str
+    :param location: optional: Location of the file, i.e. folder containing it 
+    :type location: str
+    :param folds: optional (>4, default 5): number of folds for cross-validation
+    :type folds: int
+    :param ex_cols: optional (default = 3): columns excluded when importing file
+    :type ex_cols: int
+    :param plot: optional (default = False): Show the plot of actual vs predicted brain age
+    :type plot: bool
+    :return: None
+
     """
+
     parser = argparse.ArgumentParser(description=
         'Linear regression predicting the age of patients from magnetic resonance imaging')
 
     parser.add_argument("filename",
-                         help="Name of the file that has to be analized")
+                         help="Name of the file that has to be analized if --location argument is"
+                        " passed. Otherwise pass to filename the absolutepath of the file")
     parser.add_argument("--target", default = "AGE_AT_SCAN",
-                        help="Name of the colums holding target values")
+                        help="Name of the column holding target values")
     parser.add_argument("--location",
                          help="Location of the file, i.e. folder containing it")
     parser.add_argument("--folds", type = int, default = 5,
@@ -119,12 +156,10 @@ def linear_reg_parsing():
 
     if args.folds > 4:
         try:
-            args.filename = abs_path(args.filename,
-                                          args.location) if args.location else args.filename
+            args.filename = abs_path(args.filename,args.location) if args.location else args.filename
             logger.info(f"Opening file : {args.filename}")
             features, targets = get_data(args.filename, args.target, args.ex_cols)
-            linear_reg(features, targets, args.epochs, args.folds,
-                       args.summary, args.history, args.plot)
+            linear_reg(features, targets, args.folds, args.plot)
         except FileNotFoundError:
             logger.error("File not found.")
     else:

diff --git a/docs/build/doctrees/API Reference.doctree b/docs/build/doctrees/API Reference.doctree
diff --git a/docs/build/doctrees/environment.pickle b/docs/build/doctrees/environment.pickle