Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
dottordepie committed May 16, 2024
1 parent 7de4a9e commit 908a695
Show file tree
Hide file tree
Showing 17 changed files with 302 additions and 95 deletions.
Binary file modified code/__pycache__/abspath.cpython-39.pyc
Binary file not shown.
Binary file modified code/__pycache__/csvreader.cpython-39.pyc
Binary file not shown.
Binary file modified code/__pycache__/gaussian_reg.cpython-39.pyc
Binary file not shown.
Binary file modified code/__pycache__/linear_reg.cpython-39.pyc
Binary file not shown.
Binary file modified code/__pycache__/neural_net.cpython-39.pyc
Binary file not shown.
47 changes: 37 additions & 10 deletions code/csvreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,15 @@ def get_data(filename, target_name, ex_cols = 0):
"""
get_data obtains the features and target arrays
Arguments:
- filename (str): name of the file which data are read from
- target_name (str): name of the column of the csv file that contains targets
- ex_cols (int): optional, default = 0. Excluded columns
Return:
- features (ndarray): array of features
- targets (ndarray): array of targets
:param filename: path to the CSV file with the data
:type filename: str
:param target_name: name of the column of the csv file that contains targets
:type target_name: str
:param ex_cols: optional (default = 0): initial excluded columns
:type ex_cols: int
:return: numpy arrays of features and target
:rtype: numpy.ndarray, numpy.ndarray
"""
logger.info(f'Reading data from file {os.path.basename(filename)}, with {target_name} as target column ')
features = csv_reader(filename)[:, ex_cols:]
Expand All @@ -56,7 +57,33 @@ def get_data(filename, target_name, ex_cols = 0):
return features, targets


def main():
def csv_reader_parsing():

"""
csv_reader_parsing prints the data read from a csv file.
The parameters listed below are not parameters of the function but command-line arguments that have
to be passed when executing the program, as follows:
.. code::
Your_PC>python csvreader.py show/show_column csvfile_path --column
where the first two arguments are mandatory, while column is optional; when it is needed,
it can be passed as in this example:
.. code::
Your_PC>python csvreader.py show C:/users/.../file.csv --column 4
:param command: can be 'show' or 'show_column'. Selects whether to print the entire dataset or a single column
:type command: str
:param filename: path to the CSV file
:type filename: str
:param column: optional: name of the column to display (required for 'show_column' command)
:type column: str
:return: None
"""

parser = argparse.ArgumentParser(description="CSV Reader - A tool to read CSV files with Pandas.")

parser.add_argument("command", choices=["show", "show_column"], help="Choose the command to execute")
Expand All @@ -77,4 +104,4 @@ def main():
logger.error("File not found", e)

if __name__ == "__main__":
main()
csv_reader_parsing()
44 changes: 39 additions & 5 deletions code/gaussian_reg.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,14 @@

def gaussian_reg(filename, n_splits, ex_cols=0, plot_flag=False):
"""
gaussian_reg performs a Gaussian regression with k-fold cross-validation on the given dataset
and prints evaluation metrics of the gaussian regression model.
gaussian_reg performs gaussian regression with k-fold cross-validation on the
given dataset and prints evaluation metrics of the gaussian regression model
such as MAE (mean absolute error), MSE (mean squared error) and R-squared.
:param filename: path to the CSV file containing the dataset
:type filename: str
:param n_splits: number of folds for cross-validation
:type n_splits: int
:param ex_cols: optional (default = 0): number of folds for cross-validation
:type ex_cols: int
:param plot_flag: optional (default = False): Whether to plot the actual vs. predicted values
:type plot_flag: bool
:return: None
Expand Down Expand Up @@ -97,7 +96,42 @@ def gaussian_reg(filename, n_splits, ex_cols=0, plot_flag=False):

def gaussian_reg_parsing():
"""
Parsing from terminal
Command-line wrapper of the gaussian_reg function, run when the .py file is executed.
It performs a gaussian regression with k-fold cross-validation
predicting the age of patients from magnetic resonance imaging and
prints evaluation metrics of the gaussian regression model
such as MAE (mean absolute error), MSE (mean squared error) and R-squared.
There are two ways to pass the csv file to this function. You can either
pass the absolute path of the dataset, or store the dataset in a sibling folder
of the one containing the code and pass the filename together with its containing folder (--location).
The parameters listed below are not parameters of the function but command-line arguments that have
to be passed when executing the program, as follows:
.. code::
$Your_PC>python gaussian_reg.py file.csv --target --location --folds --ex_cols --plot
where file.csv is the only mandatory argument, while the others are optional and take default values;
to change any of them you can write, for example:
.. code::
$Your_PC>python gaussian_reg.py file.csv --folds 10
:param filename: path to the CSV file containing the dataset or the name of the file if --location argument is passed
:type filename: str
:param target: optional (default = AGE_AT_SCAN): Name of the column holding target values
:type target: str
:param location: optional: Location of the file, i.e. folder containing it
:type location: str
:param folds: optional (>4, default 5): number of folds for cross-validation
:type folds: int
:param ex_cols: optional (default = 3): columns excluded when importing file
:type ex_cols: int
:param plot: optional (default = False): Show the plot of actual vs predicted brain age
:type plot: bool
:return: None
"""
parser = argparse.ArgumentParser(description=
'Gaussian regression predicting the age of patients from magnetic resonance imaging')
Expand Down
61 changes: 48 additions & 13 deletions code/linear_reg.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,31 +7,31 @@
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from pathlib import Path

from abspath import abs_path
from csvreader import get_data

def linear_reg(filename, n_splits, ex_cols=0, plot_flag=False):
def linear_reg(features, target, n_splits, plot_flag=False):

"""
linear_reg performs linear regression with k-fold cross-validation on the
given dataset and prints evaluation metrics of the linear regression model.
given dataset and prints evaluation metrics of the linear regression model
such as MAE (mean absolute error), MSE (mean squared error) and R-squared.
:param features: array of features
:type features: numpy.ndarray
:param target: array of targets
:type target: numpy.ndarray
:param n_splits: number of folds for cross-validation
:type n_splits: int
:param ex_cols: optional (default = 0): number of folds for cross-validation
:type ex_cols: int
:param plot_flag: optional (default = False): Whether to plot the actual vs. predicted values
:type plot_flag: bool
:return: None
"""
# Loading data...
#Importing features excluded first three columns: FILE_ID, AGE_AT_SCAN, SEX
x = get_data(filename)[:, ex_cols:]
y = get_data(filename, "AGE_AT_SCAN")
x = features
y = target

# Standardize features
scaler = StandardScaler()
Expand Down Expand Up @@ -97,15 +97,52 @@ def linear_reg(filename, n_splits, ex_cols=0, plot_flag=False):

def linear_reg_parsing():
"""
Parsing from terminal
Command-line wrapper of the linear_reg function, run when the .py file is executed.
It performs a linear regression with k-fold cross-validation
predicting the age of patients from magnetic resonance imaging and
prints evaluation metrics of the linear regression model
such as MAE (mean absolute error), MSE (mean squared error) and R-squared.
There are two ways to pass the csv file to this function. You can either
pass the absolute path of the dataset, or store the dataset in a sibling folder
of the one containing the code and pass the filename together with its containing folder (--location).
The parameters listed below are not parameters of the function but command-line arguments that have
to be passed when executing the program, as follows:
.. code::
$Your_PC>python linear_reg.py file.csv --target --location --folds --ex_cols --plot
where file.csv is the only mandatory argument, while the others are optional and take default values;
to change any of them you can write, for example:
.. code::
$Your_PC>python linear_reg.py file.csv --folds 10
:param filename: path to the CSV file containing the dataset or the name of the file if --location argument is passed
:type filename: str
:param target: optional (default = AGE_AT_SCAN): Name of the column holding target values
:type target: str
:param location: optional: Location of the file, i.e. folder containing it
:type location: str
:param folds: optional (>4, default 5): number of folds for cross-validation
:type folds: int
:param ex_cols: optional (default = 3): columns excluded when importing file
:type ex_cols: int
:param plot: optional (default = False): Show the plot of actual vs predicted brain age
:type plot: bool
:return: None
"""

parser = argparse.ArgumentParser(description=
'Linear regression predicting the age of patients from magnetic resonance imaging')

parser.add_argument("filename",
help="Name of the file that has to be analized")
help="Name of the file that has to be analized if --location argument is"
" passed. Otherwise pass to filename the absolutepath of the file")
parser.add_argument("--target", default = "AGE_AT_SCAN",
help="Name of the colums holding target values")
help="Name of the column holding target values")
parser.add_argument("--location",
help="Location of the file, i.e. folder containing it")
parser.add_argument("--folds", type = int, default = 5,
Expand All @@ -119,12 +156,10 @@ def linear_reg_parsing():

if args.folds > 4:
try:
args.filename = abs_path(args.filename,
args.location) if args.location else args.filename
args.filename = abs_path(args.filename,args.location) if args.location else args.filename
logger.info(f"Opening file : {args.filename}")
features, targets = get_data(args.filename, args.target, args.ex_cols)
linear_reg(features, targets, args.epochs, args.folds,
args.summary, args.history, args.plot)
linear_reg(features, targets, args.folds, args.plot)
except FileNotFoundError:
logger.error("File not found.")
else:
Expand Down
Binary file modified docs/build/doctrees/API Reference.doctree
Binary file not shown.
Binary file modified docs/build/doctrees/environment.pickle
Binary file not shown.
Loading

0 comments on commit 908a695

Please sign in to comment.