Skip to content

Commit

Permalink
Merge branch 'main' into jacopo
Browse files Browse the repository at this point in the history
  • Loading branch information
valeriocaporioniunipi authored May 16, 2024
2 parents 7b1519e + 5b4d9f9 commit 341fbc4
Show file tree
Hide file tree
Showing 7 changed files with 310 additions and 139 deletions.
Binary file modified code/__pycache__/abspath.cpython-311.pyc
Binary file not shown.
Binary file modified code/__pycache__/csvreader.cpython-311.pyc
Binary file not shown.
4 changes: 2 additions & 2 deletions code/abspath.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import os


def AbsolutePath(local_filename, data_folder):
def abs_path(local_filename, data_folder):
"""
AbsolutePath gets the absolute path of the file given the name of the folder containing the data
abs_path gets the absolute path of the file given the name of the folder containing the data
and the name of the file inside that folder and assuming that the repository contains a data folder
and a code folder.
Expand Down
37 changes: 29 additions & 8 deletions code/csvreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,12 @@
import pandas as pd
from loguru import logger


def GetData(csv_file, column_name=None, show_flag=False):

def csv_reader(csv_file, column_name=None, show_flag=False):
"""
GetData allows to read the data from a CSV file and converts them into a NumPy array.
csv_reader allows to read the data from a CSV file and converts them into a NumPy array.
It can also show the entire dataset as a Pandas dataframe on terminal
or show a single column of the data table.
the GetData function does not show the dataframe, unless specified by changing show_flag argument.
The csv_reader function does not show the dataframe, unless specified by changing show_flag argument.
:param csvfile: path to the CSV file
:type csvfile: str
Expand All @@ -21,7 +19,7 @@ def GetData(csv_file, column_name=None, show_flag=False):
:type show_flag: bool
:return: the function returns a multidimensional numpy array if no column_name is passed as argument, otherwise it returns a unidimensional numpy array
:rtype: numpy.ndarray
"""
df = pd.read_csv(csv_file, delimiter=';')
if column_name is None:
Expand All @@ -34,6 +32,29 @@ def GetData(csv_file, column_name=None, show_flag=False):
print(df[column_name])
return np.array(df[column_name].values)

def get_data(filename, target_name, ex_cols = 0):
    """
    get_data obtains the features and target arrays from a CSV file.

    The file is read through csv_reader twice: once in full, to build the
    feature matrix, and once restricted to the target column.

    Arguments:
    - filename (str): path of the file which data are read from
    - target_name (str): name of the column of the csv file that contains targets
    - ex_cols (int): optional, default = 0. Number of leading columns that are
      dropped from the feature matrix (the slice kept is ``[:, ex_cols:]``)

    Return:
    - features (ndarray): array of features
    - targets (ndarray): array of targets
    """
    logger.info(f'Reading data from file {os.path.basename(filename)}, with {target_name} as target column ')

    # NOTE(review): the target column stays inside `features` unless it is one
    # of the first `ex_cols` columns -- confirm the CSV layout with the callers.
    features = csv_reader(filename)[:, ex_cols:]
    targets = csv_reader(filename, target_name)

    # Checking if the first dimension of features matches the length of targets.
    # A mismatch is only reported through the logger; both arrays are still
    # returned so that the caller decides how to proceed.
    if len(features) != len(targets):
        logger.error("Number of samples in features and targets do not match")

    return features, targets


def main():
parser = argparse.ArgumentParser(description="CSV Reader - A tool to read CSV files with Pandas.")
Expand All @@ -46,12 +67,12 @@ def main():

try:
if args.command == "show":
GetData(args.filename, show_flag=True)
csv_reader(args.filename, show_flag=True)
elif args.command == "show_column":
if not args.column:
parser.error("The '--column' argument is required for 'show_column' command.")
else:
GetData(args.filename, args.column, show_flag=True)
csv_reader(args.filename, args.column, show_flag=True)
except FileNotFoundError as e:
logger.error("File not found", e)

Expand Down
62 changes: 37 additions & 25 deletions code/gaussian_reg.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

from abspath import AbsolutePath
from csvreader import GetData
from abspath import abs_path
from csvreader import get_data

def GaussRegression(filename, n_splits, ex_cols=0, plot_flag=False):
def gaussian_reg(filename, n_splits, ex_cols=0, plot_flag=False):
"""
GaussRegression performs a gaussian regression with k-fold cross-validation
on the given dataset and prints evaluation metrics of the gaussian regression model.
gaussian_reg performs a Gaussian regression with k-fold cross-validation on the given dataset
and prints evaluation metrics of the gaussian regression model.
:param filename: path to the CSV file containing the dataset
:type filename: str
Expand All @@ -29,8 +29,8 @@ def GaussRegression(filename, n_splits, ex_cols=0, plot_flag=False):
"""
# Loading data...
#Importing features excluded first three columns: FILE_ID, AGE_AT_SCAN, SEX
x = GetData(filename)[:, ex_cols:]
y = GetData(filename, "AGE_AT_SCAN")
x = get_data(filename)[:, ex_cols:]
y = get_data(filename, "AGE_AT_SCAN")

# Standardize features
scaler = StandardScaler()
Expand Down Expand Up @@ -95,30 +95,42 @@ def GaussRegression(filename, n_splits, ex_cols=0, plot_flag=False):
# Show the plot
plt.show()

def main():
parser = argparse.ArgumentParser(description='Gaussian regression with k-fold cross-validation predicting the age of patients from magnetic resonance imaging')

parser.add_argument("filename", help="Name of the file that has to be analyzed")
parser.add_argument("--location", help="Location of the file, i.e. folder containing it")
parser.add_argument("--n_splits", type=int, default=5, help="Number of folds for k-folding cross-validation")
parser.add_argument("--ex_cols", type = int, default=3, help="Number of columns excluded when importing data")
parser.add_argument("--plot", action='store_true', help="Show the plot of actual vs predicted brain age")
def gaussian_reg_parsing():
"""
Parsing from terminal
"""
parser = argparse.ArgumentParser(description=
'Gaussian regression predicting the age of patients from magnetic resonance imaging')

parser.add_argument("filename",
help="Name of the file that has to be analized")
parser.add_argument("--target", default = "AGE_AT_SCAN",
help="Name of the colums holding target values")
parser.add_argument("--location",
help="Location of the file, i.e. folder containing it")
parser.add_argument("--folds", type = int, default = 5,
help="Number of folds in the k-folding (>4, default 5)")
parser.add_argument("--ex_cols", type = int, default = 3,
help="Number of columns excluded when importing (default 3)")
parser.add_argument("--plot", action="store_true",
help="Show the plot of actual vs predicted brain age")

args = parser.parse_args()

if args.n_splits > 4:
if args.folds > 4:
try:
if not args.location:
GaussRegression(args.filename, n_splits=args.n_splits, ex_cols = args.ex_cols, plot_flag=args.plot)
else:
args.filename = AbsolutePath(args.filename, args.location)
GaussRegression(args.filename, n_splits=args.n_splits, ex_cols = args.ex_cols, plot_flag=args.plot)
args.filename = abs_path(args.filename,
args.location) if args.location else args.filename
logger.info(f"Opening file : {args.filename}")
features, targets = get_data(args.filename, args.target, args.ex_cols)
gaussian_reg(features, targets, args.epochs, args.folds,
args.summary, args.history, args.plot)
except FileNotFoundError:
logger.error("File not found.")
return None
else:
logger.error("Invalid number of folds: at least 5 folds required")
else:
logger.error("Invalid number of folds: at least 5 folds required.")


if __name__ == "__main__":
main()
gaussian_reg_parsing()

60 changes: 36 additions & 24 deletions code/linear_reg.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

from abspath import AbsolutePath
from csvreader import GetData
from abspath import abs_path
from csvreader import get_data

def LinRegression(filename, n_splits, ex_cols=0, plot_flag=False):
def linear_reg(filename, n_splits, ex_cols=0, plot_flag=False):

"""
LinRegression performs linear regression with k-fold cross-validation on the
linear_reg performs linear regression with k-fold cross-validation on the
given dataset and prints evaluation metrics of the linear regression model.
:param filename: path to the CSV file containing the dataset
Expand All @@ -30,8 +30,8 @@ def LinRegression(filename, n_splits, ex_cols=0, plot_flag=False):
"""
# Loading data...
#Importing features excluded first three columns: FILE_ID, AGE_AT_SCAN, SEX
x = GetData(filename)[:, ex_cols:]
y = GetData(filename, "AGE_AT_SCAN")
x = get_data(filename)[:, ex_cols:]
y = get_data(filename, "AGE_AT_SCAN")

# Standardize features
scaler = StandardScaler()
Expand Down Expand Up @@ -95,29 +95,41 @@ def LinRegression(filename, n_splits, ex_cols=0, plot_flag=False):
# Show the plot
plt.show()

def main():
parser = argparse.ArgumentParser(description='Linear regression with k-fold cross-validation predicting the age of patients from magnetic resonance imaging')

parser.add_argument("filename", help="Name of the file that has to be analyzed")
parser.add_argument("--location", help="Location of the file, i.e. folder containing it")
parser.add_argument("--ex_cols", type = int, default = 3, help="Number of columns excluded when importing data")
parser.add_argument("--n_splits", type=int, default=5, help="Number of folds for k-folding cross-validation")
parser.add_argument("--plot", action='store_true', help="Show the plot of actual vs predicted brain age")
def linear_reg_parsing():
"""
Parsing from terminal
"""
parser = argparse.ArgumentParser(description=
'Linear regression predicting the age of patients from magnetic resonance imaging')

parser.add_argument("filename",
help="Name of the file that has to be analized")
parser.add_argument("--target", default = "AGE_AT_SCAN",
help="Name of the colums holding target values")
parser.add_argument("--location",
help="Location of the file, i.e. folder containing it")
parser.add_argument("--folds", type = int, default = 5,
help="Number of folds in the k-folding (>4, default 5)")
parser.add_argument("--ex_cols", type = int, default = 3,
help="Number of columns excluded when importing (default 3)")
parser.add_argument("--plot", action="store_true",
help="Show the plot of actual vs predicted brain age")

args = parser.parse_args()

if args.n_splits > 4:
if args.folds > 4:
try:
if not args.location:
LinRegression(args.filename, n_splits=args.n_splits, ex_cols = args.ex_cols, plot_flag=args.plot)
else:
args.filename = AbsolutePath(args.filename, args.location)
LinRegression(args.filename, n_splits=args.n_splits, ex_cols = args.ex_cols, plot_flag=args.plot)
args.filename = abs_path(args.filename,
args.location) if args.location else args.filename
logger.info(f"Opening file : {args.filename}")
features, targets = get_data(args.filename, args.target, args.ex_cols)
linear_reg(features, targets, args.epochs, args.folds,
args.summary, args.history, args.plot)
except FileNotFoundError:
logger.error("File not found.")
return None
else:
logger.error("Invalid number of folds: at least 5 folds required")
else:
logger.error("Invalid number of folds: at least 5 folds required.")


if __name__ == "__main__":
main()
linear_reg_parsing()
Loading

0 comments on commit 341fbc4

Please sign in to comment.