Skip to content

Commit

Permalink
Merge pull request #1389 from ashishsingh18/PerfusionPCATrain
Browse files Browse the repository at this point in the history
Perfusion PCA updates
  • Loading branch information
ashishsingh18 authored Jan 11, 2021
2 parents 0de24ae + ca7bc7b commit 9597bad
Show file tree
Hide file tree
Showing 10 changed files with 971 additions and 263 deletions.
455 changes: 323 additions & 132 deletions src/applications/PerfusionPCA.cpp

Large diffs are not rendered by default.

167 changes: 142 additions & 25 deletions src/applications/PerfusionPCA.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -49,17 +49,17 @@ std::vector<std::map<CAPTK::ImageModalityType, std::string>> LoadQualifiedSubjec

int main(int argc, char **argv)
{
std::cout << "This functionality has been removed from this CaPTk release, and we are actively working on an optimized robust implementation that should enable generalization in multi-institutional data. We expect this to be released in our next patch release, in Q4 2020.\n";
return EXIT_FAILURE;
cbica::CmdParser parser = cbica::CmdParser(argc, argv, "PerfusionPCA");
parser.addRequiredParameter("i", "input", cbica::Parameter::STRING, "", "The input directory.");
parser.addRequiredParameter("t", "type", cbica::Parameter::INTEGER, "", "The option of preparing a new model (=0), and for testing on an existing model (=1)");
parser.addRequiredParameter("n", "number of PCAs", cbica::Parameter::FLOAT, "", "The number of principal components.");
parser.addOptionalParameter("m", "model", cbica::Parameter::STRING, "", "The directory having PCA models");
parser.addRequiredParameter("t", "type", cbica::Parameter::INTEGER, "", "The option of extracting a new PCA parameters (=0), and for applying existing PCA parameters (=1)");
parser.addOptionalParameter("n", "number of PCAs", cbica::Parameter::FLOAT, "", "The number of principal components.");
parser.addOptionalParameter("m", "model", cbica::Parameter::STRING, "", "The directory having PCA parameters");
parser.addOptionalParameter("vt", "variance threshold", cbica::Parameter::FLOAT, "", "The variance threshold");
parser.addOptionalParameter("dif", "dump intermediate files", cbica::Parameter::BOOLEAN, "", "Write intermediate file containing perfusion data for whole population.");
parser.addRequiredParameter("o", "output", cbica::Parameter::STRING, "", "The output directory.");
parser.addOptionalParameter("L", "Logger", cbica::Parameter::STRING, "log file which user has write access to", "Full path to log file to store console outputs", "By default, only console output is generated");
parser.addExampleUsage("-t 0 -i C:/properly/formatted/inputDir -o C:/outputDir -n 5", "Trains a new model based on the samples in inputDir");
parser.addExampleUsage("-t 1 -i C:/input -m C:/model -o C:/output -m C:/modelDir -n 5", "Tests an existing model for inputs in 'C:/input' based on 'C:/modelDir' ");
parser.addExampleUsage("-t 0 -i C:/properly/formatted/inputDir -o C:/outputDir -n 5", "Extracts new PCA parameters based on the samples in inputDir");
parser.addExampleUsage("-t 1 -i C:/input -o C:/output -m C:/modelDir -n 5", "Tests existing PCA parameters for inputs in 'C:/input' based on 'C:/modelDir' ");
parser.addApplicationDescription("Perfusion PCA Training and Prediction application");

// parameters to get from the command line
Expand All @@ -71,12 +71,43 @@ int main(int argc, char **argv)
applicationType = 0;

float inputPCs = 0;
float varianceThreshold = 0.0;
std::string inputFileName, inputMaskName, outputDirectoryName, modelDirectoryName;
bool nPCsDefined = false;
bool varianceThresholdDefined = false;
bool dif = false;

//get params supplied by user
parser.getParameterValue("i", inputFileName);
parser.getParameterValue("n", inputPCs);
parser.getParameterValue("o", outputDirectoryName);
parser.getParameterValue("t", applicationType);

if(parser.isPresent("dif"))
{
parser.getParameterValue("dif", dif);
}
else
dif = false;

if (parser.isPresent("n"))
{
nPCsDefined = true;
parser.getParameterValue("n", inputPCs);
}

if (parser.isPresent("vt"))
{
varianceThresholdDefined = true;
parser.getParameterValue("vt", varianceThreshold);
}

//if user provides both n and vt, then don't run app
//put a msg to provide any one
if (nPCsDefined && varianceThresholdDefined)
{
std::cout << "Please provide either the number of principal components or the variance threshold and re-run the application." << std::endl;
return EXIT_FAILURE;
}

if (parser.isPresent("m"))
{
Expand All @@ -89,43 +120,61 @@ int main(int argc, char **argv)
logger.UseNewFile(loggerFile);
}

std::cout << "Input File:" << inputFileName << std::endl;
std::cout << "Output Directory:" << outputDirectoryName << std::endl;

//check if input dir exists
if (!cbica::isDir(inputFileName))
{
std::cout << "The input directory does not exist:" << inputFileName << std::endl;
return EXIT_FAILURE;
}

//check if output dir exists
if (!cbica::directoryExists(outputDirectoryName))
{
if (!cbica::createDirectory(outputDirectoryName))
std::cout << "The output directory can not be created:" << outputDirectoryName << std::endl;
return EXIT_FAILURE;
}

std::vector<std::map<CAPTK::ImageModalityType, std::string>> QualifiedSubjects = LoadQualifiedSubjectsFromGivenDirectoryForPCA(inputFileName);
PerfusionPCA object_pca; //pca algorithm object

if (QualifiedSubjects.size() == 0)
//sort and arrange input data
object_pca.SortValidSubjectsFromGivenDirectory(inputFileName);

//check if input has valid subjects
if(!object_pca.HasValidSubjects())
{
std::cout << "There is no subject with the required input in the given directory." << std::endl;
return EXIT_FAILURE;
}
std::cout << "Number of subjects with the required input: " << QualifiedSubjects.size() << std::endl;
PerfusionPCA object_pca;

//load the input dataset
std::string inValidSubject;
if (object_pca.LoadData(inValidSubject) == PerfusionPCA::MismatchedTimePoints)
{
std::string msg = "Could not load data. Please check that all input data has the same number of time points. Look at file: " + inValidSubject;
std::cout << msg << std::endl;
return EXIT_FAILURE;
}

if (applicationType == CAPTK::MachineLearningApplicationSubtype::TESTING)
{
std::cout << "Model directory name:" << modelDirectoryName << std::endl;
//check if PCA parameter dir exists
if (!cbica::directoryExists(modelDirectoryName))
{
std::cout << "The model directory does not exist:" << modelDirectoryName << std::endl;
return EXIT_FAILURE;
}
if (!cbica::fileExists(modelDirectoryName +"/PCA_PERF.csv") || !cbica::fileExists(modelDirectoryName + "/Mean_PERF.csv"))

//check if PCA parameter dir contains the required files
if (!cbica::fileExists(modelDirectoryName +"/PCA_PERF.csv") ||
!cbica::fileExists(modelDirectoryName + "/Mean_PERF.csv") ||
!cbica::fileExists(modelDirectoryName + "/PCCumulativeVariance.csv"))
{
std::cout << "The model files PCA_PERF.csv and Mean_PERF.csv do not exist in the model directory:" << modelDirectoryName << std::endl;
std::cout << "The required files PCA_PERF.csv, Mean_PERF.csv and PCCumulativeVariance.csv do not exist in the PCA parameter directory:" << modelDirectoryName << std::endl;
return EXIT_FAILURE;
}

// check if version is compatible
if (cbica::isFile(modelDirectoryName + "/VERSION.yaml"))
{
if (!cbica::IsCompatible(modelDirectoryName + "/VERSION.yaml"))
Expand All @@ -134,18 +183,86 @@ int main(int argc, char **argv)
return EXIT_FAILURE;
}
}
object_pca.ApplyExistingPCAModel(inputPCs, inputFileName, outputDirectoryName, QualifiedSubjects,modelDirectoryName);

//when both number of PCA images and variance threshold are not provided by the user,
//ask the user to provide any one
if (!nPCsDefined && !varianceThresholdDefined)
{
std::cout << "Please provide either the number of principal components or the variance threshold and re-run the application." << std::endl;
return EXIT_FAILURE;
}

//pass params to object
if (nPCsDefined)
object_pca.SetNumberOfPCs(inputPCs);
else if (varianceThresholdDefined)
object_pca.SetVarianceThreshold(varianceThreshold);

//call the algorithm
std::cout << " Applying extracted PCA parameters." << std::endl;
PerfusionPCA::ErrorCode code = object_pca.ApplyExistingPCAModel(inputPCs, inputFileName, outputDirectoryName/*, QualifiedSubjects*/,modelDirectoryName);
if (code == PerfusionPCA::ErrorCode::MismatchedTimePoints)
{
std::cout << "Number of time points in the input does not match with the pca parameters. Cannot proceed." << std::endl;
return EXIT_FAILURE;
}
else if (code == PerfusionPCA::ErrorCode::NoError)
{
std::cout << "principal components have been saved at the specified locations.\n";
std::cout << "Finished successfully.\n";
return EXIT_SUCCESS;
}
}
else if (applicationType == CAPTK::MachineLearningApplicationSubtype::TRAINING)
object_pca.TrainNewPerfusionModel(inputPCs, inputFileName, outputDirectoryName, QualifiedSubjects);
{
//if user doesn't provide n or vt, give a msg stating that we will provide only PCA parameters
// and not the PCA images and that the run may take quite some time and request for confirmation
// press y to continue or n to exit(and provide n or vt).
if (!nPCsDefined && !varianceThresholdDefined)
{
std::cout << "You did not provide the number of principal components or the variance threshold. \
We will provide only the PCA parameters and not any PCA images. The application will take a long time to run. \
Do you want to continue? Press 'y' to contine or 'n' (and press Enter) to exit and provide either of the parameters.";
char ch = cin.get();
if (ch == 'y')
{
nPCsDefined = true;
inputPCs = 0; //we don't provide any PCA images
}
else
{
std::cout << "Please provide either the number of principal components or the variance threshold and re-run the application." << std::endl;
return EXIT_FAILURE;
}
}

//pass params to object
if (nPCsDefined)
object_pca.SetNumberOfPCs(inputPCs);
else if (varianceThresholdDefined)
object_pca.SetVarianceThreshold(varianceThreshold);
//we won't have a situation here that both nPCsDefined and varianceThresholdDefined
//are defined. This is handled upstream.

object_pca.RequestPerfusionDataWholePopulation(dif);//dump intermediate files or not

//calling the algorithm
std::cout << " Extracting PCA parameters." << std::endl;
if (object_pca.TrainNewPerfusionModel(inputPCs, inputFileName, outputDirectoryName))
{
std::cout << "principal components have been saved at the specified locations.\n";
std::cout << "Finished successfully.\n";
return EXIT_SUCCESS;
}
else
{
std::cout << "Something went wrong during the calculation. Please contact software@cbica.upenn.edu \n";
return EXIT_FAILURE;
}
}
else
{
parser.echoVersion();
return EXIT_SUCCESS;
}

std::cout<<"principal components have been saved at the specified locations.\n";
std::cout << "Finished successfully.\n";

return EXIT_SUCCESS;
}
61 changes: 57 additions & 4 deletions src/applications/PerfusionPCA.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,14 @@ class PerfusionPCA
#endif
{
public:

enum ErrorCode
{
IncorrectInputStructure = 0,
MismatchedTimePoints,
NoError
};

cbica::Logging logger;
//! Default constructor
PerfusionPCA()
Expand All @@ -60,19 +68,64 @@ class PerfusionPCA
/*template<class ImageTypeFloat4D, class ImageTypeFloat3D>
std::vector<typename ImageTypeFloat3D::Pointer> Run(typename ImageTypeFloat3D::Pointer maskImagePointerNifti, typename ImageTypeFloat4D::Pointer perfImagePointerNifti);
*/

bool TrainNewPerfusionModel(const int number, const std::string inputdirectory, const std::string outputdirectory,std::vector<std::map<CAPTK::ImageModalityType, std::string>> QualifiedSubjects);
bool ApplyExistingPCAModel(const int number, const std::string inputdirectory, const std::string outputdirectory, std::vector<std::map<CAPTK::ImageModalityType, std::string>> QualifiedSubjects,const std::string ModelDirectoryName);
/*
\brief Load Perfusion and Segmentation images into Perfusion Data Map
\return ErrorCode Indicating the status of load operation
\return inValidSubject If unable to load, path of the invalid subject
*/
ErrorCode LoadData(std::string &inValidSubject);

//Extract PCA Parameters
bool TrainNewPerfusionModel(const int number, const std::string inputdirectory, const std::string outputdirectory);

//Apply extracted PCA paramters
ErrorCode ApplyExistingPCAModel(const int number, const std::string inputdirectory, const std::string outputdirectory,const std::string ModelDirectoryName);

//Load perfusion data into vector and matrix
template<class PerfusionImageType, class ImageType>
VariableSizeMatrixType LoadPerfusionData(typename ImageType::Pointer maskImagePointerNifti, typename PerfusionImageType::Pointer perfImagePointerNifti, std::vector< typename ImageType::IndexType> &indices);

PerfusionMapType CombineAndCalculatePerfusionPCA(PerfusionMapType PerfusionDataMap, VariableSizeMatrixType &TransformationMatrix, VariableLengthVectorType &MeanVector, vtkSmartPointer<vtkTable> &ReducedPCAs);
PerfusionMapType CombineAndCalculatePerfusionPCA(PerfusionMapType PerfusionDataMap, VariableSizeMatrixType &TransformationMatrix, VariableLengthVectorType &MeanVector, vtkSmartPointer<vtkTable> &ReducedPCAs, vtkSmartPointer<vtkDoubleArray> &variance);
VariableSizeMatrixType ColumnWiseScaling(VariableSizeMatrixType inputdata);

PerfusionMapType CombineAndCalculatePerfusionPCAForTestData(PerfusionMapType PerfusionDataMap, VariableSizeMatrixType &TransformationMatrix, VariableLengthVectorType &MeanVector);

//write vtk array
void WritevtkArray(vtkDoubleArray* inputdata, std::string filepath);

//determine # PCs
int DetermineNumberOfPCs(vtkSmartPointer<vtkDoubleArray> variance);

//setter for Variance theshold
void SetVarianceThreshold(float threshold);

//setter for number of PCA images to produce
void SetNumberOfPCs(int pcs);

//setter to request intermediate perfusion data for whole population
void RequestPerfusionDataWholePopulation(bool request);

//sort and arrange valid input data
void SortValidSubjectsFromGivenDirectory(const std::string directoryname);

//check if there are any valid subjects in input data
bool HasValidSubjects();
private:

int m_TotalTimePoints = 0;

//map of subject id to tuple of vector of mask indices and matrix of perfusion pixel data
//size of perfusion data map is equal to the number of subjects
PerfusionMapType m_PerfusionDataMap;

float m_VarianceThreshold = 0.0;
int m_NumberOfPCs = 0;
bool m_VarianceThresholdDefined = false;
bool m_NumberOfPCsDefined = false;
bool m_PerfusionDataForWholePopulationRequested = false;

//vector of valid subjects in input
std::vector<std::map<CAPTK::ImageModalityType, std::string>> m_ValidSubjectList;
};

template<class PerfusionImageType, class ImageType>
Expand Down
Loading

0 comments on commit 9597bad

Please sign in to comment.