Skip to content

Commit

Permalink
Updated quandenser injections
Browse files Browse the repository at this point in the history
  • Loading branch information
MatthewThe committed Jul 3, 2020
1 parent 20e3d17 commit fe30040
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 77 deletions.
15 changes: 11 additions & 4 deletions Quandenser_pipeline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ do
# Check if user wants to install local scripts
read_config_location="true"
elif [ "$read_config_location" = "true" ]; then
read_config_location="false"
read_config_location="changed"
if [[ -d $var ]]; then # If it is an existing path, set it
config_location=$(realpath "$var")
else
Expand Down Expand Up @@ -231,23 +231,30 @@ fi

if [ "$gui_free_installation" == "true" ]; then # Check if user wants to install local scripts and exit
mkdir -p ./out
#base_url="$SCRIPTDIR/dependencies/ui/config/"

#base_url="/home/matthewt/storage/quandenser-pipeline/dependencies/ui/config/"
#cp $base_url/run_quandenser.sh ./
#cp $base_url/run_quandenser.nf ./
#cp $base_url/nextflow ./
#cp $base_url/nf.config ./out/

base_url="https://raw.githubusercontent.com/statisticalbiotechnology/quandenser-pipeline/master/dependencies/ui/config/"
wget -q $base_url/run_quandenser.sh
wget -q $base_url/run_quandenser.nf
wget -q $base_url/nextflow
wget -qO ./out/nf.config $base_url/nf.config

chmod a+x run_quandenser.sh
chmod a+x nextflow
if [ ! -f file_list.txt ]; then
echo -e "file1.mzML\tcase\nfile2.mzML\tcase\nfile3.mzML\tcontrol\nfile4.mzML\tcontrol" > file_list.txt
fi

sed -i 's|CONFIG_LOCATION="/home/$USER/.quandenser_pipeline"|CONFIG_LOCATION=`pwd`|g' run_quandenser.sh
if [ "$read_config_location" == "changed" ]; then
sed -i "s|CONFIG_LOCATION=\"\"|CONFIG_LOCATION=\"${config_location}\"|g" run_quandenser.sh
else
sed -i 's|CONFIG_LOCATION=""|CONFIG_LOCATION=`pwd`|g' run_quandenser.sh
fi
sed -i 's|OUTPUT_PATH=""|export OUTPUT_PATH=`pwd`/out|g' run_quandenser.sh
if [ "$use_docker" == "true" ]; then
sed -i 's|USE_DOCKER="false"|USE_DOCKER="true"|g' run_quandenser.sh
Expand All @@ -260,7 +267,7 @@ if [ "$gui_free_installation" == "true" ]; then # Check if user wants to install

printf "${YELLOW}Installed scripts in current working directory.${RESET}"
printf "${YELLOW}Please configure scripts by editing out/nf.config and run_quandenser.sh.${RESET}"
printf "${YELLOW}You will also need to edit the tab-delimited file, file_list.txt, specifying"
printf "${YELLOW}You will also need to edit the tab-delimited file, file_list.txt, specifying "
printf "the files you want to process and their sample group.${RESET}"
printf "${YELLOW}You will then be able to run the pipleline with run_quandenser.sh${RESET}"
exit 0
Expand Down
1 change: 1 addition & 0 deletions build_image.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@
# Note, You can change the tmpdir to another location
#sudo SINGULARITY_TMPDIR=/tmp singularity build SingulQuand.SIF Singularity

rsync -azP /home/matthewt/quandenser/ quandenser
docker build -t matthewthe/quandenser-pipeline-i-agree-to-the-vendor-licenses:latest .
Empty file modified dependencies/ui/config/nextflow_alignment_tree_test/run.sh
100644 → 100755
Empty file.
146 changes: 73 additions & 73 deletions dependencies/ui/config/run_quandenser.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,43 +14,6 @@ if (params.resume_directory != "") {

file_def = file(params.batch_file) // batch_file

// change the path to where the database is and the batch file is if you are only doing msconvert
if( params.workflow == "MSconvert" || params.workflow == "Quandenser" ) {
db_file = params.batch_file // Will not be used, so junk name
} else {
db_file = params.db
}
db = file(db_file) // Sets "db" as the file defined above
seq_index_name = "${db.getName()}.index" // appends "index" to the db filename

if (params.workflow == "MSconvert") {
spectra_in = Channel.from(1) // This prevents name collision crash

Channel // non-mzML files with proper labeling which will be converted
.from(file_def.readLines())
.map { it -> it.tokenize('\t') }
.filter { it.size() > 1 } // filters any input that is not <path> <X>
.map { it -> file(it[0]) }
.into { spectra_convert }
} else {
// Preprocessing file_list
Channel // mzML files with proper labeling
.from(file_def.readLines())
.map { it -> it.tokenize('\t') }
.filter { it.size() > 1 } // filters any input that is not <path> <label>
.filter { it[0].tokenize('.')[-1] == "mzML" } // filters any input that is not .mzML
.map { it -> file(it[0]) }
.into { spectra_in } // Puts the files into spectra_in

Channel // non-mzML files with proper labeling which will be converted
.from(file_def.readLines())
.map { it -> it.tokenize('\t') }
.filter { it.size() > 1 } // filters any input that is not <path> <X>
.filter{ it[0].tokenize('.')[-1] != "mzML" } // filters any input that is .mzML
.map { it -> file(it[0]) }
.into { spectra_convert }
}

// Replaces the lines of non-mzML with their corresponding converted mzML counterpart
count = 0
amount_of_non_mzML = 0
Expand Down Expand Up @@ -88,6 +51,40 @@ for( line in all_lines ){
println("Total files = " + total_files)
//println("Files that will be converted = " + amount_of_non_mzML)

Channel // non-mzML files with proper labeling which will be converted
.from(file_def.readLines())
.map { it -> it.tokenize('\t') }
.filter { it.size() > 1 } // filters any input that is not <path> <X>
.map { it -> file(it[0]) }
.merge ( Channel.from(1..total_files) )
.set{ file_list }

// change the path to where the database is and the batch file is if you are only doing msconvert
if( params.workflow == "MSconvert" || params.workflow == "Quandenser" ) {
db_file = params.batch_file // Will not be used, so junk name
} else {
db_file = params.db
}
db = file(db_file) // Sets "db" as the file defined above
seq_index_name = "${db.getName()}.index" // appends "index" to the db filename

if (params.workflow == "MSconvert") {
spectra_in = Channel.from(1) // This prevents name collision crash

file_list.set { spectra_convert }
} else {
file_list.into { file_list_mzML; file_list_non_mzML }

// Preprocessing file_list
file_list_mzML
.filter { it[0].getExtension() == "mzML" } // filters any input that is not .mzML
.set { spectra_in } // Puts the files into spectra_in

file_list_non_mzML // non-mzML files with proper labeling which will be converted
.filter{ it[0].getExtension() != "mzML" } // filters any input that is .mzML
.set { spectra_convert }
}

file_params = file("$publish_output_path/params.txt")
file_params << "$params" + '\n' // need to add \n

Expand All @@ -113,9 +110,9 @@ process msconvert {
containerOptions "$params.custom_mounts"
maxForks params.parallel_msconvert_max_forks
input:
file f from spectra_convert_channel
set file(f), val(file_idx) from spectra_convert_channel
output:
file("converted/*") into spectra_converted
set file("converted/*"), val(file_idx) into spectra_converted
script:
"""
mkdir -p converted
Expand All @@ -134,9 +131,9 @@ if ( params.boxcar_convert == true) {
}
containerOptions "$params.custom_mounts"
input:
file('mzML/*') from combined_channel.collect()
set file('mzML/*'), val(file_idx) from combined_channel.collect()
output:
file("mzML/boxcar_converted/*") into boxcar_channel
set file("mzML/boxcar_converted/*"), val(file_idx) into boxcar_channel
script:
"""
python -s /usr/local/bin/boxcar_converter.py mzML/ ${params.boxcar_convert_additional_arguments} 2>&1 | tee -a stdout.txt
Expand All @@ -147,10 +144,10 @@ if ( params.boxcar_convert == true) {
}

// Clone channel we created to use in multiple processes (one channel per process, no more)
boxcar_channel.flatten().into {
combined_channel_normal
combined_channel_parallel_1
combined_channel_parallel_2
boxcar_channel.into {
mzml_input_non_parallel
mzml_input_parallel_1
mzml_input_parallel_2
}

// Normal, non-parallel process
Expand All @@ -162,7 +159,7 @@ process quandenser {
containerOptions "$params.custom_mounts"
input:
file 'list.txt' from file_def
file('mzML/*') from combined_channel_normal.collect()
file('mzML/*') from mzml_input_non_parallel.collect()
file('Quandenser_output_resume') from resume_directory // optional
output:
file("Quandenser_output/consensus_spectra/**") into spectra_normal includeInputs true
Expand All @@ -179,34 +176,37 @@ process quandenser {
"""
}

process quandenser_parallel_1 { // About 3 min/run
process quandenser_parallel_1_dinosaur { // About 3 min/run
// Parallel 1: Take 1 file, run it throught dinosaur. Exit when done. Parallel process
if( params.publish_quandenser == true ){
publishDir publish_output_path, mode: 'copy', overwrite: true, pattern: "Quandenser_output/dinosaur/*"
publishDir publish_output_path, mode: 'copy', overwrite: false, pattern: "Quandenser_output/dinosaur/*"
}
containerOptions "$params.custom_mounts"
maxForks params.parallel_quandenser_max_forks // Defaults to infinite
errorStrategy 'retry' // If actor cell does something stupid, this should retry it once on clusters when the time runs out
input:
file 'list.txt' from file_def
file('mzML/*') from combined_channel_parallel_1
set file('mzML/*'), val(file_idx) from mzml_input_parallel_1
each prev_tmp from Channel.fromPath("${params.output_path}/work/tmp_${params.random_hash}")
file('Quandenser_output_resume') from resume_directory // optional
output:
file "Quandenser_output/dinosaur/*" into quandenser_out_1_to_2_dinosaur includeInputs true
file "Quandenser_output/dinosaur/*" into quandenser_out_dinosaur includeInputs true
when:
(params.workflow == "Full" || params.workflow == "Quandenser") && params.parallel_quandenser == true
script:
"""
if [ -n "${params.resume_directory}" ]; then
mkdir -p Quandenser_output/dinosaur
mzML_file=\$(basename -s .mzML mzML/*)
cp \$(pwd)/Quandenser_output_resume/dinosaur/\$mzML_file* Quandenser_output/dinosaur/
cp -as \$(pwd)/Quandenser_output_resume/dinosaur/\$mzML_file* Quandenser_output/dinosaur/
if [ -e "\$(pwd)/Quandenser_output_resume/dinosaur/features.${file_idx-1}.dat" ]; then
cp -as \$(pwd)/Quandenser_output_resume/dinosaur/features.${file_idx-1}.dat Quandenser_output/dinosaur/
fi
echo "FILE IS \$mzML_file"
fi
cp -L list.txt modified_list.txt # Need to copy not link, but a copy of file which I can modify
filename=\$(find mzML/* | xargs basename)
sed -i "/\$filename/!d" modified_list.txt
python -s /usr/local/bin/command_wrapper.py 'quandenser --batch modified_list.txt --parallel-1 true ${params.quandenser_additional_arguments} 2>&1 | tee -a stdout.txt'
rm -rf Quandenser_output/tmp
ln -s ${prev_tmp} Quandenser_output/tmp
python -s /usr/local/bin/command_wrapper.py 'quandenser --batch list.txt --partial-1-dinosaur ${file_idx} ${params.quandenser_additional_arguments} 2>&1 | tee -a stdout.txt'
"""
}

Expand Down Expand Up @@ -250,17 +250,17 @@ if (params.parallel_quandenser == false){



process quandenser_parallel_2 {
process quandenser_parallel_2_maracluster {
// Parallel 2: Take all dinosaur files and run maracluster. Exit when done. Non-parallel process
if( params.publish_quandenser == true ){
publishDir publish_output_path, mode: 'copy', overwrite: true, pattern: "Quandenser_output/maracluster/*"
publishDir publish_output_path, mode: 'copy', overwrite: true, pattern: "Quandenser_output/dinosaur/*"
publishDir publish_output_path, mode: 'copy', overwrite: false, pattern: "Quandenser_output/maracluster/*"
}
containerOptions "$params.custom_mounts"
input:
file 'list.txt' from file_def
file('mzML/*') from combined_channel_parallel_2.collect()
file('Quandenser_output/dinosaur/*') from quandenser_out_1_to_2_dinosaur.collect()
file('mzML/*') from mzml_input_parallel_2.collect()
file('Quandenser_output/dinosaur/*') from quandenser_out_dinosaur.collect()
each prev_tmp from Channel.fromPath("${params.output_path}/work/tmp_${params.random_hash}")
file('Quandenser_output_resume') from resume_directory // optional
output:
file "Quandenser_output/*" into quandenser_out_2_to_3, quandenser_out_2_to_4 includeInputs true
Expand All @@ -274,7 +274,9 @@ process quandenser_parallel_2 {
mkdir -p Quandenser_output
cp -asf \$(pwd)/Quandenser_output_resume/* Quandenser_output/
fi
quandenser --batch list.txt --parallel-2 true ${params.quandenser_additional_arguments} 2>&1 | tee -a stdout.txt
rm -rf Quandenser_output/tmp
ln -s ${prev_tmp} Quandenser_output/tmp
quandenser --batch list.txt --partial-2-maracluster ${params.quandenser_additional_arguments} 2>&1 | tee -a stdout.txt
"""
}

Expand All @@ -290,30 +292,28 @@ This is then piped to a function that spawns the exact amount of processes
needed for the next round.
*/
class Tree {
private int processed_alignments = -1; // set processed_alignments to be 0 after the dummy round
private int processed_alignments_current_round = 0;
private int current_round = -1; // Dummy round to get tree started
private int current_round = 0; // Dummy round to get tree started
private Map tree_map;
public int num_rounds;

int initialize(tree) {
tree_map = tree.countBy { it }; // create map with number of alignments per round

num_rounds = tree_map.size();
tree_map[-1] = 0; // Dummy round to get tree started
tree_map[num_rounds] = 0; // Dummy round at end of the tree, prevents nextRound() to fail after last round
tree_map[0] = 0; // Dummy round to get tree started
tree_map[num_rounds+1] = 0; // Dummy round at end of the tree, prevents nextRound() to fail after last round
return 0;
}

int alignmentFinished() {
processed_alignments++;
return ++processed_alignments_current_round;
}

List nextRound() {
current_round++;
processed_alignments_current_round = 0;
return [processed_alignments] * tree_map[current_round];
return [current_round] * tree_map[current_round];
}

int currentRoundAlignments() {
Expand Down Expand Up @@ -355,7 +355,7 @@ if (params.parallel_quandenser_tree == true && params.parallel_quandenser == tru
processing_tree_changed = processing_tree.collect()
}

process quandenser_parallel_3 { // About 3 min/run
process quandenser_parallel_3_match_features { // About 3 min/run
// Parallel 3: matchFeatures. Parallel
containerOptions "$params.custom_mounts"
maxForks params.parallel_quandenser_max_forks // Defaults to infinite
Expand Down Expand Up @@ -395,14 +395,14 @@ process quandenser_parallel_3 { // About 3 min/run
ln -s ${prev_percolator} Quandenser_output/percolator
rm -rf Quandenser_output/tmp
ln -s ${prev_tmp} Quandenser_output/tmp
python -s /usr/local/bin/command_wrapper.py 'quandenser --batch list.txt --parallel-3 ${feedback_val + 1} ${params.quandenser_additional_arguments} 2>&1 | tee -a stdout.txt'
python -s /usr/local/bin/command_wrapper.py 'quandenser --batch list.txt --partial-3-match-round ${feedback_val} ${params.quandenser_additional_arguments} 2>&1 | tee -a stdout.txt'
"""
}

process quandenser_parallel_4 {
process quandenser_parallel_4_consensus {
// Parallel 4: Run through maracluster extra features. Non-parallel
if( params.publish_quandenser == true ){
publishDir publish_output_path, mode: 'copy', overwrite: true, pattern: "Quandenser_output/*"
publishDir publish_output_path, mode: 'copy', overwrite: false, pattern: "Quandenser_output/*"
}
containerOptions "$params.custom_mounts"
input:
Expand All @@ -422,7 +422,7 @@ process quandenser_parallel_4 {
ln -s ${prev_percolator} Quandenser_output/percolator # Create link to publishDir
rm -rf Quandenser_output/tmp
ln -s ${prev_tmp} Quandenser_output/tmp
quandenser --batch list.txt --parallel-4 true ${params.quandenser_additional_arguments} 2>&1 | tee -a stdout.txt
quandenser --batch list.txt --partial-4-consensus ${params.quandenser_additional_arguments} 2>&1 | tee -a stdout.txt
"""
}

Expand All @@ -443,7 +443,7 @@ process tide_search {
containerOptions "$params.custom_mounts"
input:
file 'seqdb.fa' from db
file ms2_files from spectra.collect() // PS: Sensitive to naming (no blankspace, paranthesis and such)
file ms2_files from spectra.collect() // N.B.: Sensitive to naming (no blankspace, parenthesis and such)
output:
file("crux-output/*") into id_files
file("${seq_index_name}") into index_files
Expand Down

0 comments on commit fe30040

Please sign in to comment.