fastqc abyss qiime
added script that auto-generates slurm jobs for fastqc and multiqc

created qiime2 and abyss workflow summary commands
davekk committed Nov 20, 2018
1 parent e526ba5 commit f142148
Showing 3 changed files with 114 additions and 0 deletions.
1 change: 1 addition & 0 deletions abyss.sh
@@ -32,3 +32,4 @@ abyss-pe j=4 np=4 name='${OUTPUT_DIR}dataset_1_abyss_k_$size' k=${size} in='$REA

sbatch $OUTPUT_DIR/slurm_scripts/$size'_abyss_slurm.sh'
done
ulimit -v 512000000  # cap virtual memory before launching the assembly
abyss-pe name='cowpea_abyss_61' k=61 in='/home/smaranga/cowpea_data/SRR37167*_1.fastq.gz /home/smaranga/cowpea_data/SRR37167*_2.fastq.gz'  # forward (_1) and reverse (_2) read pairs
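# Optional follow-up (a minimal sketch, assuming ABySS's default "<name>-contigs.fa" output naming for the name= prefix above): summarise assembly contiguity (contig count, N50, total size) with abyss-fac once the run finishes.
abyss-fac cowpea_abyss_61-contigs.fa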
64 changes: 64 additions & 0 deletions fast_multiqc.sh
@@ -0,0 +1,64 @@
#!/bin/bash

#specify the path to the working directory that holds the per-sample read folders
WORKING_DIR='/home/dkiambi/Qiime2_limidb/raw_reads/'

cd $WORKING_DIR

#the paired reads sit in per-sample folders; iterate over each folder (ls -I skips the scripts and text files)
for file in $(ls -I "*.sh" -I "*.txt" )
do

OUTPUT_DIR=${file}/fastq_out
mkdir -p $OUTPUT_DIR
#generate a unique slurm script for each folder
echo \
"#!/bin/bash -e
#SBATCH -p batch
#SBATCH -n 4
#SBATCH -o $OUTPUT_DIR/fastq.%N.%j.out
#SBATCH -e $OUTPUT_DIR/fastq.%N.%j.err
#SBATCH --mail-user=D.Kaimenyi@cgiar.org
#SBATCH --mail-type=END,FAIL
# load the fastqc module
module load fastqc
# thread count matches the 4 tasks requested above
fastqc -o $OUTPUT_DIR -t 4 $file/*.gz " > $OUTPUT_DIR/$file'_fastqc_slurm.sh'

sbatch $OUTPUT_DIR/$file'_fastqc_slurm.sh'
done


#### second script: after the FastQC jobs finish, aggregate each folder's reports with MultiQC
#!/bin/bash

#specify the path to the working directory that holds the per-sample read folders
WORKING_DIR='/home/dkiambi/Qiime2_limidb/raw_reads/'

cd $WORKING_DIR

#the paired reads sit in per-sample folders; iterate over each folder (ls -I skips the scripts and text files)
for file in $(ls -I "*.sh" -I "*.txt" )
do

OUTPUT_DIR=${file}/fastq_out
#generate a unique slurm script for each folder
echo \
"#!/bin/bash -e
#SBATCH -p batch
#SBATCH -n 4
#SBATCH -o $OUTPUT_DIR/fastq.%N.%j.out
#SBATCH -e $OUTPUT_DIR/fastq.%N.%j.err
#SBATCH --mail-user=D.Kaimenyi@cgiar.org
#SBATCH --mail-type=END,FAIL
# load the multiqc module
module load multiqc
cd $OUTPUT_DIR
# aggregate this folder's FastQC reports into a single MultiQC report
multiqc . " > $OUTPUT_DIR/$file'_multiqc_slurm.sh'

sbatch $OUTPUT_DIR/$file'_multiqc_slurm.sh'
done
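#### Optional (a minimal sketch, assuming every per-folder FastQC job has finished; the "*/fastq_out"
#### glob and the "combined_multiqc" output directory are illustrative names, not from the original script):
#### a single MultiQC run can also aggregate all folders into one combined report.
multiqc */fastq_out -o combined_multiqc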
49 changes: 49 additions & 0 deletions qiime2.bash
@@ -0,0 +1,49 @@
#create this manifest file (e.g. with vim) containing the absolute file path of each sequence file and its read direction
sample-id,absolute-filepath,direction
# Lines starting with '#' are ignored and can be used to create
# "comments" or even "comment out" entries
sample-1,/home/dkiambi/qiime2_tuto/data/S20_S20_L001_R1_001.fastq,forward
sample-2,/home/dkiambi/qiime2_tuto/data/S27_S27_L001_R1_001.fastq,forward
sample-1,/home/dkiambi/qiime2_tuto/data/S20_S20_L001_R2_001.fastq,reverse
sample-2,/home/dkiambi/qiime2_tuto/data/S27_S27_L001_R2_001.fastq,reverse
## end of file
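# Alternative to editing by hand (a minimal sketch, not part of the original workflow): build the
# manifest programmatically. The data directory and the *_R1_001/_R2_001 naming pattern are assumed
# from the example entries above; sample IDs are taken from the file-name prefix.
echo "sample-id,absolute-filepath,direction" > manifest
for f in /home/dkiambi/qiime2_tuto/data/*_R1_001.fastq; do
    sample=$(basename "$f" | cut -d_ -f1)
    echo "${sample},${f},forward" >> manifest
    echo "${sample},${f/_R1_/_R2_},reverse" >> manifest
done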


# import the data into the qiime 2 env using the manifest file created above
qiime tools import --type 'SampleData[PairedEndSequencesWithQuality]' --input-path $PWD/manifest --output-path $PWD/paired.qza --source-format PairedEndFastqManifestPhred33

# join the paired ends
qiime vsearch join-pairs --i-demultiplexed-seqs paired.qza --o-joined-sequences joined_paired.qza

# generate a summary of the joined_paired.qza artifact
qiime demux summarize --i-data joined_paired.qza --o-visualization demux-joined.qzv

# apply quality filtering to the joined sequences
qiime quality-filter q-score-joined --i-demux joined_paired.qza --o-filtered-sequences joined_paired_filtered.qza --o-filter-stats joined_paired_filter-stats.qza

# run only one of the two denoising steps below; they do the same job and produce equivalent outputs (a feature table and representative sequences)

# denoise with DADA2 (takes the unjoined paired.qza; DADA2 joins the reads itself)
qiime dada2 denoise-paired --i-demultiplexed-seqs paired.qza --o-table table.qza --p-trunc-len-f 150 --p-trunc-len-r 150 --o-representative-sequences rep-seqs.qza --o-denoising-stats denoising-stats.qza

# or denoise with Deblur (takes the joined, quality-filtered reads)
qiime deblur denoise-16S --i-demultiplexed-seqs joined_paired_filtered.qza --p-trim-length 150 --o-representative-sequences rep-seqs.qza --o-table table.qza --p-sample-stats --o-stats deblur-stats.qza
## view your .qzv files
qiime tools view demux-joined.qzv
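# Optional next step (a sketch, not in the original workflow): summarise the denoised outputs into
# viewable .qzv artifacts; table.qza and rep-seqs.qza come from whichever denoiser was run above.
qiime feature-table summarize --i-table table.qza --o-visualization table.qzv
qiime feature-table tabulate-seqs --i-data rep-seqs.qza --o-visualization rep-seqs.qzv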

# download a Naive Bayes classifier pre-trained on the Greengenes database (99% OTUs, 515F/806R region)
wget -O "gg-13-8-99-515-806-nb-classifier.qza" "https://data.qiime2.org/2018.2/common/gg-13-8-99-515-806-nb-classifier.qza"

#export the feature table; the output is feature-table.biom
qiime tools export table.qza --output-dir ./

# convert it to tsv and view it
biom convert -i feature-table.biom -o feature-table.tsv --to-tsv
biom head -i feature-table.tsv # reduced view of the data
head feature-table.tsv

# assign taxonomy to your sequences
qiime feature-classifier classify-sklearn --i-classifier gg-13-8-99-515-806-nb-classifier.qza --i-reads rep-seqs.qza --o-classification taxonomy.qza
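# Optional (a sketch, not in the original workflow): tabulate the taxonomy assignments into a viewable .qzv
qiime metadata tabulate --m-input-file taxonomy.qza --o-visualization taxonomy.qzv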

#export the taxonomy assignments
qiime tools export taxonomy.qza --output-dir ./ # the output is a TSV (taxonomy.tsv)
