From dd39d4b2d08614df4d159e7b1023e9165159562b Mon Sep 17 00:00:00 2001 From: J35P312 Date: Sat, 16 Jun 2018 17:09:21 +0200 Subject: [PATCH] modified: INSTALL.sh modified: README.md modified: assemblatron.py --- INSTALL.sh | 6 ++++++ README.md | 26 ++++++++++++++++++++------ assemblatron.py | 35 ++++++++++++++++++++++++++++++++--- 3 files changed, 58 insertions(+), 9 deletions(-) diff --git a/INSTALL.sh b/INSTALL.sh index 7d083f8..7c21cef 100755 --- a/INSTALL.sh +++ b/INSTALL.sh @@ -8,3 +8,9 @@ pip install BESST cd scripts python setup.py build_ext --inplace + +cd .. + +git clone https://github.com/ablab/quast.git +cd quast +pip install -e . diff --git a/README.md b/README.md index 9d99cd2..46877e3 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ The full workflow involves: 3: alignment of the contigs - 4: compute assembly statistics + 4: Quality control, using assemblatron stats or quast 5: variant calling @@ -26,10 +26,9 @@ The assemblatron workflow is run through the following commands: run the install script: ./INSTALL.sh -The install script will download and install BESST, fermikit, and tiddit. +The install script will download and install BESST, fermikit, and quast. Dependencies: - vcftools samtools python 2.7 bwa @@ -71,12 +70,23 @@ Assemblatron performs alignment using bwa mem. The output is printed to a file n A bam file named .bam will be produced # Stats -compute various statistics of an assembly. THe output is printed to stdout. +compute various statistics of an assembly. The output is printed to stdout. -python assemblatron.py --stats +python assemblatron.py --stats --bam The statistics include N50, L50, assembly size, and the number of contigs. +# Quast + +The quality control may be performed using quast. Quast performs a more in-depth but slower analysis. + +python assemblatron.py --quast --contigs --output + +python assemblatron.py --quast --contigs --ref --output + +The quast module supports any number of fasta files. 
Type --help for more information. +NOTE: use absolute path for the output directory. + # SV Call SV. Assemblatron will calssify variants as DEL, INV, BND (translocation or complex), INS, and TDUP. @@ -102,7 +112,7 @@ Call indels and SNV using htsbox pileup (same as fermikit). the output is printed to stdout -other options: +options: -h, --help show this help message and exit --bam BAM input bam (contigs) @@ -140,3 +150,7 @@ Cite the components that you used, as well as the Assemblatron git hub page. If you performed scaffolding, please cite the BESST paper: https://github.com/ksahlin/BESST + + For more info on QUAST: + + https://github.com/ablab/quast diff --git a/assemblatron.py b/assemblatron.py index 4c45e9e..207ef69 100644 --- a/assemblatron.py +++ b/assemblatron.py @@ -35,6 +35,7 @@ def assemble(args,wd): parser.add_argument('--align' , help="align contigs to reference using bwa mem", required=False, action="store_true") parser.add_argument('--fasta' , help="convert aligned contigs bam file to fasta", required=False, action="store_true") parser.add_argument('--fastq' , help="convert bam to fastq", required=False, action="store_true") +parser.add_argument('--quast' , help="compute assembly stats using quast", required=False, action="store_true") args, unknown = parser.parse_known_args() @@ -79,6 +80,25 @@ def assemble(args,wd): args= parser.parse_args() stats.assembly_stats(args) +elif args.quast: + + parser = argparse.ArgumentParser("""QUAST - quality control""") + parser.add_argument('--quast' , help="compute assembly stats using quast", required=False, action="store_true") + parser.add_argument('--contigs', nargs='*', help="input contigs (multiple assemblies are allowed)", required=True) + parser.add_argument('--ref',required = False,type=str, help="reference fasta") + parser.add_argument('--output',required = True,type=str, help="output folder") + parser.add_argument('--features',nargs='*',required = False,type=str, help="Feature BED/GFF file") + 
parser.add_argument('--len',default=100,type=int, help="minimum contig length (default= 100 bp)") + args= parser.parse_args() + + quast="quast.py {} --output-dir {} --min-contig {}".format(" ".join(args.contigs),args.output,args.len) + + if args.ref: + quast+=" -r {}".format(args.ref) + + if args.features: + quast+=" -g {}".format(" ".join(args.features)) + os.system(quast) elif args.align: parser = argparse.ArgumentParser("""Assemblatron align - align contigs to the reference using bwa mem""") @@ -115,6 +135,8 @@ def assemble(args,wd): parser.add_argument('--mem' , help="maximum mempry per thread (gigabytes)", type=int, default=4) parser.add_argument('--iter' , help="Number of itterations (default = 500000)", type=int, default=500000) parser.add_argument('--cores' ,type=int, default = 8, help="number of cores (default = 2)", required=False) + parser.add_argument('-q' ,type=int, help="minimum mapping quality for scaffolding", required=False) + parser.add_argument('-p' ,type=int, help="minimum number of read-pairs to create edge", required=False) args= parser.parse_args() args.prefix=args.filename @@ -132,10 +154,17 @@ def assemble(args,wd): os.system("samtools index {}".format(args.bam)) if args.rf: - os.system("runBESST -c {} -f {} -orientation rf -o {} --iter {}".format(args.contigs,args.bam,args.output,args.iter)) + besst="runBESST -c {} -f {} -orientation rf -o {} -plots --iter {}".format(args.contigs,args.bam,args.output,args.iter) else: - os.system("runBESST -c {} -f {} -orientation fr -o {} --iter {}".format(args.contigs,args.bam,args.output,args.iter)) - + besst="runBESST -c {} -f {} -orientation fr -o {} -plots --iter {}".format(args.contigs,args.bam,args.output,args.iter) + + if args.q: + besst+= " --min_mapq {}".format(args.q) + + if args.p: + besst+= " -e {}".format(args.p) + + os.system(besst) elif args.fastq: parser = argparse.ArgumentParser("""Assemblatron fastq - converts bam to fastq using samtools""")