Skip to content

Commit

Permalink
modified: INSTALL.sh
Browse files Browse the repository at this point in the history
	modified:   README.md
	modified:   assemblatron.py
  • Loading branch information
J35P312 committed Jun 16, 2018
1 parent 09c34c3 commit dd39d4b
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 9 deletions.
6 changes: 6 additions & 0 deletions INSTALL.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,9 @@ pip install BESST

cd scripts
python setup.py build_ext --inplace

cd ..

git clone https://github.com/ablab/quast.git
cd quast
pip install -e .
26 changes: 20 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ The full workflow involves:

3: alignment of the contigs

4: compute assembly statistics
4: Quality control, using assemblatron stats or quast

5: variant calling

Expand All @@ -26,10 +26,9 @@ The assemblatron workflow is run through the following commands:

run the install script:
./INSTALL.sh
The install script will download and install BESST, fermikit, and tiddit.
The install script will download and install BESST, fermikit, and quast.
Dependencies:

vcftools
samtools
python 2.7
bwa
Expand Down Expand Up @@ -71,12 +70,23 @@ Assemblatron performs alignment using bwa mem. The output is printed to a file n
A bam file named <prefix>.bam will be produced

# Stats
compute various statistics of an assembly. THe output is printed to stdout.
compute various statistics of an assembly. The output is printed to stdout.

python assemblatron.py --stats <contigs_bam>
python assemblatron.py --stats --bam <contigs_bam>

The statistics include N50, L50, assembly size, and the number of contigs.

# Quast

The quality control may be performed using quast. Quast performs a more in-depth but slower analysis.

python assemblatron.py --quast --contigs <contigs_fasta> --output <output_folder>

python assemblatron.py --quast --contigs <contigs_fasta> --ref <reference.fasta> --output <output_folder>

The quast module accepts any number of contig fasta files (multiple assemblies are allowed). Type --help for more information.
NOTE: use absolute path for the output directory.

# SV
Call SV. Assemblatron will classify variants as DEL, INV, BND (translocation or complex), INS, and TDUP.

Expand All @@ -102,7 +112,7 @@ Call indels and SNV using htsbox pileup (same as fermikit).

the output is printed to stdout

other options:
options:

-h, --help show this help message and exit
--bam BAM input bam (contigs)
Expand Down Expand Up @@ -140,3 +150,7 @@ Cite the components that you used, as well as the Assemblatron git hub page.
If you performed scaffolding, please cite the BESST paper:

https://github.com/ksahlin/BESST

For more info on QUAST:

https://github.com/ablab/quast
35 changes: 32 additions & 3 deletions assemblatron.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def assemble(args,wd):
parser.add_argument('--align' , help="align contigs to reference using bwa mem", required=False, action="store_true")
parser.add_argument('--fasta' , help="convert aligned contigs bam file to fasta", required=False, action="store_true")
parser.add_argument('--fastq' , help="convert bam to fastq", required=False, action="store_true")
parser.add_argument('--quast' , help="compute assembly stats using quast", required=False, action="store_true")

args, unknown = parser.parse_known_args()

Expand Down Expand Up @@ -79,6 +80,25 @@ def assemble(args,wd):
args= parser.parse_args()

stats.assembly_stats(args)
elif args.quast:

parser = argparse.ArgumentParser("""QUAST - quality control""")
parser.add_argument('--quast' , help="compute assembly stats using quast", required=False, action="store_true")
parser.add_argument('--contigs', nargs='*', help="input contigs (multiple assemblies are allowed)", required=True)
parser.add_argument('--ref',required = False,type=str, help="reference fasta")
parser.add_argument('--output',required = True,type=str, help="output folder")
parser.add_argument('--features',nargs='*',required = False,type=str, help="Feature BED/GFF file")
parser.add_argument('--len',default=100,type=int, help="minimum contig length (default= 100 bp)")
args= parser.parse_args()

quast="quast.py {} --output-dir {} --min-contig {}".format(" ".join(args.contigs),args.output,args.len)

if args.ref:
quast+=" -r {}".format(args.ref)

if args.features:
quast+=" -g {}".format(" ".join(args.features))
os.system(quast)

elif args.align:
parser = argparse.ArgumentParser("""Assemblatron align - align contigs to the reference using bwa mem""")
Expand Down Expand Up @@ -115,6 +135,8 @@ def assemble(args,wd):
parser.add_argument('--mem' , help="maximum mempry per thread (gigabytes)", type=int, default=4)
parser.add_argument('--iter' , help="Number of itterations (default = 500000)", type=int, default=500000)
parser.add_argument('--cores' ,type=int, default = 8, help="number of cores (default = 2)", required=False)
parser.add_argument('-q' ,type=int, help="minimum mapping quality for scaffolding", required=False)
parser.add_argument('-p' ,type=int, help="minimum number of read-pairs to create edge", required=False)
args= parser.parse_args()

args.prefix=args.filename
Expand All @@ -132,10 +154,17 @@ def assemble(args,wd):
os.system("samtools index {}".format(args.bam))

if args.rf:
os.system("runBESST -c {} -f {} -orientation rf -o {} --iter {}".format(args.contigs,args.bam,args.output,args.iter))
besst="runBESST -c {} -f {} -orientation rf -o {} -plots --iter {}".format(args.contigs,args.bam,args.output,args.iter)
else:
os.system("runBESST -c {} -f {} -orientation fr -o {} --iter {}".format(args.contigs,args.bam,args.output,args.iter))

besst="runBESST -c {} -f {} -orientation fr -o {} -plots --iter {}".format(args.contigs,args.bam,args.output,args.iter)

if args.q:
besst+= " --min_mapq {}".format(args.q)

if args.p:
besst+= " -e {}".format(args.p)

os.system(besst)
elif args.fastq:

parser = argparse.ArgumentParser("""Assemblatron fastq - converts bam to fastq using samtools""")
Expand Down

0 comments on commit dd39d4b

Please sign in to comment.