Skip to content

Commit

Permalink
Merge pull request #11 from bioinform/docker-update
Browse files Browse the repository at this point in the history
Docker update
  • Loading branch information
msahraeian authored Mar 6, 2018
2 parents 8dc3264 + f13c179 commit 8823db9
Show file tree
Hide file tree
Showing 11 changed files with 330 additions and 88 deletions.
35 changes: 20 additions & 15 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
FROM ubuntu:16.04


ENV R_VERSION 3.3.2
ENV SAMTOOLS_VERSION 1.2
ENV BEDTOOLS2_VERSION 2.24.0
ENV PYBEDTOOLS_VERSION 0.7.7
Expand All @@ -19,16 +19,18 @@ ENV BWA_VERSION 0.7.15
ENV SRA_VERSION 2.8.1
ENV COREUTILS_VERSION 8.25
ENV PIGZ_VERSION 2.3.1
ENV BBMAP_VERSION 37.28
ENV GMAP_VERSION 2017-02-15
ENV IDPFUSION_VERSION 1.1.1

RUN apt-get update && \
apt-get install -y --fix-missing build-essential zlib1g-dev unzip libncurses5-dev curl wget r-base r-base-dev python python-pip python-dev cmake libboost-all-dev libxml2-dev libcurl4-gnutls-dev software-properties-common apt-transport-https default-jre default-jdk less vim libtbb-dev
apt-get install -y --fix-missing build-essential zlib1g-dev unzip libncurses5-dev curl wget python python-pip python-dev cmake libboost-all-dev libxml2-dev libcurl4-gnutls-dev software-properties-common apt-transport-https default-jre default-jdk less vim libtbb-dev git

RUN apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9
RUN add-apt-repository 'deb [arch=amd64,i386] https://cran.rstudio.com/bin/linux/ubuntu xenial/'
RUN apt-get update
RUN apt-get install -y --fix-missing r-base r-base-dev
RUN apt-get install -y --fix-missing r-base=${R_VERSION}-1xenial0 r-recommended=${R_VERSION}-1xenial0
RUN apt-get install -y --fix-missing --allow-downgrades r-base-core=${R_VERSION}-1xenial0

RUN echo 'local({r <- getOption("repos"); r["CRAN"] <- "http://cran.r-project.org"; options(repos=r)})' > ~/.Rprofile
RUN R -e 'source("http://bioconductor.org/biocLite.R"); biocLite("DESeq2"); biocLite("tximport"); biocLite("readr");'
Expand Down Expand Up @@ -87,16 +89,16 @@ RUN cd /opt && unzip bowtie2-${BOWTIE2_VERSION}-linux-x86_64.zip && cp -p /opt/b
RUN wget https://sourceforge.net/projects/bio-bwa/files/bwa-${BWA_VERSION}.tar.bz2/download -O /opt/bwa-${BWA_VERSION}.tar.bz2
RUN cd /opt && tar xjf bwa-${BWA_VERSION}.tar.bz2 && cd bwa-${BWA_VERSION} && make && cp -p /opt/bwa-${BWA_VERSION}/bwa /usr/local/bin && cd /opt && rm -rf bwa*

ADD https://github.com/ndaniel/seqtk/archive/1.0-r82b.tar.gz /opt/seqtk-1.0-r82b.tar.gz
RUN cd /opt && tar -zxvf /opt/seqtk-1.0-r82b.tar.gz && cd seqtk-1.0-r82b && make && cp -p /opt/seqtk-1.0-r82b/seqtk /usr/local/bin && cd /opt && rm -rf seqtk*
ADD https://github.com/ndaniel/seqtk/archive/1.2-r101c.tar.gz /opt/seqtk-1.2-r101c.tar.gz
RUN cd /opt && tar -zxvf /opt/seqtk-1.2-r101c.tar.gz && cd seqtk-1.2-r101c && make && cp -p /opt/seqtk-1.2-r101c/seqtk /usr/local/bin && cd /opt && rm -rf seqtk*

ADD http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/blat/blat /usr/local/bin/blat
RUN chmod 755 /usr/local/bin/blat

ADD http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/faToTwoBit /usr/local/bin/faToTwoBit
RUN chmod 755 /usr/local/bin/faToTwoBit

ADD http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64.v287/liftOver /usr/local/bin/liftOver
ADD http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/liftOver /usr/local/bin/liftOver
RUN chmod 755 /usr/local/bin/liftOver

ADD https://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/${SRA_VERSION}/sratoolkit.${SRA_VERSION}-ubuntu64.tar.gz /opt/sratoolkit.${SRA_VERSION}-ubuntu64.tar.gz
Expand All @@ -105,25 +107,28 @@ RUN cd /opt && tar -zxvf sratoolkit.${SRA_VERSION}-ubuntu64.tar.gz && cp -Rp /op
ADD http://ftp.gnu.org/gnu/coreutils/coreutils-${COREUTILS_VERSION}.tar.xz /opt/coreutils-${COREUTILS_VERSION}.tar.xz
RUN cd /opt && tar -xJf coreutils-${COREUTILS_VERSION}.tar.xz && cd coreutils-${COREUTILS_VERSION} && ./configure FORCE_UNSAFE_CONFIGURE=1 && make && make install && cd /opt && rm -rf coreutils*

ADD http://pkgs.fedoraproject.org/repo/pkgs/pigz/pigz-${PIGZ_VERSION}.tar.gz/e803f8bc0770c7a5e96dccb1d2dd2aab/pigz-${PIGZ_VERSION}.tar.gz /opt/pigz-${PIGZ_VERSION}.tar.gz
ADD https://github.com/madler/pigz/archive/v${PIGZ_VERSION}.tar.gz /opt/pigz-${PIGZ_VERSION}.tar.gz
RUN cd /opt && tar -zxvf pigz-${PIGZ_VERSION}.tar.gz && cd pigz-${PIGZ_VERSION} && make && cp -p /opt/pigz-${PIGZ_VERSION}/pigz /usr/local/bin && cd /opt && rm -rf pigz*

ADD http://research-pub.gene.com/gmap/src/gmap-gsnap-${GMAP_VERSION}.tar.gz /opt/gmap-gsnap-${GMAP_VERSION}.tar.gz
RUN cd /opt && tar -zxvf gmap-gsnap-${GMAP_VERSION}.tar.gz && cd gmap-${GMAP_VERSION} && ./configure && make && make install && cd /opt && rm -rf gmap*

RUN wget https://github.com/ndaniel/fusioncatcher/archive/master.zip -O /opt/fusioncatcher-master.zip && cd /opt && unzip fusioncatcher-master.zip && cp -p /opt/fusioncatcher-master/bin/sam2psl.py /usr/local/bin
ENV PATH $PATH:/opt/fusioncatcher-master/bin/
# BBMap: download, unpack and delete the archive inside a single RUN so the
# tarball is not preserved in an image layer (a later `rm` would not shrink it).
RUN wget https://sourceforge.net/projects/bbmap/files/BBMap_${BBMAP_VERSION}.tar.gz -O /opt/BBMap_${BBMAP_VERSION}.tar.gz && \
    cd /opt && tar -xzvf BBMap_${BBMAP_VERSION}.tar.gz && \
    rm -f /opt/BBMap_${BBMAP_VERSION}.tar.gz
# NOTE(review): this PATH entry assumes the archive unpacks into /opt/bbmap - confirm.
ENV PATH $PATH:/opt/bbmap/

RUN cd /opt/ && git clone https://github.com/ndaniel/fusioncatcher.git && cd fusioncatcher && git checkout 60bddd2f1ddfd95a2dbc6c4efa3c521bea3421da && cp -p /opt/fusioncatcher/bin/sam2psl.py /usr/local/bin
ENV PATH $PATH:/opt/fusioncatcher/bin/

# gffread: fetch the release archive, copy the binary to /usr/local/bin, then
# remove the unpacked files.
# The ADD destination is absolute: a relative `opt/...` resolves against the
# current WORKDIR, while the RUN step below always looks for the archive in /opt.
ADD http://ccb.jhu.edu/software/stringtie/dl/gffread-0.9.12.Linux_x86_64.tar.gz /opt/gffread-0.9.12.Linux_x86_64.tar.gz
RUN cd /opt && tar -xzvf gffread-0.9.12.Linux_x86_64.tar.gz && cp -p /opt/gffread-0.9.12.Linux_x86_64/gffread /usr/local/bin && rm -rf /opt/gffread*

ADD https://github.com/bioinform/IDP/archive/master.zip /opt/IDP-master.zip
RUN rm -rf /usr/local/bin/IDP* && cd /opt && unzip IDP-master.zip && rm -rf /opt/IDP-master.zip
RUN cd /opt/ && git clone https://github.com/bioinform/IDP.git && cd IDP && git checkout a5d2d624ab8e4545feff3f51d264931b440d0b53

ADD https://www.healthcare.uiowa.edu/labs/au/IDP-fusion/files/IDP-fusion_${IDPFUSION_VERSION}.tar.gz /opt/IDP-fusion_${IDPFUSION_VERSION}.tar.gz
RUN cd /opt && tar -xzvf IDP-fusion_${IDPFUSION_VERSION}.tar.gz && rm -rf /opt/IDP-fusion_${IDPFUSION_VERSION}.tar.gz


RUN pip install https://github.com/bioinform/RNACocktail/archive/v0.2.tar.gz
RUN pip install https://github.com/bioinform/rnacocktail/archive/v0.2.2.tar.gz

VOLUME /work_dir



16 changes: 9 additions & 7 deletions index.html
Original file line number Diff line number Diff line change
Expand Up @@ -150,17 +150,19 @@ <h2>A comprehensive framework for accurate and efficient RNA-Seq analysis</h2>

</div>

<h2>Publication [Open access]</h2>
<h2>Publication</h2>

<div class="panel panel-default" style="font-family:monospace;">
<div class="panel-body">
<!--
<i>If you use RNACocktail in your work, please cite the following:</i><br>
??<br>
<b>??</b><br>
??
-->
Manuscript in preparation.
Sayed Mohammad Ebrahim Sahraeian, Marghoob Mohiyuddin, Robert Sebra, Hagen Tilgner,
Pegah T. Afshar, Kin Fai Au, Narges Bani Asadi, Mark B. Gerstein, Wing Hung Wong,
Michael P. Snyder, Eric Schadt, and Hugo Y. K. Lam<br>
<b>Gaining comprehensive biological insight into the transcriptome by performing a broad-spectrum RNA-seq analysis</b><br>
Nature Communications 8, Article number: 59 (2017). <a
href="http://dx.doi.org/10.1038/s41467-017-00050-4"
onclick="trackOutboundLink('http://dx.doi.org/10.1038/s41467-017-00050-4'); return false;">doi:10.1038/s41467-017-00050-4
</a>
</div>
</div>

Expand Down
2 changes: 1 addition & 1 deletion src/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.2.1"
__version__ = "0.2.2"
24 changes: 22 additions & 2 deletions src/run_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,17 @@ def tx2gene_map(ref_gtf_file,tx2gene_file):
spamwriter.writerows(tx2gene.items())
return tx2gene


def fix_quant_file(quant_file,fixed_quant_file):
    """Copy a tab-separated quantification file, shortening the IDs in column one.

    The first line of ``quant_file`` is written through unchanged (presumably
    the column header). On every later line the first tab-separated field is
    truncated at its first "|", the remaining fields are kept as they are, and
    the line is re-joined with tabs and terminated with a newline.

    Args:
        quant_file: path of the input file to read.
        fixed_quant_file: path of the output file; created or overwritten.
    """
    with open(quant_file) as q_f:
        # Mode must be plain "w": the previous "w'" (stray apostrophe) is not a
        # valid mode string and raises ValueError on Python 3.
        with open(fixed_quant_file,"w") as fixed_q_f:
            for cnt,line in enumerate(q_f):
                if cnt>0:
                    fields=line.strip().split("\t")
                    # Keep only the part of the ID before the first "|".
                    fields[0]=fields[0].split("|")[0]
                    line="\t".join(fields)+"\n"
                fixed_q_f.write(line)

def run_deseq2(quant_files="", alignments="",
transcripts_gtfs="", ref_gtf="",
Expand Down Expand Up @@ -140,6 +150,16 @@ def run_deseq2(quant_files="", alignments="",
msg="compute gene level abundances for %s."%samples_txt
if start<=step:
logger.info("--------------------------STEP %s--------------------------"%step)

fixed_quant_files=[]
for i,qs in enumerate(quant_files):
fixed_qs=[]
for j,q in enumerate(qs):
fixed_q = os.path.join(work_deseq2, "{}.fixed_quant.sf".format(samples[i][j]))
fix_quant_file(q,fixed_q)
fixed_qs.append(fixed_q)
fixed_quant_files.append(fixed_qs)

command = "%s -e \"library('readr'); library('tximport'); \
samples=c(%s); (files <- file.path(c(%s))); names(files) <- samples; \
tx2gene <- read.csv(file.path('%s'),sep='\\t'); \
Expand All @@ -150,7 +170,7 @@ def run_deseq2(quant_files="", alignments="",
write.table(txi$length, file = '%s/txi.length', quote = FALSE, \
sep='\\t'); write.table(txi$counts, file = '%s/txi.counts',\
quote = FALSE, sep='\\t');\""%(R, ",".join(map(lambda x: "'%s'"%x,reduce(lambda x,y:x+y,samples)))
,",".join(map(lambda x: "'%s'"%x,reduce(lambda x,y:x+y,quant_files)))
,",".join(map(lambda x: "'%s'"%x,reduce(lambda x,y:x+y,fixed_quant_files)))
,tx2gene_file, work_deseq2, work_deseq2, work_deseq2, work_deseq2)
cmd = TimedExternalCmd(command, logger, raise_exception=True)
retcode = cmd.run(cmd_log_fd_out=deseq2_log_fd, cmd_log=deseq2_log, msg=msg, timeout=timeout)
Expand Down
4 changes: 2 additions & 2 deletions src/run_lr_correct.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ def run_lordec(kmer=23,
lordec_log_fd = open(lordec_log, "w")
ksps = ""

if "--threads " not in lordec_opts:
lordec_opts += " --T %d"%nthreads
if "-T " not in lordec_opts:
lordec_opts += " -T %d"%nthreads


msg = "LoRDEC for %s"%sample
Expand Down
2 changes: 1 addition & 1 deletion src/run_lr_fusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def sort_gpd(in_file,out_file,order_chrs=dict([("%s"%k,k) for k in range(1,23)]+
others_chrs=sorted(set(map(lambda x:x[2],rows))-set(order_chrs.keys()))
if others_chrs:
max_id=max(order_chrs.values())
for i,c in enumerated(others_chrs):
for i,c in enumerate(others_chrs):
order_chrs[c]=max_id+i+1
sorted_rows=sorted(rows,key=lambda x: (order_chrs[x[2]],int(x[4])))
with open(out_file, 'wb') as csvfile:
Expand Down
2 changes: 1 addition & 1 deletion src/run_lr_reconstruct.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def sort_gpd(in_file,out_file,order_chrs=dict([("%s"%k,k) for k in range(1,23)]+
others_chrs=sorted(set(map(lambda x:x[2],rows))-set(order_chrs.keys()))
if others_chrs:
max_id=max(order_chrs.values())
for i,c in enumerated(others_chrs):
for i,c in enumerate(others_chrs):
order_chrs[c]=max_id+i+1
sorted_rows=sorted(rows,key=lambda x: (order_chrs[x[2]],int(x[4])))
with open(out_file, 'wb') as csvfile:
Expand Down
Binary file added test/GRCh38.21.gpd.gz
Binary file not shown.
Binary file added test/GRCh38_genes_pos.bed.gz
Binary file not shown.
Binary file added test/GRCh38_strand_pos.bed.gz
Binary file not shown.
Loading

0 comments on commit 8823db9

Please sign in to comment.