Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dev compute matrix strand 20220301 #1128

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 27 additions & 15 deletions .azure-pipelines/test-template.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,27 @@
steps:
- bash: conda create -n foo -q --yes -c conda-forge -c bioconda python=$(python.version) numpy scipy matplotlib==3.1.1 nose flake8 plotly pysam pyBigWig py2bit deeptoolsintervals
displayName: Installing dependencies
- bash: |
source activate foo
python -m pip install . --no-deps --ignore-installed -vvv
displayName: Installing deeptools
- bash: |
source activate foo
flake8 . --exclude=.venv,.build,build --ignore=E501,F403,E402,F999,F405,E722,W504,W605
displayName: flake8
- bash: |
source activate foo
nosetests --with-doctest -sv deeptools
displayName: Test deepTools
trigger:
branches:
include:
- '*'
pr:
branches:
include:
- '*'
jobs:
- job: install_deeptools_run_tests
pool:
vmImage: 'ubuntu-latest'
steps:
- bash: conda create -n foo -q --yes -c conda-forge -c bioconda python=$(python.version) numpy scipy matplotlib==3.1.1 nose flake8 plotly pysam pyBigWig py2bit deeptoolsintervals
displayName: Installing dependencies
- bash: |
source activate foo
python -m pip install . --no-deps --ignore-installed -vvv
displayName: Installing deeptools
- bash: |
source activate foo
flake8 . --exclude=.venv,.build,build --ignore=E501,F403,E402,F999,F405,E722,W504,W605
displayName: flake8
- bash: |
source activate foo
nosetests --with-doctest -sv deeptools
displayName: Test deepTools
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: Test
on: [push]
on: [push, pull_request]
jobs:
build-linux:
name: Test on Linux
Expand Down
1 change: 1 addition & 0 deletions deeptools/computeMatrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def parse_arguments(args=None):
argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,
description="""
(This version should work for stranded bigWig files.)

This tool calculates scores per genome regions and prepares an intermediate file that can be used with ``plotHeatmap`` and ``plotProfiles``.
Typically, the genome regions are genes, but any other regions defined in a BED file can be used.
Expand Down
12 changes: 7 additions & 5 deletions deeptools/heatmapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from deeptools.utilities import toString, toBytes, smartLabels
from deeptools.heatmapper_utilities import getProfileTicks

from deeptools.pyBigWigStranded import openBigWigGuessingStrandedness

old_settings = np.seterr(all='ignore')

Expand Down Expand Up @@ -364,8 +365,8 @@ def compute_sub_matrix_worker(self, chrom, start, end, score_file_list, paramete
# read BAM or scores file
score_file_handles = []
for sc_file in score_file_list:
score_file_handles.append(pyBigWig.open(sc_file))

score_file_handles.append(
openBigWigGuessingStrandedness(sc_file))
# determine the number of matrix columns based on the lengths
# given by the user, times the number of score files
matrix_cols = len(score_file_list) * \
Expand Down Expand Up @@ -531,7 +532,7 @@ def compute_sub_matrix_worker(self, chrom, start, end, score_file_list, paramete
for sc_handler in score_file_handles:
# We're only supporting bigWig files at this point
cov = heatmapper.coverage_from_big_wig(
sc_handler, feature_chrom, zones,
sc_handler, feature_chrom, feature_strand, zones,
parameters['bin size'],
parameters['bin avg type'],
parameters['missing data as zero'],
Expand Down Expand Up @@ -652,7 +653,7 @@ def change_chrom_names(chrom):
return chrom

@staticmethod
def coverage_from_big_wig(bigwig, chrom, zones, binSize, avgType, nansAsZeros=False, verbose=True):
def coverage_from_big_wig(bigwig, chrom, strand, zones, binSize, avgType, nansAsZeros=False, verbose=True):

"""
uses pyBigWig
Expand Down Expand Up @@ -716,7 +717,8 @@ def coverage_from_big_wig(bigwig, chrom, zones, binSize, avgType, nansAsZeros=Fa
endIdx += end - start
if start < end:
# This won't be the case if we extend off the front of a chromosome, such as (-100, 0)
values_array[startIdx:endIdx] = bigwig.values(chrom, start, end)
# jtb: also adding in strand
values_array[startIdx:endIdx] = bigwig.values(chrom, start, end, strand)
if end < region[1]:
startIdx = endIdx
endIdx += region[1] - end
Expand Down
17 changes: 17 additions & 0 deletions deeptools/parserCommon.py
Original file line number Diff line number Diff line change
Expand Up @@ -914,3 +914,20 @@ def __call__(self, parser, args, values, option_string=None):
raise argparse.ArgumentTypeError(msg)
setattr(args, self.dest, values)
return RequiredLength


def stranded_bigwig_file_pair():
"""Common arguments related to pairs of stranded bigWig files.
"""
parser = argparse.ArgumentParser(add_help=False)
group = parser.add_argument_group('Options for stranded pairs of bigWig files')

group.add_argument('--strandedBigWigSuffix',
help='If specified, then files ending with the '
'first suffix will be treated as the + strand '
'of a pair of bigWig files, and the second '
'suffix will be used for the - strand.',
default='.fwd.bw,.rev.bw',
type=str,
required=False)
return parser
143 changes: 143 additions & 0 deletions deeptools/pyBigWigStranded.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re

import numpy as np
import pyBigWig

class pyBigWig1:
"""Wrapper around pyBigWig to ignore strand.

This modifies some pyBigWig methods to expect (and ignore) strand
information. This way, callers can get data either from
a pyBigWig object, or a strandedPyBigWig object, without
knowing whether the file is stranded.

Possibly this module belongs in pyBigWig.

Also, this module (currently) requires numpy support in pyBigWig.
"""
def __init__(self, bigWigFile):
"""Constructor is a wrapper around pyBigWig.pyBigWig.
"""
self.bigWig = pyBigWig.open(bigWigFile)

def values(self, chrom, start, end, strand='+'):
"""Gets values at a region.

This is like pyBigWig.values(), except that expects the strand argument
(which it ignores).
??? possibly this should reverse values, if strand is '-' ?
"""
return self.bigWig.values(chrom, start, end)

def chroms(self, chroms=None):
"""This is just forwarded.
"""
if chroms:
return self.bigWig.chroms(chroms)
else:
return self.bigWig.chroms()


class strandedPyBigWig:
"""Wrapper for accessing pairs of 'stranded' bigWig files.

This has almost the same API as pyBigWig, except that
most methods include a 'strand' argument. When these are
called, this gets the result from the appropriate stranded file.

Note that that these methods return the absolute value of the
contents of the bigWig. This is because the - strand file is
sometimes stored negated. It seems simpler to just always
return the absolute value, than to add a flag indicating
whether the - strand file is stored negated.
"""

def __init__(self, plusStrandFile,
suffixPairs=['fwd.bw,rev.bw', 'plus.bw,minus.bw', 'pl.bw,mn.bw', 'p.bw,m.bw'],
minusStrandFile=None):
"""Constructor opens a pair of bigWig files for reading.

suffixPairs: these define the + and - filename pairing.
minusStrandFile: the minus strand file can also simply be given
explicitly (this overrides the name computed using suffixPairs)
"""
# if - strand file isn't given, attempt to compute it
if not minusStrandFile:
minusStrandFile = getMinusStrandFile(plusStrandFile, suffixPairs)
# if we still don't have a - strand filename, throw an error
if not minusStrandFile:
raise ValueError('couldn\'t compute - strand filename')
# open files
self.plusBigWig = pyBigWig1(plusStrandFile)
self.minusBigWig = pyBigWig1(minusStrandFile)

def chroms(self, chroms=None):
"""Gets chromosomes.

This just uses the + strand file's chromosomes (without
checking that they agree with the - strand file's).
"""
if chroms:
return self.plusBigWig.chroms(chroms)
else:
return self.plusBigWig.chroms()

def values(self, chrom, start, end, strand='+'):
"""Gets values at a region specified by an object.

This will get values from plusBigWig or minusBigWig,
depending on the strand.
Returns: absolute value of values at that region.
If strand is '-', these will be in descending coordinate
order; otherwise they'll be in ascending coordinate order.
"""
# get the relevant bigWig object
bigWig = self.minusBigWig if strand == '-' else self.plusBigWig
# forward this call to that object, taking absolute value
return np.abs(bigWig.values(chrom, start, end, strand))

def getMinusStrandFile(plusStrandFile,
suffixPairs=['fwd.bw,rev.bw', 'plus.bw,minus.bw', 'pl.bw,mn.bw', 'p.bw,m.bw']):
"""Converts the + strand filename to the - strand filename.

plusStrandFilename: name of the + strand file
suffixPairs: these define the + and - filename pairing.
For each A,B pair, if plusStrandFile ends with A, then minusStrandFile is
assumed to end with B.
Returns: name of the - strand file, or None if the + strand file's
name didn't match any of the known + strand file's suffixes.
"""
# loop through + suffix, until a matching one is found
for suffixPair in suffixPairs:
(plusSuffix, minusSuffix) = suffixPair.split(',')
# try replacing the suffix (at the end of the filename)
minusStrandFile = re.sub(re.escape(plusSuffix) + '$', minusSuffix,
plusStrandFile)
# if this is different, then the replacement happened
if minusStrandFile != plusStrandFile:
return minusStrandFile
# at this point, none of the suffixes matched
return None

def openBigWigGuessingStrandedness(bigWigFile,
suffixPairs=['fwd.bw,rev.bw', 'plus.bw,minus.bw', 'pl.bw,mn.bw', 'p.bw,m.bw']):
"""Opens a bigWig file/files, guessing whether they're stranded.

bigWigFile: name of the file (or the + strand file)
suffixPairs: these define the and - filename pairing
(as elsehwere)
Returns: an object for reading from a bigWig file, either stranded
(if bigWigFile ends with one of the suffixes), or not
(if it doesn't).
"""
# see if bigWigFile "looks" stranded
minusStrandFile = getMinusStrandFile(bigWigFile, suffixPairs)
if minusStrandFile:
# if so, assume it's a stranded pair
return strandedPyBigWig(bigWigFile, minusStrandFile=minusStrandFile)
else:
# otherwise, assume it's unstranded
return pyBigWig1(bigWigFile, suffixPairs)
29 changes: 15 additions & 14 deletions galaxy/wrapper/multiBigwigSummary.xml
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
<tool id="deeptools_multi_bigwig_summary" name="multiBigwigSummary" version="@WRAPPER_VERSION@.0">
<tool id="deeptools_multi_bigwig_summary" name="multiBigwigSummary" version="@WRAPPER_VERSION@.1">
<description>calculates average scores for a list of two or more bigwig files</description>
<macros>
<token name="@BINARY@">multiBigwigSummary</token>
<import>deepTools_macros.xml</import>
</macros>
<expand macro="requirements" />
<expand macro="requirements" />
<command>
<![CDATA[
#set files=[]
#set labels=[]

@multiple_input_bigwigs@

@BINARY@
$mode.modeOpt

Expand All @@ -20,8 +19,10 @@
--outFileName $outFile

--bwfiles #echo ' '.join($files)#
--labels #echo ' '.join($labels)#


#if $custom_sample_labels_conditional.custom_labels_select == 'Yes'
--labels #echo ' '.join($custom_sample_labels_conditional.labels)#
#end if
#if $outRawCounts:
--outRawCounts '$outFileRawCounts'
#end if
Expand All @@ -43,7 +44,6 @@
#end if
]]>
</command>

<inputs>
<expand macro="multiple_input_bigwigs" MIN="2" LABEL="Bigwig files" TITLE="BigWig files"/>
<expand macro="custom_sample_labels" />
Expand All @@ -70,7 +70,6 @@
help="Correlation is computed for the number of reads that overlap such regions."/>
</when>
</conditional>

<expand macro="region_limit_operation" />
<param argument="--outRawCounts" type="boolean" label="Save raw counts (scores) to file" help=""/>

Expand All @@ -91,18 +90,20 @@
<param name="bigwigfiles" value="test.bw,test.bw" ftype="bigwig" />
<param name="modeOpt" value="bins" />
<param name="binSize" value="10" />
<param name="corMethod" value="spearman" />
<output name="outFile" file="multiBigwigSummary_result1.npz" ftype="deeptools_coverage_matrix" compare="sim_size" />
</test>
<!--test>
<test>
<param name="bigwigfiles" value="test.bw,test.bw" ftype="bigwig" />
<param name="modeOpt" value="BED-file" />
<param name="region_file" value="multiBamSummary_regions.bed" />
<param name="corMethod" value="pearson" />
<param name="modeOpt" value="bins" />
<param name="binSize" value="10" />
<param name="outRawCounts" value="True" />
<conditional name="custom_sample_labels_conditional">
<param name="custom_labels_select" value="Yes"/>
<param name="labels" value="sample1 sample2"/>
</conditional>
<output name="outFileRawCounts" file="multiBigwigSummary_result2.tabular" ftype="tabular" />
<output name="outFileName" file="multiBigwigSummary_result2.npz" ftype="deeptools_coverage_matrix" compare="sim_size" />
</test-->
<output name="outFile" file="multiBigwigSummary_result2.npz" ftype="deeptools_coverage_matrix" compare="sim_size" />
</test>
</tests>
<help>
<![CDATA[
Expand Down
Binary file modified galaxy/wrapper/test-data/multiBigwigSummary_result1.npz
Binary file not shown.
Loading