deeptools · adRn-s · Feb 3, 2023 · Feb 28, 2022 · Feb 28, 2022 · Feb 28, 2022
diff --git a/.azure-pipelines/test-template.yml b/.azure-pipelines/test-template.yml
@@ -1,15 +1,27 @@
-steps:
-- bash: conda create -n foo -q --yes -c conda-forge -c bioconda python=$(python.version) numpy scipy matplotlib==3.1.1 nose flake8 plotly pysam pyBigWig py2bit deeptoolsintervals
-  displayName: Installing dependencies
-- bash: |
-    source activate foo
-    python -m pip install . --no-deps --ignore-installed -vvv
-  displayName: Installing deeptools
-- bash: |
-    source activate foo
-    flake8 . --exclude=.venv,.build,build --ignore=E501,F403,E402,F999,F405,E722,W504,W605
-  displayName: flake8
-- bash: |
-    source activate foo
-    nosetests --with-doctest -sv deeptools
-  displayName: Test deepTools
+trigger:  # CI trigger: the '*' include pattern matches every branch
+  branches:
+    include:
+    - '*'
+pr:  # pull request trigger: the '*' include pattern matches every target branch
+  branches:
+    include:
+    - '*'
+jobs:
+- job: install_deeptools_run_tests  # NOTE(review): $(python.version) used below is not defined in this file - confirm where it is set
+  pool:
+    vmImage: 'ubuntu-latest'
+  steps:  # create the conda env "foo", install deepTools into it, then run flake8 and the nose tests
+  - bash: conda create -n foo -q --yes -c conda-forge -c bioconda python=$(python.version) numpy scipy matplotlib==3.1.1 nose flake8 plotly pysam pyBigWig py2bit deeptoolsintervals
+    displayName: Installing dependencies
+  - bash: |
+      source activate foo
+      python -m pip install . --no-deps --ignore-installed -vvv
+    displayName: Installing deeptools
+  - bash: |
+      source activate foo
+      flake8 . --exclude=.venv,.build,build --ignore=E501,F403,E402,F999,F405,E722,W504,W605
+    displayName: flake8
+  - bash: |
+      source activate foo
+      nosetests --with-doctest -sv deeptools
+    displayName: Test deepTools
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -1,5 +1,7 @@
 name: Test
-on: [push]
+on: [push, pull_request]
+env:
+  GALAXY_BRANCH: release_22.05
 jobs:
   build-linux:
     name: Test on Linux
@@ -35,8 +37,8 @@ jobs:
 
         source activate foo
         nosetests --with-doctest -sv deeptools
-  planemo:
-    name: First planemo chunk
+  planemo_test:
+    name: Planemo test
     runs-on: ubuntu-latest
     needs: build-linux
     strategy:
@@ -48,4 +50,45 @@ jobs:
     - name: planemo
       run: |
         source activate foo
-        ./.planemo.sh ${{ matrix.chunk }}
+        conda update -c conda-forge -c bioconda samtools
+        ./.planemo.sh ${{ matrix.chunk }} ${{ env.GALAXY_BRANCH }}
+    - uses: actions/upload-artifact@v3
+      with:
+        name: 'Tool test output ${{ matrix.chunk }}'
+        path: upload
+
+  planemo_combine_outputs:  # gathers the artifacts uploaded by the chunked planemo_test jobs and checks them
+    name: Combine chunked test results
+    needs: [build-linux, planemo_test]  # only starts once both of these jobs have completed
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ['3.7']
+    steps:
+    - uses: actions/download-artifact@v3  # no artifact name is given, only a target directory
+      with:
+        path: artifacts
+    - uses: actions/setup-python@v4
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Cache .cache/pip
+      uses: actions/cache@v3
+      id: cache-pip
+      with:
+        path: ~/.cache/pip
+        key: pip_cache_py_${{ matrix.python-version }}_gxy_${{ env.GALAXY_BRANCH }}  # one cache per Python version and Galaxy branch
+    - name: Combine outputs
+      uses: galaxyproject/planemo-ci-action@v1
+      id: combine
+      with:
+        mode: combine  # NOTE(review): presumably merges the per-chunk outputs from artifacts/ into upload/ - confirm in the action docs
+        html-report: true
+    - uses: actions/upload-artifact@v3
+      with:
+        name: 'All tool test results'
+        path: upload
+    - name: Check outputs
+      uses: galaxyproject/planemo-ci-action@v1
+      id: check
+      with:
+        mode: check  # NOTE(review): presumably fails the job when the combined results contain failed tests - confirm
diff --git a/.gitignore b/.gitignore
@@ -50,3 +50,6 @@ _sources
 #os X
 .DS_Store
 ._.DS_Store
+
+# Planemo
+tool_test*
diff --git a/.planemo.sh b/.planemo.sh
@@ -9,6 +9,7 @@ if [[ $1 == "1" ]] ; then
     galaxy/wrapper/bamCoverage.xml \
     galaxy/wrapper/bamPEFragmentSize.xml \
     galaxy/wrapper/bigwigCompare.xml \
+    galaxy/wrapper/bigwigAverage.xml \
     galaxy/wrapper/computeGCBias.xml"
 elif [[ $1 == "2" ]] ; then
     wrappers="galaxy/wrapper/computeMatrix.xml \
@@ -28,5 +29,6 @@ else
 fi
 
 planemo lint ${wrappers}
-planemo test --no_dependency_resolution --galaxy_branch release_20.09 --install_galaxy ${wrappers} 2>&1 | grep -v -e "^galaxy" | grep -v -e "^requests"
-test ${PIPESTATUS[0]} -eq 0
+planemo test --no_dependency_resolution --galaxy_branch "$2" --install_galaxy ${wrappers} 2>&1  # $2: Galaxy branch to test against
+mkdir -p upload  # -p: do not fail when the directory is left over from an earlier run
+mv tool_test_output* upload/  # collect the planemo reports in the directory that the workflow uploads
diff --git a/CHANGES.txt b/CHANGES.txt
@@ -1,3 +1,6 @@
+3.6.0
+* Add bigwigAverage
+
 3.5.1
 * cmp usage is updated to fit the recent mpl updates.
 * The requirements.txt is updated.

diff --git a/bin/bigwigAverage b/bin/bigwigAverage
@@ -0,0 +1,12 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import deeptools.misc  # NOTE(review): not referenced below; presumably imported for its side effects - confirm
+from deeptools.bigwigAverage import main
+import sys
+
+if __name__ == "__main__":
+    # Called without any command line argument: ask for the help text.
+    # Otherwise pass None, so that main() hands None on to parse_args().
+    no_arguments = len(sys.argv) == 1
+    main(["--help"] if no_arguments else None)
diff --git a/bin/estimateScaleFactor b/bin/estimateScaleFactor
@@ -85,7 +85,7 @@ def parseArguments(args=None):
          args.ignoreForNormalization=[x.strip() for x in args.ignoreForNormalization.split(',')]
     else:
          args.ignoreForNormalization = []
-    return(args)
+    return args
 
 def main(args):
     """

diff --git a/deeptools/SES_scaleFactor.py b/deeptools/SES_scaleFactor.py
@@ -127,7 +127,7 @@ def estimateScaleFactor(bamFilesList, binLength, numberOfSamples,
     # Take a lower rank to move to a region with probably
     # less peaks and more background.
     maxIndex = int(maxIndex * 0.8)
-    while(maxIndex < len(p)):
+    while maxIndex < len(p):
         # in rare cases the maxIndex maps to a zero value.
         # In such cases, the next index is used until
         # a non zero value appears.

diff --git a/deeptools/bamHandler.py b/deeptools/bamHandler.py
@@ -77,7 +77,7 @@ def openBam(bamFile, returnStats=False, nThreads=1, minimalDecoding=True):
         sys.exit("The file '{}' does not have BAM or CRAM format ".format(bamFile))
 
     try:
-        assert(bam.check_index() is not False)
+        assert bam.check_index() is not False
     except:
         sys.exit("'{}' does not appear to have an index. You MUST index the file first!".format(bamFile))
 

diff --git a/deeptools/bigwigAverage.py b/deeptools/bigwigAverage.py
@@ -0,0 +1,160 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import argparse  # to parse command line arguments
+import sys
+import multiprocessing
+import os
+import numpy as np
+from deeptools import parserCommon
+from deeptools import writeBedGraph_bam_and_bw
+import deeptools.deepBlue as db
+
+debug = 0
+
+
+def parse_arguments(args=None):
+    """
+    Build the argparse parser of bigwigAverage on top of the parent
+    parsers from parserCommon. `args` is accepted but not used here.
+    """
+    parser = argparse.ArgumentParser(
+        parents=[parserCommon.getParentArgParse(),
+                 parserCommon.output(),
+                 parserCommon.deepBlueOptionalArgs()],
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        description='This tool averages multiple bigWig files based on the number '
+        'of mapped reads. To average the bigWig files, the genome is '
+        'partitioned into bins of equal size, then the scores '
+        'in each bigwig file are computed per bin. '
+        'These scores are averaged and scaleFactors can be applied before the average.')
+
+    parser.add_argument('--bigwigs', '-b',
+                        metavar='Bigwig files',
+                        help='Bigwig files separated by space.',
+                        nargs='+', required=True)
+
+    parser.add_argument('--scaleFactors',
+                        help='Set this parameter to multiply the bigwig values '
+                        'by a constant. The format is '
+                        'scaleFactor1:scaleFactor2:scaleFactor3 etc. '
+                        'For example 0.7:1 to scale the first bigwig file '
+                        'by 0.7 while not scaling the second bigwig file',
+                        default=None, required=False)
+
+    parser.add_argument('--skipNonCoveredRegions', '--skipNAs',
+                        help='This parameter determines if non-covered regions (regions without a score) '
+                        'in the bigWig files should be skipped. The default is to treat those '
+                        'regions as having a value of zero. '
+                        'The decision to skip non-covered regions '
+                        'depends on the interpretation of the data. Non-covered regions '
+                        'in a bigWig file may represent repetitive regions that should '
+                        'be skipped. Alternatively, the interpretation of non-covered regions as '
+                        'zeros may be wrong and this option should be used.',
+                        action='store_true')
+
+    return parser
+
+
+def getType(fname):
+    """
+    Guess the format of a file from its name: 'wiggle' for .wig/.wiggle,
+    'bedgraph' for .bedgraph (in any case) or .bdg, otherwise 'bigwig'.
+    """
+    if fname.endswith((".wig", ".wiggle")):
+        return "wiggle"
+    if fname.lower().endswith(".bedgraph") or fname.endswith(".bdg"):
+        return "bedgraph"
+    # any other file name is treated as a bigWig file
+    return "bigwig"
+
+
+def average(tileCoverage, args):
+    r"""
+    Return the mean of the values of one tile, after multiplying
+    each value by the entry of args['scaleFactors'] found at the
+    same position. The args dictionary is built in the main method.
+
+    >>> funcArgs= {'scaleFactors': (1,1)}
+    >>> average([1, 2], funcArgs)
+    1.5
+    >>> funcArgs= {'scaleFactors': (1,0.5)}
+    >>> average([1, 2], funcArgs)
+    1.0
+    >>> funcArgs= {'scaleFactors': (1,0.5,0.1,0.2)}
+    >>> average([1, 2, 3, 12], funcArgs)
+    1.175
+    >>> average([1, 2, 3, np.nan], funcArgs)
+    nan
+    """
+    scaled = []
+    for position, value in enumerate(tileCoverage):
+        scaled.append(args['scaleFactors'][position] * value)
+    return np.mean(scaled)
+
+
+def main(args=None):
+    """
+    Entry point of bigwigAverage: parse the command line, preload deepBlue
+    files (if any) into temporary files, and hand all input files to
+    writeBedGraph together with the `average` function and scale factors.
+    """
+    args = parse_arguments().parse_args(args)
+    nFiles = len(args.bigwigs)
+
+    if args.scaleFactors:
+        scaleFactors = [float(x) for x in args.scaleFactors.split(":")]
+        if len(scaleFactors) == 1:
+            scaleFactors = scaleFactors * nFiles
+        elif len(scaleFactors) != nFiles:
+            raise argparse.ArgumentTypeError(
+                "Format of scaleFactors is factor or factor1:factor2... as many as bigwig files. "
+                "There are {} bigwigs and {} factors. "
+                "The value given ( {} ) is not valid".format(nFiles, len(scaleFactors), args.scaleFactors))
+    else:
+        scaleFactors = [1] * nFiles
+
+    # function_args is what the average function receives for each tile
+    function_args = {'scaleFactors': scaleFactors}
+
+    # Preload deepBlue files, which need to then be deleted
+    deepBlueFiles = [[fname, idx] for idx, fname in enumerate(args.bigwigs)
+                     if db.isDeepBlue(fname)]
+    if len(deepBlueFiles) > 0:
+        sys.stderr.write("Preloading the following deepBlue files: {}\n".format(",".join([x[0] for x in deepBlueFiles])))
+        foo = db.deepBlue(deepBlueFiles[0][0], url=args.deepBlueURL, userKey=args.userKey)
+        regs = db.makeChromTiles(foo)
+        for x in deepBlueFiles:
+            x.extend([args, regs])
+        if len(deepBlueFiles) > 1 and args.numberOfProcessors > 1:
+            pool = multiprocessing.Pool(args.numberOfProcessors)
+            res = pool.map_async(db.preloadWrapper, deepBlueFiles).get(9999999)
+            pool.close()  # all results are in: let the worker processes exit
+            pool.join()
+        else:
+            res = list(map(db.preloadWrapper, deepBlueFiles))
+
+        # substitute the file names with the temp files
+        for (ftuple, r) in zip(deepBlueFiles, res):
+            args.bigwigs[ftuple[1]] = r
+        deepBlueFiles = [[x[0], x[1]] for x in deepBlueFiles]
+        del regs
+
+    writeBedGraph_bam_and_bw.writeBedGraph(
+        [(b, getType(b)) for b in args.bigwigs],
+        args.outFileName, 0, average,
+        function_args, tileSize=args.binSize, region=args.region,
+        blackListFileName=args.blackListFileName,
+        verbose=args.verbose,
+        numberOfProcessors=args.numberOfProcessors,
+        skipZeroOverZero=False,
+        format=args.outFileFormat,
+        smoothLength=False,
+        missingDataAsZero=not args.skipNonCoveredRegions,
+        extendPairedEnds=False)
+
+    # Clean up temporary bigWig files, if applicable
+    for k, v in deepBlueFiles:
+        if args.deepBlueKeepTemp:
+            print("{} is stored in {}".format(k, args.bigwigs[v]))
+        else:
+            os.remove(args.bigwigs[v])
diff --git a/deeptools/computeGCBias.py b/deeptools/computeGCBias.py
@@ -390,7 +390,7 @@ def tabulateGCcontent_worker(chromNameBam, start, end, stepSize,
         print("%s total time %.1f @ %s:%s-%s %s" % (multiprocessing.current_process().name,
                                                     (endTime - startTime), chromNameBit, start, end, stepSize))
 
-    return(subN_gc, subF_gc)
+    return subN_gc, subF_gc
 
 
 def tabulateGCcontent(fragmentLength, chrNameBitToBam, stepSize,

diff --git a/deeptools/computeMatrix.py b/deeptools/computeMatrix.py
@@ -366,7 +366,7 @@ def process_args(args=None):
                      "set to 0. Nothing to output. Maybe you want to "
                      "use the scale-regions mode?\n")
 
-    return(args)
+    return args
 
 
 def main(args=None):

diff --git a/deeptools/correctGCBias.py b/deeptools/correctGCBias.py
@@ -363,7 +363,7 @@ def writeCorrectedSam_worker(chrNameBam, chrNameBit, start, end,
         try:
             copies = matePairs[read.qname]['copies']
             gc = matePairs[read.qname]['gc']
-            del(matePairs[read.qname])
+            del matePairs[read.qname]
         except:
             # this exception happens when a mate is
             # not present. This could