v0.2.2

tleonardi · Feb 27, 2019 · c1c9b63 · c1c9b63
2 parents d26c28f + 58374d7
commit c1c9b63
Show file tree

Hide file tree

Showing 11 changed files with 208 additions and 42 deletions.
diff --git a/Changelog.md b/Changelog.md
@@ -1,5 +1,21 @@
 # Changelog
 
+## [0.2.2] - 2019/02/27
+
+### Added
+- Better documentation and examples of API methods
+
+### Fixed
+- Fixed shadowing of built-in `all` function by translateChr()
+
+## [0.2.1] - 2019/02/25
+
+### Added
+- Added support for strandedness in tx2genome()
+
+### Fixed
+- Fixed bug in tx2genome() strand handling
+
 ## [0.2.0] - 2019/01/19
 
 ### Fixed

diff --git a/README.md b/README.md
@@ -1,33 +1,45 @@
 [![Build Status](https://travis-ci.org/tleonardi/bedparse.svg?branch=master)](https://travis-ci.org/tleonardi/bedparse)
-[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
-
+[![Docs Status](https://readthedocs.org/projects/bedparse/badge/?version=master&style=flat)](https://bedparse.readthedocs.io/en/master/)
+[![JOSS Status](http://joss.theoj.org/papers/22763a3b37fde13e548e884edd3221fa/status.svg)](http://joss.theoj.org/papers/22763a3b37fde13e548e884edd3221fa)
+[![License: MIT](https://img.shields.io/badge/License-MIT-brightgreen.svg)](https://opensource.org/licenses/MIT)
+
 # Bedparse
 
 ![](docs/bedparse.svg)
 
 Bedparse is a simple python module and CLI tool to perform common operations on BED files.
 
-It offers the following functionality:
-* Filtering of transcripts based on annotations
-* Joining of annotation files based on transcript names
-* Conversion from GTF to BED format
-* Conversion from UCSC to Ensembl chromosome names (and viceversa)
-* Conversion from bed12 to bed6
-* Promoter reporting
-* Intron reporting
-* CDS reporting
-* UTR reporting 
+It offers 11 sub-commands that implement the following functionality:
+* `filter`: Filtering of transcripts based on annotations
+* `join`: Joining of annotation files based on transcript names
+* `gtf2bed`: Conversion from GTF to BED format
+* `convertChr`: Conversion from UCSC to Ensembl chromosome names (and vice versa)
+* `bed12tobed6`: Conversion from bed12 to bed6
+* `promoter`: Promoter reporting
+* `introns`: Intron reporting
+* `cds`: CDS reporting
+* `3pUTR` and `5pUTR`: UTR reporting 
+* `validateFormat`: Check that the file conforms with the BED format
 
 ## Installation
 
+Installing is as simple as:
+
 ```
 pip install bedparse
 ```
 
+## Basic usage
+
+The basic syntax is of the form: `bedparse subcommand [parameters]`.
+
+For a list of all subcommands and a brief explanation of what they do, use: `bedparse --help`.
+
+For a detailed explanation of each subcommand and a list of its parameters, use the `--help` option after the subcommand's name, e.g.: `bedparse promoter --help`
+
 ## Documentation
 
 Our documentation is hosted on [Read the Docs](https://bedparse.readthedocs.io/en/latest/).
 
 We also have a short [tutorial](https://bedparse.readthedocs.io/en/latest/Tutorial.html) to guide you through the basic functions.
 
-
diff --git a/bedparse/bedline.py b/bedparse/bedline.py
@@ -3,9 +3,14 @@
 from bedparse import chrnames
 
 class bedline(object):
-    """An object to represent a BED 12 line"""
-    fields = ("chr", "start", "end", "name", "score", "strand", "cdsStart", "cdsEnd", "color", "nEx", "exLengths", "exStarts")
+    """The bedline class defines an object that represents a single BED[3,4,6,12] line
+    """
+    __fields = ("chr", "start", "end", "name", "score", "strand", "cdsStart", "cdsEnd", "color", "nEx", "exLengths", "exStarts")
     def __init__(self, line=None):
+        """
+        :param line: List where each element corresponds to one field of a BED file
+        :type line: list
+        """
         if(line is None):
             return None
         elif(type(line) is not list):
@@ -18,7 +23,7 @@ def __init__(self, line=None):
 
         self.bedType=len(line)
         for n in range(self.bedType):
-           self.__dict__[self.fields[n]] = line[n]
+           self.__dict__[self.__fields[n]] = line[n]
 
         # If the file format is bed3 set the name to "NoName"
         if(self.bedType<4):
@@ -88,29 +93,48 @@ def __init__(self, line=None):
 
     def __str__(self):
         out=[]
-        for key in self.fields[:self.bedType]:
+        for key in self.__fields[:self.bedType]:
             out.append(self.__dict__[key])
         return str(out)
 
     def print(self, end='\n'):
+        """Prints a bedline object
+
+        :param end: Line terminator character
+        """
         out=[]
-        for key in self.fields[:self.bedType]:
+        for key in self.__fields[:self.bedType]:
             out.append(self.__dict__[key])
         return print(*out, sep="\t", end=end)
 
     def pprint(self):
+        """Prints a bedline object formatted as a python list
+        """
         import pprint
         pp = pprint.PrettyPrinter(indent=4)
         out=[]
-        for key in self.fields[:self.bedType]:
+        for key in self.__fields[:self.bedType]:
             out.append(self.__dict__[key])
         return pp.pprint(out)
 
     def __eq__(self, other):
         return self.__dict__ == other.__dict__
 
-    def promoter(self, up=500, down=500, strand=1):
-        """ Returns a bedline of the promoters"""
+    def promoter(self, up=500, down=500, strand=True):
+        """ Returns the promoter of a bedline object
+
+        Args:
+            up (int): Number of upstream bases
+            down (int): Number of downstream bases
+            strand (bool): If false strandedness is ignored 
+        Returns:
+            bedline: The promoter as a bedline object
+        Examples:
+	    >>> bl = bedline(['chr1', 1000, 2000, 'Tx1', '0', '+'])
+            >>> print(bl.promoter())
+            ['chr1', 500, 1500, 'Tx1']
+        """
+
         if strand and self.bedType<6:
             raise BEDexception("You requested stranded promoters, but the BED file appears to be unstranded")
         if not strand or self.strand=="+":
@@ -124,7 +148,18 @@ def promoter(self, up=500, down=500, strand=1):
         return bedline([self.chr, start, end, self.name])
 
     def utr(self, which=None):
-        """ Returns the 5p UTR of coding transcripts (i.e. those with a CDS) """
+        """ Returns the UTR of coding transcripts (i.e. those with a CDS) 
+        
+	Args:
+	    which (int): Which UTR to return: 3 for 3'UTR or 5 for 5' UTR
+        Returns:
+	    bedline: The UTR as a bedline object
+	Examples:
+	    >>> bl = bedline(["chr1", 100, 500, "Tx1", 0, "+", 200, 300, ".", 1, "400,", "0,"])
+            >>> print(bl.utr(which=5))
+            ['chr1', 100, 200, 'Tx1', 0, '+', 100, 100, '.', 1, '100,', '0,']
+
+        """
         if(not self.stranded):
             raise BEDexception("UTRs for an unstranded transcript make little sense: "+self.name)
         if(which!=5 and which!=3):
@@ -221,7 +256,17 @@ def utr(self, which=None):
 
 
     def cds(self, ignoreCDSonly=False):
-        """Return the CDS of a coding transcript. Transcripts that are only CDS are NOT reported."""
+        """Return the CDS of a coding transcript. Transcripts without CDS are not reported
+	
+	Args:
+            ignoreCDSonly (bool): If True return None when the entire transcript is CDS 
+	Returns:
+	    bedline: The CDS as a bedline object
+	Examples:
+	    >>> bl = bedline(["chr1", 100, 500, "Tx1", 0, "+", 200, 300, ".", 1, "400,", "0,"])
+            >>> print(bl.cds())
+            ['chr1', 200, 300, 'Tx1', 0, '+', 200, 300, '.', 1, '100,', '0,']
+        """
         if(not self.stranded):
             raise BEDexception("CDS for an unstranded transcript makes little sense: "+self.name)
 
@@ -278,7 +323,18 @@ def cds(self, ignoreCDSonly=False):
         return result
 
     def introns(self):
-        """ Returns the introns of a transcript """
+        """ Returns a bedline object of the introns of a transcript
+        
+        Returns:
+	    bedline: The introns of the transcripts as a bedline object
+	Examples:
+	    >>> bl = bedline(["chr1", 100, 420, "Name", 0, "+", 210, 310, ".", 4, "20,20,20,20,", "0,100,200,300,"])
+            >>> print(bl.introns())
+            ['chr1', 120, 400, 'Name', 0, '+', 120, 120, '.', 3, '80,80,80,', '0,100,200,']
+            >>> bl = bedline(["chr1", 100, 420, "Name", 0, "-", 210, 310, ".", 1, "320,", "0,"])
+            >>> print(bl.introns())
+            None
+        """
         if(self.bedType<12 or self.nEx<2): return None
 
         exStarts=self.exStarts.split(',')
@@ -301,7 +357,20 @@ def introns(self):
     def tx2genome(self, coord, stranded=False):
         """ Given a position in transcript coordinates returns the equivalent in genome coordinates.
             The transcript coordinates are considered without regard to strand, i.e. 0 is the leftmost
-            position for both + and - strand transcripts."""
+            position for both + and - strand transcripts, unless the stranded option is set to True.
+
+            Args:
+                coord (int): Coordinate to convert from transcript-space to genome space
+                stranded (bool): If True use the rightmost base of negative strand transcripts as 0
+            Returns:
+		int: Coordinate in genome-space
+	    Examples:
+		>>> bl = bedline(['chr1', 1000, 2000, 'Tx1', '0', '-'])
+                >>> bl.tx2genome(10)
+                1010
+                >>> bl.tx2genome(10, stranded=True)
+                1989
+            """
 
         if not isinstance(coord, int):
             raise BEDexception("coord must be of type integer")
@@ -343,7 +412,22 @@ def tx2genome(self, coord, stranded=False):
 
 
     def bed12tobed6(self, appendExN=False, whichExon="all"):
-        """ Returns a list of bedlines (bed6) corresponding to the exons."""
+        """ Returns a list of bedlines (bed6) corresponding to the exons.
+
+       	    Args:
+        	appendExN (bool): Appends the exon number to the transcript name
+        	whichExon (str): Which exon to return. One of ["all", "first", "last"]. First and last respectively report the first or last exon relative to the TSS (i.e. taking strand into account).
+            Returns:
+		list: list of bedline objects, one per exon
+	    Examples:
+		>>> bl = bedline(["chr1", 100, 420, "Name", 0, "+", 210, 310, ".", 4, "20,20,20,20,", "0,100,200,300,"])
+                >>> for i in bl.bed12tobed6(appendExN=True): print(i)
+                ... 
+                ['chr1', 100, 120, 'Name_Exon001', 0, '+']
+                ['chr1', 200, 220, 'Name_Exon002', 0, '+']
+                ['chr1', 300, 320, 'Name_Exon003', 0, '+']
+                ['chr1', 400, 420, 'Name_Exon004', 0, '+']
+        """
         if(self.bedType!=12): raise BEDexception("Only BED12 lines can be coverted to BED6")
         if whichExon not in ("all", "first", "last"):
             raise BEDexception("whichExon has to be one of [all, first, last]")
@@ -371,14 +455,31 @@ def bed12tobed6(self, appendExN=False, whichExon="all"):
             elif self.strand == "-":
                 return([exons[0]])
 
-    def translateChr(self, assembly, target, suppress=False, all=False, patches=False):
-        """ Convert the chromosome name to Ensembl or UCSC """
+    def translateChr(self, assembly, target, suppress=False, ignore=False, patches=False):
+        """ Convert the chromosome name to Ensembl or UCSC 
+
+	    Args:
+                assembly (str): Assembly of the BED file (either hg38 or mm10).
+                target (str): Desired chromosome name convention (ucsc or ens).
+                suppress (bool): When a chromosome name can't be matched between UCSC and Ensembl do not report it in the output (by default throws an error)
+                ignore (bool): When a chromosome name can't be matched between UCSC and Ensembl set it to 'NA' (by default throws an error)
+                patches (bool): Allows conversion of all patches up to p11 for hg38 and p4 for mm10. Without this option, if the BED file contains contigs added by a patch the conversion terminates with an error (unless the -a or -s flags are present).
+            Returns:
+		bedline: A bedline object with the converted chromosome
+	    Examples:
+		>>> bl = bedline(['chr1', 1000, 2000, 'Tx1', '0', '-'])
+                >>> print(bl.translateChr(assembly="hg38", target="ens"))
+                ['1', 1000, 2000, 'Tx1', '0', '-']
+                >>> bl = bedline(['chr19_GL000209v2_alt', 1000, 2000, 'Tx1', '0', '-'])
+                >>> print(bl.translateChr(assembly="hg38", target="ens"))
+                ['CHR_HSCHR19KIR_RP5_B_HAP_CTG3_1', 1000, 2000, 'Tx1', '0', '-']
+        """
 
         if(assembly not in ("hg38", "mm10")):
             raise BEDexception("The specified assembly is not supported")
         if(target not in ("ucsc", "ens")):
             raise BEDexception("The specified target naming convention is not supported")
-        if(all and suppress):
+        if(ignore and suppress):
             raise BEDexception("Only one of allowMissing and suppressMissing is allowed")
 
         if(assembly=="hg38" and target=="ucsc"):
@@ -400,7 +501,7 @@ def translateChr(self, assembly, target, suppress=False, all=False, patches=Fals
 
         if(self.chr in convDict.keys()):
                 self.chr=convDict[self.chr]
-        elif(all):
+        elif(ignore):
             self.chr="NA"
         elif(suppress):
             return None

diff --git a/bedparse/bedparse.py b/bedparse/bedparse.py
@@ -118,7 +118,7 @@ def join(args):
 def convertChr(args):
     with args.bedfile as tsvfile:
         for line in tsvfile:
-            translatedLine=bedline(line.split('\t')).translateChr(assembly=args.assembly, target=args.target, suppress=args.suppressMissing, all=args.allowMissing, patches=args.patches)
+            translatedLine=bedline(line.split('\t')).translateChr(assembly=args.assembly, target=args.target, suppress=args.suppressMissing, ignore=args.allowMissing, patches=args.patches)
             if(translatedLine):
                translatedLine.print()
     tsvfile.close()

diff --git a/bedparse/converters.py b/bedparse/converters.py
@@ -82,7 +82,7 @@ def gtf2bed(gtf, extra=[''], filterKey="transcript_biotype", filterType=['']):
 	# Convert to bedline for format validation
         out=list()
         bed = bedline(transcripts[transcript])
-        for key in bed.fields[:bed.bedType]:
+        for key in bed._bedline__fields[:bed.bedType]:
             out.append(bed.__dict__[key])
         if(extra!=['']):
             for field in extra:

diff --git a/docs/Tutorial.md b/docs/Tutorial.md
@@ -124,3 +124,32 @@ chr1    943907  944575  ENST00000342066.7_Exon014       0       +
 ```
 
 The optional flag --appendExN adds ExonNNN to the end of each transcript name.
+
+## APIs
+
+Bedparse can also be imported as a python module. The API documentation contains detailed information on the bedline class and its methods. The following is a simple example of how to use it:
+
+```
+In [1]: from bedparse import bedline
+
+In [2]: l = bedline(['chr1', 1000, 2000, 'Tx1', '0', '+'])
+
+In [3]: prom = l.promoter()
+
+In [4]: prom.print()
+chr1    500     1500    Tx1
+
+In [5]: prom.pprint()
+['chr1', 500, 1500, 'Tx1']
+
+In [6]: ens_prom = prom.translateChr(assembly="hg38", target="ens")
+
+In [7]: ens_prom.print()
+1       500     1500    Tx1
+
+```
+
+
+
+
+
diff --git a/docs/Usage.md b/docs/Usage.md
@@ -30,11 +30,11 @@ optional arguments:
   --version, -v         show program's version number and exit
 ```
 
-The basic syntax in the form: `bedparse subcommand [parameters]`.
+The basic syntax is of the form: `bedparse sub-command [parameters]`.
 
-For a list of all subcommands and a brief explanation of what they do, use: `bedparse --help`
+For a list of all sub-commands and a brief explanation of what they do, use: `bedparse --help`
 
-For a detailed explanation of each subcommand and a list of its paramters, use the `--help` options after the subcommand's name, e.g.: `bedparse promoter --help`
+For a detailed explanation of each subcommand and a list of its parameters, use the `--help` option after the subcommand's name, e.g.: `bedparse promoter --help`
 
 ---
 

diff --git a/docs/bedparse.bedline.rst b/docs/bedparse.bedline.rst
@@ -0,0 +1,7 @@
+bedparse.bedline module
+=======================
+
+.. automodule:: bedparse.bedline
+    :members:
+    :undoc-members:
+    :show-inheritance: