Skip to content

Commit

Permalink
docs: add docstrings to functions of TrueConsense.ORFs
Browse files Browse the repository at this point in the history
  • Loading branch information
florianzwagemaker authored and KHajji committed Apr 14, 2022
1 parent 650a5d1 commit b52a750
Showing 1 changed file with 84 additions and 0 deletions.
84 changes: 84 additions & 0 deletions TrueConsense/ORFs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,18 @@
def in_orf(loc, gffd):
"""If the location is in any of the ORFs, return True. Otherwise, return False
Parameters
----------
loc
the current position
gffd
a dictionary of dictionaries, where the keys are the gene IDs, and the values are dictionaries containing the gene attributes
Returns
-------
A list of True/False values.
"""
exists = []
for k in gffd.keys():
start = gffd[k].get("start")
Expand All @@ -13,10 +27,36 @@ def in_orf(loc, gffd):


def split_to_codons(seq):
    """Cut a sequence into consecutive, non-overlapping chunks of three.

    Parameters
    ----------
    seq
        the sequence to split; it is measured with ``len`` and sliced

    Returns
    -------
    A list of slices of ``seq`` taken in order, three items at a time. If the
    length of ``seq`` is not a multiple of three, the final slice holds the
    one or two items that are left over. An empty ``seq`` gives an empty list.
    """
    codon_size = 3
    codons = []
    # Walk the sequence in steps of one codon; slicing past the end is safe
    # and simply yields a shorter final chunk.
    for offset in range(0, len(seq), codon_size):
        codons.append(seq[offset : offset + codon_size])
    return codons


def SolveTripletLength(uds, mds):
"""Check whether the combination of the uds (upcoming stretch of deletions) and the mds (group of minority deletions) is divisible by 3. If it is, return the length of the triplet. If it is not, return None.
Parameters
----------
uds
upcoming stretch of dels
mds
the minority-del group
Returns
-------
A boolean value.
"""
mdslen = len(mds)
udslen = len(uds)

Expand All @@ -38,6 +78,25 @@ def SolveTripletLength(uds, mds):


def CorrectStartPositions(gffd, shifts, p):
"""Function takes a dictionary of gff data, a list of shifts, and a position. It then iterates
through the dictionary and updates the start position of each entry if the start position is greater
than the position
Parameters
----------
gffd
a dictionary of dictionaries, where each key is a gene name, and each value is a dictionary of the
gene's attributes
shifts
the number of bases to shift the start positions by
p
the position of the first base of the insertion
Returns
-------
A dictionary with the updated start positions.
"""
for k in gffd.keys():
start = gffd[k].get("start")

Expand All @@ -50,6 +109,31 @@ def CorrectStartPositions(gffd, shifts, p):


def CorrectGFF(oldgffdict, newgffdict, cons, p, inserts, mincov, cov):
"""This function corrects the start and end positions of the genes in the new GFF file
Parameters
----------
oldgffdict
a dictionary of the old gff file
newgffdict
the new gff dictionary
cons
the consensus sequence
p
the position of the current base
inserts
a dictionary of insertions, where the key is the position of the insertion and the value is the
nucleotide inserted
mincov
minimum coverage to consider a position as a potential insertion
cov
coverage of the contig
Returns
-------
A dictionary of the corrected gff file.
"""

stopcodons = ["TAG", "TAA", "TGA"]
# rvstopcodon = ["CAT"]
Expand Down

0 comments on commit b52a750

Please sign in to comment.