Skip to content

Commit

Permalink
Option to specify the GTF attribute to use for annotation.
Browse files Browse the repository at this point in the history
  • Loading branch information
lpantano committed Aug 10, 2016
1 parent d71bb94 commit 5cdb7b9
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 5 deletions.
2 changes: 2 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
- development

* Add --feature_id as an option to specify the attribute
to use in the GTF file for annotation
* Add gene_id as a 2nd option to add GTF annotation
* Only do rnafold for precursors shorter than 200nt

Expand Down
8 changes: 4 additions & 4 deletions seqcluster/libs/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
logger = mylog.getLogger("run")


def read_gtf_line(cols):
def read_gtf_line(cols, field="name"):
"""parse gtf line to get class/name information"""
try:
group = cols[2]
attrs = cols[8].split(";")
name = [attr.strip().split(" ")[1] for attr in attrs if attr.strip().split(" ")[0].lower().endswith("name")]
name = [attr.strip().split(" ")[1] for attr in attrs if attr.strip().split(" ")[0].lower().endswith(field)]
if not name:
name = [attr.strip().split(" ")[1] for attr in attrs if attr.strip().split(" ")[0].lower().endswith("gene_id")]

Expand Down Expand Up @@ -57,7 +57,7 @@ def _position_in_feature(pos_a, pos_b):
return lento5, lento3, strd


def anncluster(c, clus_obj, db, type_ann):
def anncluster(c, clus_obj, db, type_ann, feature_id="name"):
"""intersect transcription position with annotation files"""
id_sa, id_ea, id_id, id_idl, id_sta = 1, 2, 3, 4, 5
if type_ann == "bed":
Expand All @@ -72,7 +72,7 @@ def anncluster(c, clus_obj, db, type_ann):
logger.debug("Type:%s\n" % type_ann)
for cols in c.features():
if type_ann == "gtf":
cb, sb, eb, stb, db, tag = read_gtf_line(cols[6:])
cb, sb, eb, stb, db, tag = read_gtf_line(cols[6:], feature_id)
else:
sb = int(cols[id_sb])
eb = int(cols[id_eb])
Expand Down
2 changes: 2 additions & 0 deletions seqcluster/libs/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,8 @@ def add_subparser_cluster(subparsers):
dest="min_seqs", help="threshold to consider a cluster as valid", default=10)
parser.add_argument("--db",
help="prefix for sqlite3 database with results to use htmlViz plugin (in dev).")
parser.add_argument("--feature_id",
help="name in GTF to use to annotate clusters", default='name')
return parser


Expand Down
2 changes: 1 addition & 1 deletion seqcluster/make_clusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def _annotate(args, setclus):
db = os.path.basename(filebed)
b = pybedtools.BedTool(filebed)
c = a.intersect(b, wo=True)
setclus = anncluster(c, setclus, db, args.type_ann)
setclus = anncluster(c, setclus, db, args.type_ann, args.feature_id)
return setclus


Expand Down

0 comments on commit 5cdb7b9

Please sign in to comment.