Skip to content
This repository has been archived by the owner on Nov 7, 2021. It is now read-only.

small fixes/improvements #14

Merged
merged 6 commits into from
Jul 14, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion annot.nf
Original file line number Diff line number Diff line change
Expand Up @@ -191,10 +191,12 @@ if (params.run_exonerate) {
-join -type CDS -translate -retainids 1 > ref.pep
"""
}
exn_prot_chunk = ref_pep.splitFasta( by: 20)
exn_prot_chunk = ref_pep.splitFasta( by: 100)
exn_genome_chunk = pseudochr_seq_exonerate.splitFasta( by: 3)
process run_exonerate {
cache 'deep'
// this process can fail for rogue exonerate processes
errorStrategy 'ignore'

input:
set file('genome.fasta'), file('prot.fasta') from exn_genome_chunk.spread(exn_prot_chunk)
Expand Down
25 changes: 17 additions & 8 deletions bin/iproscan_gff3_merge.lua
Original file line number Diff line number Diff line change
Expand Up @@ -142,16 +142,25 @@ function annotate_vis:visit_feature(fn)
for k,v in pairs(hits) do
if not FILTERED_SOURCES[k] then
for _,n in ipairs(v) do
rng = aminoloc_to_dnaloc(fn, n:get_range(), n:get_strand())
new_node = gt.feature_node_new(fn:get_seqid(), "protein_match",
rng[1], rng[2], fn:get_strand())
new_node:set_source(k)
for attr, attrv in n:attribute_pairs() do
if not FILTERED_ATTRIBS[attr] then
new_node:set_attribute(attr, string.gsub(attrv, "\"",""))
local rng = aminoloc_to_dnaloc(fn, n:get_range(), n:get_strand())
if fn:get_range():contains(gt.range_new(rng[1],rng[2])) then
local new_node = gt.feature_node_new(fn:get_seqid(),
"protein_match",
rng[1], rng[2],
fn:get_strand())
new_node:set_source(k)
for attr, attrv in n:attribute_pairs() do
if not FILTERED_ATTRIBS[attr] then
new_node:set_attribute(attr, string.gsub(attrv, "\"",""))
end
end
fn:add_child(new_node)
else
io.stderr:write("coordinates for feature outside of parent: "
.. tostring(fn:get_range()) .. " vs. "
.. tostring(rng) .. " -- not attaching to "
.. "polypeptide parent")
end
fn:add_child(new_node)
end
end
end
Expand Down
6 changes: 4 additions & 2 deletions bin/sample_ref_genes.lua
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
math.randomseed(os.time())

function usage()
io.stderr:write("Randomly samples a number of protein coding gene CCs.\n")
io.stderr:write("Randomly samples a number of single transcript protein coding gene CCs.\n")
io.stderr:write(string.format("Usage: %s <GFF with gene annotations> " ..
"<number of genes to sample>\n" , arg[0]))
os.exit(1)
Expand All @@ -36,17 +36,19 @@ cv.out = nil
function cv:visit_feature(fn)
local gene = false
local mrna = false
local nof_transcripts = 0
local cds = false
for n in fn:get_children() do
if n:get_type() == "gene" then
gene = true
elseif n:get_type() == "mRNA" then
mrna = true
nof_transcripts = nof_transcripts + 1
elseif n:get_type() == "CDS" then
cds = true
end
end
if gene and mrna and cds then
if gene and mrna and nof_transcripts == 1 and cds then
self.out = fn
else
self.out = nil
Expand Down
4 changes: 4 additions & 0 deletions loc_sanger_farm.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,12 @@ process.memory = "2 GB"
process.clusterOptions = " -R 'select[mem>2048] rusage[mem=2048]' "
process.$contiguate_pseudochromosomes.memory = "8 GB"
process.$contiguate_pseudochromosomes.clusterOptions = " -R 'select[mem>8192] rusage[mem=8192]' "
process.$blast_for_circos.memory = "8 GB"
process.$blast_for_circos.clusterOptions = " -R 'select[mem>8192] rusage[mem=8192]' "
process.$run_ratt.memory = "8 GB"
process.$run_ratt.clusterOptions = " -R 'select[mem>8192] rusage[mem=8192]' "
process.$run_exonerate.memory = "8 GB"
process.$run_exonerate.clusterOptions = " -R 'select[mem>8192] rusage[mem=8192]' "
process.$run_augustus_pseudo.memory = "2 GB"
process.$run_augustus_pseudo.clusterOptions = " -R 'select[mem>2048] rusage[mem=2048]' "
process.$predict_ncRNA.memory = "8 GB"
Expand Down