diff --git a/annot.nf b/annot.nf index bf05bed..9302d56 100644 --- a/annot.nf +++ b/annot.nf @@ -191,10 +191,12 @@ if (params.run_exonerate) { -join -type CDS -translate -retainids 1 > ref.pep """ } - exn_prot_chunk = ref_pep.splitFasta( by: 20) + exn_prot_chunk = ref_pep.splitFasta( by: 100) exn_genome_chunk = pseudochr_seq_exonerate.splitFasta( by: 3) process run_exonerate { cache 'deep' + // this process can fail for rogue exonerate processes + errorStrategy 'ignore' input: set file('genome.fasta'), file('prot.fasta') from exn_genome_chunk.spread(exn_prot_chunk) diff --git a/bin/iproscan_gff3_merge.lua b/bin/iproscan_gff3_merge.lua index 4230e44..a3cfb25 100755 --- a/bin/iproscan_gff3_merge.lua +++ b/bin/iproscan_gff3_merge.lua @@ -142,16 +142,25 @@ function annotate_vis:visit_feature(fn) for k,v in pairs(hits) do if not FILTERED_SOURCES[k] then for _,n in ipairs(v) do - rng = aminoloc_to_dnaloc(fn, n:get_range(), n:get_strand()) - new_node = gt.feature_node_new(fn:get_seqid(), "protein_match", - rng[1], rng[2], fn:get_strand()) - new_node:set_source(k) - for attr, attrv in n:attribute_pairs() do - if not FILTERED_ATTRIBS[attr] then - new_node:set_attribute(attr, string.gsub(attrv, "\"","")) + local rng = aminoloc_to_dnaloc(fn, n:get_range(), n:get_strand()) + if fn:get_range():contains(gt.range_new(rng[1],rng[2])) then + local new_node = gt.feature_node_new(fn:get_seqid(), + "protein_match", + rng[1], rng[2], + fn:get_strand()) + new_node:set_source(k) + for attr, attrv in n:attribute_pairs() do + if not FILTERED_ATTRIBS[attr] then + new_node:set_attribute(attr, string.gsub(attrv, "\"","")) + end end + fn:add_child(new_node) + else + io.stderr:write("coordinates for feature outside of parent: " + .. tostring(fn:get_range()) .. " vs. " + .. tostring(rng) .. " -- not attaching to " + .. "polypeptide parent") end - fn:add_child(new_node) end end end diff --git a/bin/sample_ref_genes.lua b/bin/sample_ref_genes.lua index a09cd9b..c189179 100755 --- a/bin/sample_ref_genes.lua +++ b/bin/sample_ref_genes.lua @@ -20,7 +20,7 @@ math.randomseed(os.time()) function usage() - io.stderr:write("Randomly samples a number of protein coding gene CCs.\n") + io.stderr:write("Randomly samples a number of single transcript protein coding gene CCs.\n") io.stderr:write(string.format("Usage: %s " .. "\n" , arg[0])) os.exit(1) @@ -36,17 +36,19 @@ cv.out = nil function cv:visit_feature(fn) local gene = false local mrna = false + local nof_transcripts = 0 local cds = false for n in fn:get_children() do if n:get_type() == "gene" then gene = true elseif n:get_type() == "mRNA" then mrna = true + nof_transcripts = nof_transcripts + 1 elseif n:get_type() == "CDS" then cds = true end end - if gene and mrna and cds then + if gene and mrna and nof_transcripts == 1 and cds then self.out = fn else self.out = nil diff --git a/loc_sanger_farm.config b/loc_sanger_farm.config index 99ce198..213784b 100644 --- a/loc_sanger_farm.config +++ b/loc_sanger_farm.config @@ -20,8 +20,12 @@ process.memory = "2 GB" process.clusterOptions = " -R 'select[mem>2048] rusage[mem=2048]' " process.$contiguate_pseudochromosomes.memory = "8 GB" process.$contiguate_pseudochromosomes.clusterOptions = " -R 'select[mem>8192] rusage[mem=8192]' " +process.$blast_for_circos.memory = "8 GB" +process.$blast_for_circos.clusterOptions = " -R 'select[mem>8192] rusage[mem=8192]' " process.$run_ratt.memory = "8 GB" process.$run_ratt.clusterOptions = " -R 'select[mem>8192] rusage[mem=8192]' " +process.$run_exonerate.memory = "8 GB" +process.$run_exonerate.clusterOptions = " -R 'select[mem>8192] rusage[mem=8192]' " process.$run_augustus_pseudo.memory = "2 GB" process.$run_augustus_pseudo.clusterOptions = " -R 'select[mem>2048] rusage[mem=2048]' " process.$predict_ncRNA.memory = "8 GB"