Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add chrom name to source #7

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions gff3toembl/EMBLWriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,16 @@ def output_seq(self, seq):
sequence_string = self.converter.construct_sequence(seq)
return sequence_string

def output_source(self, sequence_length, organism, taxonid):
source_string = self.converter.source_template(sequence_length,organism, taxonid)
def output_source(self, sequence_length, organism, taxonid,sequence_name):
source_string = self.converter.source_template(sequence_length,organism, taxonid,sequence_name)
return source_string

def create_output_file(self, sequences, organism, taxonid, project, description, authors, title, publication, genome_type, classification, submitter_name, submitter_title, submitter_location):
i = 1
target = open(self.output_filename, 'w')
for seqid in sorted(sequences):
target.write(self.converter.populated_header(len(self.conv.seqs[seqid]), project, description, i, authors, title, publication, genome_type, classification, submitter_name, submitter_title, submitter_location ) )
target.write(self.output_source(len(self.conv.seqs[seqid]), organism, taxonid))
target.write(self.output_source(len(self.conv.seqs[seqid]), organism, taxonid,seqid ))
for feat in self.conv.feats[seqid]:
target.write(feat)
target.write(self.output_seq(self.conv.seqs[seqid]))
Expand Down
7 changes: 4 additions & 3 deletions gff3toembl/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def blank_header(self):
header = """\
ID XXX; XXX; %s; genomic DNA; STD; %s; %d BP.
XX
AC * _%s
AC * _%s
XX
PR Project:%s
XX
Expand Down Expand Up @@ -66,13 +66,14 @@ def populated_header(self,
header_with_values = header % (genome_type, classification, num_bp,project+str(num_bp)+str(contig_number), project, description, contig_number,authors,title,publication,submitter_name,submitter_title,submitter_location )
return header_with_values

def source_template(self, sequence_length = None, organism = None, taxon_id = None):
def source_template(self, sequence_length = None, organism = None, taxon_id = None, sequence_name = None):
source_template = """\
FT source 1..%d
FT /organism="%s"
FT /mol_type="genomic DNA"
FT /db_xref="taxon:%d"
""" % (sequence_length, organism,taxon_id)
FT /note="%s"
""" % (sequence_length, organism,taxon_id,sequence_name)
return source_template

def construct_sequence(self,sequence):
Expand Down
7 changes: 4 additions & 3 deletions gff3toembl/tests/convert_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def test_blank_header(self):
expected_header = """\
ID XXX; XXX; %s; genomic DNA; STD; %s; %d BP.
XX
AC * _%s
AC * _%s
XX
PR Project:%s
XX
Expand Down Expand Up @@ -65,7 +65,7 @@ def test_populate_header(self):
expected_populated_header = """\
ID XXX; XXX; circular; genomic DNA; STD; UNC; 1234 BP.
XX
AC * _PRJ123412341
AC * _PRJ123412341
XX
PR Project:PRJ1234
XX
Expand Down Expand Up @@ -102,11 +102,12 @@ def test_construct_sequence(self):

def test_source_template(self):
converter = convert.Convert()
assert converter.source_template(1234,"My organism", 5678) == """\
assert converter.source_template(1234,"My organism", 5678,"chromX") == """\
FT source 1..1234
FT /organism="My organism"
FT /mol_type="genomic DNA"
FT /db_xref="taxon:5678"
FT /note="chromX"
"""

def test_sequence_header(self):
Expand Down
30 changes: 20 additions & 10 deletions gff3toembl/tests/data/expected_large_annotation.embl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
ID XXX; XXX; circular; genomic DNA; STD; PROK; 175120 BP.
XX
AC * _My project1751201
AC * _My project1751201
XX
PR Project:My project
XX
Expand Down Expand Up @@ -29,6 +29,7 @@ FT source 1..175120
FT /organism="Organism"
FT /mol_type="genomic DNA"
FT /db_xref="taxon:1234"
FT /note="ERS154949|SC|contig000003"
FT rRNA complement(170..1709)
FT /product="16S ribosomal RNA"
FT /inference="COORDINATES:profile:RNAmmer:1.2"
Expand Down Expand Up @@ -4682,7 +4683,7 @@ SQ Sequence 175120 BP; 55731 A; 31266 C; 25419 G; 62704 T; 0 other;
//
ID XXX; XXX; circular; genomic DNA; STD; PROK; 108420 BP.
XX
AC * _My project1084202
AC * _My project1084202
XX
PR Project:My project
XX
Expand Down Expand Up @@ -4711,6 +4712,7 @@ FT source 1..108420
FT /organism="Organism"
FT /mol_type="genomic DNA"
FT /db_xref="taxon:1234"
FT /note="ERS154949|SC|contig000004"
FT rRNA 2..108
FT /product="5S ribosomal RNA"
FT /inference="COORDINATES:profile:RNAmmer:1.2"
Expand Down Expand Up @@ -7672,7 +7674,7 @@ SQ Sequence 108420 BP; 39263 A; 15735 C; 20016 G; 33406 T; 0 other;
//
ID XXX; XXX; circular; genomic DNA; STD; PROK; 52142 BP.
XX
AC * _My project521423
AC * _My project521423
XX
PR Project:My project
XX
Expand Down Expand Up @@ -7701,6 +7703,7 @@ FT source 1..52142
FT /organism="Organism"
FT /mol_type="genomic DNA"
FT /db_xref="taxon:1234"
FT /note="ERS154949|SC|contig000005"
FT rRNA 1..91
FT /product="5S ribosomal RNA"
FT /inference="COORDINATES:profile:RNAmmer:1.2"
Expand Down Expand Up @@ -9192,7 +9195,7 @@ SQ Sequence 52142 BP; 18748 A; 7364 C; 8937 G; 17093 T; 0 other;
//
ID XXX; XXX; circular; genomic DNA; STD; PROK; 51716 BP.
XX
AC * _My project517164
AC * _My project517164
XX
PR Project:My project
XX
Expand Down Expand Up @@ -9221,6 +9224,7 @@ FT source 1..51716
FT /organism="Organism"
FT /mol_type="genomic DNA"
FT /db_xref="taxon:1234"
FT /note="ERS154949|SC|contig000006"
FT CDS complement(793..1389)
FT /product="Recombination protein RecR"
FT /inference="ab initio prediction:Prodigal:2.60"
Expand Down Expand Up @@ -10529,7 +10533,7 @@ SQ Sequence 51716 BP; 16183 A; 10270 C; 6898 G; 18365 T; 0 other;
//
ID XXX; XXX; circular; genomic DNA; STD; PROK; 39433 BP.
XX
AC * _My project394335
AC * _My project394335
XX
PR Project:My project
XX
Expand Down Expand Up @@ -10558,6 +10562,7 @@ FT source 1..39433
FT /organism="Organism"
FT /mol_type="genomic DNA"
FT /db_xref="taxon:1234"
FT /note="ERS154949|SC|contig000007"
FT rRNA 107..220
FT /product="5S ribosomal RNA"
FT /inference="COORDINATES:profile:RNAmmer:1.2"
Expand Down Expand Up @@ -11719,7 +11724,7 @@ SQ Sequence 39433 BP; 14273 A; 5313 C; 7870 G; 11977 T; 0 other;
//
ID XXX; XXX; circular; genomic DNA; STD; PROK; 21936 BP.
XX
AC * _My project219366
AC * _My project219366
XX
PR Project:My project
XX
Expand Down Expand Up @@ -11748,6 +11753,7 @@ FT source 1..21936
FT /organism="Organism"
FT /mol_type="genomic DNA"
FT /db_xref="taxon:1234"
FT /note="ERS154949|SC|contig000008"
FT CDS complement(1171..1659)
FT /product="hypothetical protein"
FT /inference="ab initio prediction:Prodigal:2.60"
Expand Down Expand Up @@ -12333,7 +12339,7 @@ SQ Sequence 21936 BP; 8100 A; 2662 C; 3389 G; 7784 T; 1 other;
//
ID XXX; XXX; circular; genomic DNA; STD; PROK; 4804 BP.
XX
AC * _My project48047
AC * _My project48047
XX
PR Project:My project
XX
Expand Down Expand Up @@ -12362,6 +12368,7 @@ FT source 1..4804
FT /organism="Organism"
FT /mol_type="genomic DNA"
FT /db_xref="taxon:1234"
FT /note="ERS154949|SC|contig000009"
FT CDS 628..1344
FT /product="GDP-3%2C6-dideoxy-L-galactose biosynthesis
FT protein"
Expand Down Expand Up @@ -12496,7 +12503,7 @@ SQ Sequence 4804 BP; 1694 A; 684 C; 780 G; 1645 T; 1 other;
//
ID XXX; XXX; circular; genomic DNA; STD; PROK; 969 BP.
XX
AC * _My project9698
AC * _My project9698
XX
PR Project:My project
XX
Expand Down Expand Up @@ -12525,6 +12532,7 @@ FT source 1..969
FT /organism="Organism"
FT /mol_type="genomic DNA"
FT /db_xref="taxon:1234"
FT /note="ERS154949|SC|contig000010"
FT CDS complement(118..294)
FT /product="hypothetical protein"
FT /inference="ab initio prediction:Prodigal:2.60"
Expand Down Expand Up @@ -12558,7 +12566,7 @@ SQ Sequence 969 BP; 336 A; 157 C; 116 G; 360 T; 0 other;
//
ID XXX; XXX; circular; genomic DNA; STD; PROK; 656 BP.
XX
AC * _My project6569
AC * _My project6569
XX
PR Project:My project
XX
Expand Down Expand Up @@ -12587,6 +12595,7 @@ FT source 1..656
FT /organism="Organism"
FT /mol_type="genomic DNA"
FT /db_xref="taxon:1234"
FT /note="ERS154949|SC|contig000011"
FT tRNA complement(370..446)
FT /product="tRNA-Ile(gat)"
FT /inference="COORDINATES:profile:Aragorn:1.2.34"
Expand All @@ -12607,7 +12616,7 @@ SQ Sequence 656 BP; 184 A; 154 C; 100 G; 218 T; 0 other;
//
ID XXX; XXX; circular; genomic DNA; STD; PROK; 465 BP.
XX
AC * _My project46510
AC * _My project46510
XX
PR Project:My project
XX
Expand Down Expand Up @@ -12636,6 +12645,7 @@ FT source 1..465
FT /organism="Organism"
FT /mol_type="genomic DNA"
FT /db_xref="taxon:1234"
FT /note="ERS154949|SC|contig000012"
SQ Sequence 465 BP; 139 A; 103 C; 64 G; 159 T; 0 other;
ccccaaagca tatcgtcgtt agtaacgtcc ttcatcggct tctagtgcca aggcatccac 60
cgtgcgccct taataactta atctatgttt ccaccatttt tataaatcaa acgttaacac 120
Expand Down
3 changes: 2 additions & 1 deletion gff3toembl/tests/data/expected_single_feature.embl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
ID XXX; XXX; circular; genomic DNA; STD; PROK; 240 BP.
XX
AC * _My project2401
AC * _My project2401
XX
PR Project:My project
XX
Expand Down Expand Up @@ -29,6 +29,7 @@ FT source 1..240
FT /organism="Organism"
FT /mol_type="genomic DNA"
FT /db_xref="taxon:1234"
FT /note="ER123|SC|contig000003"
FT CDS complement(1..210)
FT /product="Peroxide stress regulator PerR%2C FUR family"
FT /inference="ab initio prediction:Prodigal:2.60"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
ID XXX; XXX; circular; genomic DNA; STD; PROK; 240 BP.
XX
AC * _My project2401
AC * _My project2401
XX
PR Project:My project
XX
Expand Down Expand Up @@ -29,6 +29,7 @@ FT source 1..240
FT /organism="Organism"
FT /mol_type="genomic DNA"
FT /db_xref="taxon:1234"
FT /note="ER123|SC|contig000003"
FT CDS complement(1..210)
FT /product="Peroxide stress regulator PerR%2C FUR family"
FT /inference="ab initio prediction:Prodigal:2.60"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
ID XXX; XXX; circular; genomic DNA; STD; PROK; 240 BP.
XX
AC * _My project2401
AC * _My project2401
XX
PR Project:My project
XX
Expand Down Expand Up @@ -29,6 +29,7 @@ FT source 1..240
FT /organism="Organism"
FT /mol_type="genomic DNA"
FT /db_xref="taxon:1234"
FT /note="ER123|SC|contig000003"
FT CDS complement(1..210)
FT /product="Peroxide stress regulator PerR%2C FUR family"
FT /inference="ab initio prediction:Prodigal:2.60"
Expand Down