-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAAfunctionwithdataframe.py
124 lines (82 loc) · 3.29 KB
/
AAfunctionwithdataframe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
from Bio import SeqIO
from Bio import AlignIO
from Bio import AlignIO
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Report file; aminoacidchecker and the main loop below both write to it.
file = open("testresult.txt", mode="w")
# Strain counters: the first is incremented by the main loop, the second is
# only referenced from the commented-out variant of that loop.
counterofstrains = counterofstrains2 = 0
# Containers for the DataFrame step: idlist is only referenced from
# commented-out code, aalist is the mapping handed to pd.DataFrame.
idlist, aalist = [], {}
def aminoacidchecker(inputfile, out=None):
    """Count the 20 standard amino acids in a sequence and report proportions.

    Every item yielded by iterating ``inputfile`` is compared against the
    one-letter codes in ``GALMFWKQESPVICYHRNDT``. Items that are not one of
    those codes (gap characters, ambiguity codes, lower-case letters, ...)
    are ignored and do not contribute to the total.

    For each amino acid the count is printed to stdout and a
    ``"Proportion of X present is P"`` line is written to ``out``.

    Args:
        inputfile: Iterable of single-character strings, e.g. a plain string.
            The script passes each record of the parsed alignment.
        out: Writable text stream that receives the proportion lines. When
            omitted, the module-level ``file`` handle is used, which is what
            the function always wrote to before this parameter existed.

    Returns:
        dict mapping each of the 20 one-letter codes to its proportion (a
        float) among the recognised residues. All proportions are 0.0 when
        no recognised residue was found.
    """
    # One-letter codes of the 20 standard amino acids.
    AAstring = "GALMFWKQESPVICYHRNDT"

    if out is None:
        # Fall back to the report handle opened at module level.
        out = file

    # Start every amino acid at zero so absent ones are still reported.
    aa_count = dict.fromkeys(AAstring, 0)
    for residue in inputfile:
        if residue in aa_count:
            aa_count[residue] += 1
    # Only recognised residues are counted, so the total is the sum of counts.
    totalcount = sum(aa_count.values())

    proportions = {}
    for character in AAstring:
        print("\n --- " + character + " ---")
        print("There are " + str(aa_count[character]) + " copies of the amino acid " + str(character) + ", out of a total of " + str(totalcount))
        if totalcount:
            percentageofAA = float(aa_count[character]) / totalcount
        else:
            # Empty or all-gap sequence: report 0.0 instead of dividing by zero.
            percentageofAA = 0.0
        proportions[character] = percentageofAA
        out.write("Proportion of " + character + " present is " + str(percentageofAA) + "\n")
    return proportions
#alignment = AlignIO.read(open("6.phy"), "phylip")
#print("Alignment length %i" % alignment.get_alignment_length())
#
#
#for record in alignment:
# print(record.seq + " " + record.id)
# counterofstrains2 = counterofstrains2 + 1
# print ("\n\n\n-------------------------------- New strain " + str(counterofstrains2) + "---------------------------------\n")
#
# aminoacidchecker(record)
#
# beebop = record.id
#
# print ("\n -------------------------------- Strain ID " + beebop + " --------------------\n")
#
# Main script: run aminoacidchecker on every record of one PHYLIP alignment,
# write the per-strain report and collect the results into a DataFrame.
print("\n\n\n ########################################## \n\n\n")

# Context manager so the alignment file handle is closed once parsing is done.
with open("3927.phy") as alignment_handle:
    alignment = AlignIO.read(alignment_handle, "phylip")
print("Alignment length %i" % alignment.get_alignment_length())

for record in alignment:
    counterofstrains += 1
    print("\n\n\n-------------------------------- New strain " + str(counterofstrains) + "---------------------------------\n")

    strain_proportions = aminoacidchecker(record)

    strain_id = record.id
    # NOTE(review): this header is written *after* the strain's proportion
    # lines, so in the report it trails the data it labels - confirm intended.
    file.write("\n -------------------------------- Strain ID " + strain_id + " --------------------\n")

    # Keep the per-strain result for the DataFrame when the checker returns
    # one; a later record with the same ID replaces the earlier entry.
    if strain_proportions is not None:
        aalist[strain_id] = strain_proportions

# One row per strain ID, one column per amino acid.
df = pd.DataFrame.from_dict(aalist, orient="index")
print(df)

# Close the report so buffered output is flushed to testresult.txt.
file.close()
#The comment below shows the signature of pandas DataFrame.to_csv, which writes a pandas DataFrame to a CSV file
#DataFrame.to_csv(path_or_buf=None, sep=', ', na_rep='', float_format=None, columns=None, header=True, index=True, index_label=None, mode='w', encoding=None, compression=None, quoting=None, quotechar='"', line_terminator='\n', chunksize=None, tupleize_cols=None, date_format=None, doublequote=True, escapechar=None, decimal='.')