-
Notifications
You must be signed in to change notification settings - Fork 1
/
demo1.py
53 lines (47 loc) · 1.17 KB
/
demo1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 14 13:13:04 2017
@author: weishen
"""
# This script calculates the GC content of the sequences in a FASTA file.
# It runs unchanged under Python 2 and Python 3: every print call takes a
# single, already formatted argument, which both versions print identically.
import sys

# Input file used when no path is given on the command line.
DEFAULT_FASTA = "/Users/weishen/Documents/Courses/EEB5300/demo.fasta"


def count_genes(lines):
    """Return the number of FASTA header lines (lines starting with ">")."""
    # startswith() is also safe for an empty string, unlike indexing [0].
    return sum(1 for line in lines if line.startswith(">"))


def count_bases(lines):
    """Count the bases found on the sequence (non-header) lines.

    Only the upper-case letters A, G, C and T are counted; any other
    character (N, lower-case letters, whitespace, ...) is left out of
    the totals.

    Returns a tuple ``(counts, line_endings)``: ``counts`` maps each of
    "A", "G", "C", "T" to its number of occurrences, and ``line_endings``
    is the number of "\\n" / "\\r" characters seen on sequence lines.
    """
    counts = {"A": 0, "G": 0, "C": 0, "T": 0}
    line_endings = 0
    for line in lines:
        if line.startswith(">"):
            continue
        for nuc in line:
            if nuc in counts:
                counts[nuc] += 1
            elif nuc in "\r\n":
                line_endings += 1
    return counts, line_endings


def gc_content(counts):
    """Return the fraction of counted bases that are G or C.

    Returns 0.0 when no bases were counted, so an empty or header-only
    file does not end in a ZeroDivisionError.
    """
    total = sum(counts.values())
    if total == 0:
        return 0.0
    return (counts["G"] + counts["C"]) / float(total)


def format_counts(counts):
    """Return the per-base report lines, in the order A, G, T, C.

    Each label is paired with its own count (the T and C values used to
    be printed under each other's label).
    """
    return ["%s: %d" % (base, counts[base]) for base in ("A", "G", "T", "C")]


if __name__ == "__main__":
    # An optional first command-line argument overrides the default input.
    fasta_path = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_FASTA

    # The with-block closes the file even if reading it fails.
    with open(fasta_path, "r") as fh:
        lines = fh.readlines()

    NumGenes = count_genes(lines)
    print(NumGenes)

    counts, line_endings = count_bases(lines)
    # Reported only for real line-ending characters; other non-ACGT
    # characters are skipped silently instead of being mislabelled.
    for _ in range(line_endings):
        print("it is a new line character")

    countA = counts["A"]
    countG = counts["G"]
    countC = counts["C"]
    countT = counts["T"]
    nucleotides = countA + countG + countC + countT

    for report_line in format_counts(counts):
        print(report_line)
    print(nucleotides)
    # Second total kept so the output has the same lines as before.
    print(countA + countG + countC + countT)
    GC = gc_content(counts)
    print(GC)