Reputation: 45
So I am supposed to devise a program that counts a DNA sequence as well as count the individual base pairs. Here's what I have thus far:
class dnaString (str):
def __new__(self,s):
return str.__new__(self,s.upper())
def length (self):
return (len(self))
def getATCG (self,num_A,num_T,num_C,num_G):
num_A = self.count("A")
num_T = self.count("T")
num_C = self.count ("C")
num_G = self.count ("G")
return ( (self.length(), num_A, num_T, num_G, num_C) )
def printnum_A (self):
print ("Adenine base content: {0}".format(self.count("A")))
dna = input("Enter a dna sequence: ")
x=dnaString(dna)
The program doesn't really do anything, and since I'm just starting out with python, I'm not sure how to fix this so it works. what else should I add? I know it's unfinished.
Upvotes: 1
Views: 7075
Reputation: 21
You can do this by using a dictionary to organize and retrieve your counts. For instance:
DNASeq = raw_input("Enter a DNA sequence: ")
SeqLength = len(DNASeq)
print 'Sequence Length:', SeqLength
BaseKey = list(set(DNASeq)) #creates a list from the unique characters in the DNASeq
Dict = {}
for char in BaseKey:
Dict[char] = DNASeq.count(char)
print Dict
Upvotes: -1
Reputation: 6421
Does this help? It works in Python 2.7.3 and 3.2.3 that I happen to have installed.
import itertools
import sys
def pairwise(iterable):
"s -> (s0,s1), (s1,s2), (s2, s3), ..."
a, b = itertools.tee(iterable)
next(b, None)
if sys.version_info[0] > 2:
return zip(a,b)
return itertools.izip(a, b)
class DnaSequence():
Names = {
'A' : 'adenine',
'C' : 'cytosine',
'G' : 'guanine',
'T' : 'thymine'
}
Bases = Names.keys()
def __init__(self, seq):
self._string = seq
self.bases = { x:0 for x in DnaSequence.Bases }
self.pairs = { x+y:0 for x in DnaSequence.Bases
for y in DnaSequence.Bases }
for base in seq:
if base in self.bases:
self.bases[base] += 1
for x,y in pairwise(seq):
pair = x+y
if pair in self.pairs:
self.pairs[pair] += 1
def printCount(self, base):
if base in DnaSequence.Names:
print(DnaSequence.Names[base].capitalize() +
" base content: " + str(self.bases[base]))
else:
sys.stderr.write('No such base ("%s")\n' % base)
def __repr__(self):
return self._string
d = DnaSequence("CCTAGTGTTAGCTAGTCTAGGGAT")
for base in DnaSequence.Bases:
d.printCount(base)
# Further:
print(d)
print(d.bases)
print(d.pairs)
It's a complete example that counts the bases (A, C, G, T) and all occurrences of adjacent pairs (e.g. in ACCGTA, the pairs AC, CC, CG, GT, TA would all be 1, the other 11 possible combinations of the Cartesian product ACGT x ACGT would all be 0).
The counting method used here scans the string once in the constructor, rather than scanning it four times every time getATGC()
is called.
Upvotes: 0
Reputation: 56654
I think the class can be simplified a bit:
class DnaString(str):
def __new__(self, s):
return str.__new__(self, s.strip().upper())
def __init__(self, _):
self.num_A = self.count("A")
self.num_C = self.count("C")
self.num_G = self.count("G")
self.num_T = self.count("T")
def stats(self):
return len(self), self.num_A, self.num_C, self.num_G, self.num_T
then
dna = raw_input("Enter a dna sequence: ")
d = DnaString(dna)
print(d)
print(d.stats())
gives
Enter a dna sequence: ACGTACGTA
ACGTACGTA
(9, 3, 2, 2, 2)
Upvotes: 0
Reputation: 2333
I'm not sure what is the question, but as you are not calling the method ´printnum_A`, nothing is printing. If you call it like this, it works:
dna = input("Enter a dna sequence: ")
x=dnaString(dna)
x.printnum_A()
Update according to comments
It is not enough to declare the methods of a class, you need also to call then when you need them. Like here for printnum_T
:
class dnaString (str):
def __new__(self,s):
return str.__new__(self,s.upper())
def length (self):
return (len(self))
def getATCG (self,num_A,num_T,num_C,num_G):
num_A = self.count("A")
num_T = self.count("T")
num_C = self.count ("C")
num_G = self.count ("G")
return ( (self.length(), num_A, num_T, num_G, num_C) )
def printnum_A (self):
print ("Adenine base content: {0}".format(self.count("A")))
# here the method is declared
def printnum_T (self):
print ("Adenine base content: {0}".format(self.count("T")))
dna = input("Enter a dna sequence: ")
x=dnaString(dna)
x.printnum_A()
# Here I call my method on `x`
x.printnum_T()
Upvotes: 1