Reputation: 301
What's wrong with my script? Every request fails with an HTTP "Bad Request" error and I can't work out why.
from Bio import Entrez
Entrez.email = '[email protected]'
import time
# Request one entry from the Entrez 'Protein' database and return whatever
# handle.read() yields for it.
def fetch(ID):
    # NOTE(review): this is the call that raises "HTTPError: Bad Request" (see
    # the traceback below). Per the answer further down, the retmode/rettype
    # values are swapped, and efetch expects exact ID numbers rather than
    # names such as 'ATK1'.
    handle = Entrez.efetch(db = 'Protein', id = ID, retmode = 'fasta', rettype = 'text') #<--- here
    seq = handle.read()
    # Pause for one second before the caller issues the next request.
    time.sleep(1)
    return seq
# Values handed to fetch() one at a time. NOTE(review): these are names, not
# the exact ID numbers that the answer below says efetch requires.
ids = ['ATK1','Cat','Lig1']
# NOTE(review): the loop variable `id` shadows the builtin of the same name
# inside the comprehension.
out = [fetch(id) for id in ids]
# Write every fetched response, in order, into out.fasta.
with open('out.fasta', 'w') as f:
    f.writelines(out)
Traceback:
File "<ipython-input-42-0be173f176eb>", line 1, in <module>
runfile('C:/Users/MGrad/bioPythonSearch.py', wdir='C:/Users/MGrad/Dropbox/Leg')
File "C:\Users\MGrad\AppData\Local\conda\conda\envs\my_root\lib\site-packages\spyder\utils\site\sitecustomize.py", line 880, in runfile
execfile(filename, namespace)
File "C:\Users\Local\conda\conda\envs\my_root\lib\site-packages\spyder\utils\site\sitecustomize.py", line 102, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/Leg/bioPythonSearch.py", line 20, in <module>
out = [fetch(id) for id in ids] # where ids is a Python list containing gene ids/accession numbers
File "C:/Users/MGrad/bioPythonSearch.py", line 20, in <listcomp>
out = [fetch(id) for id in ids] # where ids is a Python list containing gene ids/accession numbers
File "C:/Users/MGrad/bioPythonSearch.py", line 14, in fetch
handle = Entrez.efetch(db = 'Protein', id = ID, retmode = 'fasta', rettype = 'text')
File "C:\Users\MGrad\AppData\Local\conda\conda\envs\my_root\lib\site-packages\Bio\Entrez\__init__.py", line 180, in efetch
return _open(cgi, variables, post=post)
File "C:\Users\MGrad\AppData\Local\conda\conda\envs\my_root\lib\site-packages\Bio\Entrez\__init__.py", line 526, in _open
raise exception
File "C:\Users\MGrad\AppData\Local\conda\conda\envs\my_root\lib\site-packages\Bio\Entrez\__init__.py", line 524, in _open
handle = _urlopen(cgi)
File "C:\Users\MGrad\AppData\Local\conda\conda\envs\my_root\lib\urllib\request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\MGrad\AppData\Local\conda\conda\envs\my_root\lib\urllib\request.py", line 532, in open
response = meth(req, response)
File "C:\Users\MGrad\AppData\Local\conda\conda\envs\my_root\lib\urllib\request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Users\MGrad\AppData\Local\conda\conda\envs\my_root\lib\urllib\request.py", line 570, in error
return self._call_chain(*args)
File "C:\Users\MGrad\AppData\Local\conda\conda\envs\my_root\lib\urllib\request.py", line 504, in _call_chain
result = func(*args)
File "C:\Users\MGrad\AppData\Local\conda\conda\envs\my_root\lib\urllib\request.py", line 650, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
HTTPError: Bad Request
Upvotes: 1
Views: 729
Reputation: 41872
Entrez.efetch() works with exact ID numbers. If you want to look up terms like ATK1, you need to go through Entrez.esearch() first to resolve each term into one or more ID numbers. Here's a simplistic, but working, example:
import time

# xml.etree.cElementTree was removed in Python 3.9. The plain ElementTree
# module picks up the C accelerator automatically, so fall back to it when the
# old name is unavailable.
try:
    import xml.etree.cElementTree as ElementTree
except ImportError:
    import xml.etree.ElementTree as ElementTree

from Bio import Entrez
from Bio import SeqIO

# Search terms (not ID numbers) to resolve and download.
TERMS = ['ATK1', 'Cat', 'Lig1']

# Contact address Biopython passes along with the Entrez requests.
Entrez.email = '[email protected]'
def fetch(term):
    """Resolve a search term to a Protein record and return it.

    The term is first sent to ``Entrez.esearch`` to obtain an ID number,
    because ``Entrez.efetch`` works with exact IDs only. Just the first hit is
    used (``retmax=1``), so an ambiguous term may resolve to an unintended
    record; pass a more specific term when that matters.

    Args:
        term: Search term for the 'Protein' database, e.g. ``'ATK1'``.

    Returns:
        The record that ``SeqIO.read`` parses from the FASTA response.

    Raises:
        ValueError: If the search yields no ID for ``term``.
    """
    # retmax=1 just returns first result of possibly many; i.e. may be wrong, use more specific ID
    handle = Entrez.esearch(db='Protein', term=term, retmax=1)
    try:
        root = ElementTree.fromstring(handle.read())
    finally:
        # Release the connection even if the response cannot be parsed.
        handle.close()

    # find() returns None when the result has no <Id> element; report that
    # explicitly instead of failing on `.text` with an AttributeError.
    id_element = root.find("IdList/Id")
    if id_element is None:
        raise ValueError('No Protein record found for term {!r}'.format(term))
    id_number = id_element.text
    print(term, '->', id_number)  # ATK1 -> 1039008188

    handle = Entrez.efetch(db='Protein', id=id_number, retmode='text', rettype='fasta')
    try:
        seq_record = SeqIO.read(handle, 'fasta')
    finally:
        handle.close()

    # Pause for one second between consecutive lookups.
    time.sleep(1)
    return seq_record
# Resolve and download every term first, then emit all of the records into a
# single FASTA file with one SeqIO.write call.
out = list(map(fetch, TERMS))
with open('out.fasta', 'w') as f:
    SeqIO.write(out, f, 'fasta')
The Entrez.esearch() results come back as an XML document, so we use cElementTree (imported as ElementTree) to parse it. This query can match multiple records, but we naively asked for only one -- you'll need to handle that by examining the multiple results or by providing more specific terms.
Also, your code swapped the values of retmode and rettype: efetch should be called with retmode='text' and rettype='fasta'.
Upvotes: 2