Reputation: 1263
It seems zipfile.ZipFile requires random access, which is not supported by the "file-like" object returned by urllib2.
I've tried wrapping it with io.BufferedRandom
but got:
AttributeError: addinfourl instance has no attribute 'seekable'
Upvotes: 1
Views: 298
Reputation: 1263
In the absence of other responses, I've settled on the home-made solution below. It probably won't reduce the memory footprint when reading zip files, yet it might improve latency when the zip header is read first.
from io import BytesIO, SEEK_SET, SEEK_END
def _ceil_div(a, b):
return (a + b - 1) / b
def _align_up(a, b):
    """Round ``a`` up to a multiple of ``b``.

    The number of ``b``-sized units needed to cover ``a`` is obtained from
    ``_ceil_div`` and then scaled back by ``b``.
    """
    units = _ceil_div(a, b)
    return units * b
class BufferedRandomReader(object):
    """Random-access, read-only adapter over a sequential input stream.

    Everything read from the wrapped stream is kept in an in-memory buffer,
    so earlier positions can be revisited with ``seek()`` even though the
    underlying stream cannot seek. The buffer only ever grows; data is never
    discarded.

    Example::

        >>> stream = BufferedRandomReader(BytesIO(b'abc'))
        >>> stream.read(2) == b'ab'
        True
        >>> stream.seek(0) == 0
        True
        >>> stream.read() == b'abc'
        True
    """

    def __init__(self, fin, chunk_size=512):
        """Wrap ``fin``, an object providing ``read([n])`` and ``close()``.

        ``chunk_size`` is the granularity (in bytes, a positive integer) in
        which data is requested from ``fin`` by sized reads.
        """
        self._fin = fin
        self._buf = BytesIO()
        self._eof = False
        self._chunk_size = chunk_size

    def tell(self):
        """Return the current position within the buffered stream."""
        return self._buf.tell()

    def _drain(self):
        """Append everything left in the source to the buffer.

        The buffer must already be positioned at its end.
        """
        self._buf.write(self._fin.read())
        self._eof = True

    def _fill(self, missing):
        """Append at least ``missing`` more bytes from the source.

        Fewer bytes are appended only when the source is exhausted. The
        buffer must already be positioned at its end.
        """
        chunk = self._chunk_size
        while missing > 0:
            # Round the request up to whole chunks using integer arithmetic.
            wanted = -(-missing // chunk) * chunk
            data = self._fin.read(wanted)
            if not data:
                # Only an empty read means end of stream; a short read may
                # just mean that less data was available right now.
                self._eof = True
                return
            self._buf.write(data)
            missing -= len(data)

    def read(self, n=-1):
        """Read at most ``n`` bytes from the current position.

        Fewer bytes are returned only if end-of-file is hit first. If ``n``
        is negative, ``None`` or omitted, all data up to end-of-file is
        returned. An empty bytes object is returned when the position is
        already at (or past) end-of-file.
        """
        pos = self._buf.tell()
        # Move to the end: this yields the buffered length and leaves the
        # buffer ready for appending.
        self._buf.seek(0, SEEK_END)
        end = self._buf.tell()
        if not self._eof:
            if n is None or n < 0:
                self._drain()
            else:
                self._fill(pos + n - end)
        self._buf.seek(pos)
        return self._buf.read(n)

    def seek(self, offset, whence=SEEK_SET):
        """Move to ``offset`` relative to ``whence`` and return the position.

        Seeking relative to the end first pulls the rest of the source into
        the buffer, because the total length is unknown until then.
        """
        if whence == SEEK_END and not self._eof:
            self._buf.seek(0, SEEK_END)
            self._drain()
        return self._buf.seek(offset, whence)

    def close(self):
        """Close the wrapped stream and release the buffer."""
        try:
            self._fin.close()
        finally:
            # Release the buffer even if closing the source fails.
            self._buf.close()
Usage example:
import urllib2
import zipfile

# Wrap the sequential HTTP response so that ZipFile can seek within it.
req = urllib2.urlopen('http://test/file.zip')
stream = BufferedRandomReader(req)
try:
    zf = zipfile.ZipFile(stream, 'r')
    try:
        pass  # ... work with ``zf`` here ...
    finally:
        zf.close()
finally:
    # Closing the adapter also closes the underlying response ``req``.
    stream.close()
Upvotes: 1