Reputation: 513
How can I get the dimensions of an image without actually downloading it? Is that even possible? I have a list of image URLs and I want to assign a width and height to each of them.
I know there is a way of doing it locally (How to check dimensions of all images in a directory using python?), but I don't want to download all the images.
Edit:
Following ed.'s suggestions, I edited the code and came up with this. I'm not sure whether it downloads the whole file or just a part of it (as I wanted).
Upvotes: 26
Views: 34879
Reputation: 572
The shortest code I have come up with downloads only the first 1024 bytes. This can be set lower if you need to, but it may cause problems with some image types.
from io import BytesIO
from urllib.request import urlopen
from PIL import Image
Image.MAX_IMAGE_PIXELS = None # My problem had really big images
def get_image_size_from_url(url):
    response = urlopen(url)
    r = response.read(1024)  # only the first 1024 bytes
    img = Image.open(BytesIO(r))
    return img.size
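For example (the URL here is just a placeholder, not from the original answer):
width, height = get_image_size_from_url("https://example.com/photo.jpg")
print(width, height)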
Upvotes: 0
Reputation: 65
To get the image size in bytes:
Just fetch the headers from the server (without downloading the image itself):
import requests
url = r"https://www.sulitest.org/files/source/Big%20image%20HD/elyx.png"
size = requests.get(url, stream = True).headers['Content-length']
print(size)
## output: 437495
## to see what other headers data you can get:
allheaders = requests.get(url, stream = True).headers
print(allheaders)
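An alternative (my own suggestion, not part of the original answer) is a HEAD request, which asks the server for the headers only and never opens the body stream; note that not every server supports HEAD or includes Content-Length:
size = requests.head(url).headers.get('Content-Length')
print(size)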
To get the image (Width, Height):
We have to download part of the image and let an image library read the image header and parse out the (Width, Height). Here I'm using Pillow.
import requests
from PIL import ImageFile
resume_header = {'Range': 'bytes=0-2000000'} ## the amount of bytes you will download
data = requests.get(url, stream = True, headers = resume_header).content
p = ImageFile.Parser()
p.feed(data) ## feed the data to image parser to get photo info from data headers
if p.image:
    print(p.image.size)  ## get the image size (Width, Height)
    ## output: (1400, 1536)
Upvotes: 6
Reputation: 2246
This is just a Python 3+ adaptation of an earlier answer here.
from urllib import request as ulreq
from PIL import ImageFile
def getsizes(uri):
    # get file size *and* image size (None if not known)
    file = ulreq.urlopen(uri)
    size = file.headers.get("content-length")
    if size:
        size = int(size)
    p = ImageFile.Parser()
    while True:
        data = file.read(1024)
        if not data:
            break
        p.feed(data)
        if p.image:
            return size, p.image.size
    file.close()
    return size, None
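For example, using the same test image as the Python 2 answer further down:
print(getsizes("http://www.pythonware.com/images/small-yoyo.gif"))
# (10965, (179, 188))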
Upvotes: 16
Reputation: 665
import requests
from PIL import Image
from io import BytesIO
url = 'http://farm4.static.flickr.com/3488/4051378654_238ca94313.jpg'
img_data = requests.get(url).content
im = Image.open(BytesIO(img_data))
print (im.size)
Upvotes: -1
Reputation: 677
This is based on ed's answer mixed with other things I found on the web. I ran into the same issue as grotos with .read(24). Download getimageinfo.py from here and download ReSeekFile.py from here.
import urllib2
import getimageinfo

imgdata = urllib2.urlopen(href)
image_type, width, height = getimageinfo.getImageInfo(imgdata)
Modify getimageinfo as such...
import ReseekFile

def getImageInfo(datastream):
    datastream = ReseekFile.ReseekFile(datastream)
    data = str(datastream.read(30))
    # Skipping to jpeg
    # handle JPEGs
    elif (size >= 2) and data.startswith('\377\330'):
        content_type = 'image/jpeg'
        datastream.seek(0)
        datastream.read(2)
        b = datastream.read(1)
        try:
            while (b and ord(b) != 0xDA):
                while (ord(b) != 0xFF): b = datastream.read(1)
                while (ord(b) == 0xFF): b = datastream.read(1)
                if (ord(b) >= 0xC0 and ord(b) <= 0xC3):
                    datastream.read(3)
                    h, w = struct.unpack(">HH", datastream.read(4))
                    break
                else:
                    datastream.read(int(struct.unpack(">H", datastream.read(2))[0]) - 2)
                b = datastream.read(1)
            width = int(w)
            height = int(h)
        except struct.error:
            pass
        except ValueError:
            pass
Upvotes: 16
Reputation: 31
My fixed getimageinfo.py works with Python 3.4+. Try it, it works great!
import io
import struct
import urllib.request as urllib2
def getImageInfo(data):
    size = len(data)
    height = -1
    width = -1
    content_type = ''

    # handle GIFs
    if (size >= 10) and data[:6] in (b'GIF87a', b'GIF89a'):
        # Check to see if content_type is correct
        content_type = 'image/gif'
        w, h = struct.unpack(b"<HH", data[6:10])
        width = int(w)
        height = int(h)
    # See PNG 2. Edition spec (http://www.w3.org/TR/PNG/)
    # Bytes 0-7 are the signature, then a 4-byte chunk length, then 'IHDR'
    # and finally the 4-byte width, height
    elif ((size >= 24) and data.startswith(b'\211PNG\r\n\032\n')
          and (data[12:16] == b'IHDR')):
        content_type = 'image/png'
        w, h = struct.unpack(b">LL", data[16:24])
        width = int(w)
        height = int(h)
    # Maybe this is for an older PNG version.
    elif (size >= 16) and data.startswith(b'\211PNG\r\n\032\n'):
        # Check to see if we have the right content type
        content_type = 'image/png'
        w, h = struct.unpack(b">LL", data[8:16])
        width = int(w)
        height = int(h)
    # handle JPEGs
    elif (size >= 2) and data.startswith(b'\377\330'):
        content_type = 'image/jpeg'
        jpeg = io.BytesIO(data)
        jpeg.read(2)
        b = jpeg.read(1)
        try:
            while (b and ord(b) != 0xDA):
                while (ord(b) != 0xFF): b = jpeg.read(1)
                while (ord(b) == 0xFF): b = jpeg.read(1)
                if (ord(b) >= 0xC0 and ord(b) <= 0xC3):
                    jpeg.read(3)
                    h, w = struct.unpack(b">HH", jpeg.read(4))
                    break
                else:
                    jpeg.read(int(struct.unpack(b">H", jpeg.read(2))[0]) - 2)
                b = jpeg.read(1)
            width = int(w)
            height = int(h)
        except struct.error:
            pass
        except ValueError:
            pass

    return content_type, width, height
#from PIL import Image
#import requests
#hrefs = ['http://farm4.staticflickr.com/3894/15008518202_b016d7d289_m.jpg','https://farm4.staticflickr.com/3920/15008465772_383e697089_m.jpg','https://farm4.staticflickr.com/3902/14985871946_86abb8c56f_m.jpg']
#RANGE = 5000
#for href in hrefs:
#req = requests.get(href,headers={'User-Agent':'Mozilla5.0(Google spider)','Range':'bytes=0-{}'.format(RANGE)})
#im = getImageInfo(req.content)
#print(im)
req = urllib2.Request("http://vn-sharing.net/forum/images/smilies/onion/ngai.gif", headers={"Range": "bytes=0-5000"})  # only request the first bytes
r = urllib2.urlopen(req)
#f = open("D:\\Pictures\\1.jpg", "rb")
print(getImageInfo(r.read()))
# Output: >> ('image/gif', 50, 50)
#print(getImageInfo(f.read()))
Source code: http://code.google.com/p/bfg-pages/source/browse/trunk/pages/getimageinfo.py
Upvotes: 1
Reputation: 144
Since getimageinfo.py mentioned above doesn't work in Python 3, Pillow is used instead.
Pillow can be found on PyPI, or installed using pip: pip install pillow.
from io import BytesIO
from PIL import Image
import requests

hrefs = ['https://farm4.staticflickr.com/3894/15008518202_b016d7d289_m.jpg',
         'https://farm4.staticflickr.com/3920/15008465772_383e697089_m.jpg',
         'https://farm4.staticflickr.com/3902/14985871946_86abb8c56f_m.jpg']

RANGE = 5000
for href in hrefs:
    req = requests.get(href, headers={'User-Agent': 'Mozilla5.0(Google spider)', 'Range': 'bytes=0-{}'.format(RANGE)})
    im = Image.open(BytesIO(req.content))
    print(im.size)
Upvotes: 7
Reputation: 2181
Unfortunately I can't comment, so I'm posting this as an answer:
Use a GET request with the header
"Range": "bytes=0-30"
and then simply feed the result to getImageInfo() from
http://code.google.com/p/bfg-pages/source/browse/trunk/pages/getimageinfo.py
If you use Python's "requests", it's simply
import requests
from getimageinfo import getImageInfo  # the file linked above

r = requests.get(image_url, headers={
    "Range": "bytes=0-30"
})
image_info = getImageInfo(r.content)
This fixes ed.'s answer and doesn't have any other dependencies (like ReSeekFile.py).
Upvotes: 1
Reputation: 13089
I found the solution on this site to work well:
import urllib
import ImageFile
def getsizes(uri):
    # get file size *and* image size (None if not known)
    file = urllib.urlopen(uri)
    size = file.headers.get("content-length")
    if size:
        size = int(size)
    p = ImageFile.Parser()
    while 1:
        data = file.read(1024)
        if not data:
            break
        p.feed(data)
        if p.image:
            return size, p.image.size
    file.close()
    return size, None
print getsizes("http://www.pythonware.com/images/small-yoyo.gif")
# (10965, (179, 188))
Upvotes: 25
Reputation: 1393
If you're willing to download the first 24 bytes of each file, then this function (mentioned in johnteslade's answer to the question you mention) will work out the dimensions.
That's probably the least amount of downloading necessary to do what you want.
import urllib2
start = urllib2.urlopen(image_url).read(24)
Edit (1):
In the case of JPEG files it seems to need more bytes. You could edit the function so that instead of reading from StringIO.StringIO(data), it reads directly from the file handle returned by urlopen. Then it will read exactly as much of the image as it needs to find out the width and height; a rough sketch of that idea is below.
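A minimal sketch of that idea, assuming getImageInfo() from the linked getimageinfo.py has been adapted to accept a file-like object (that adaptation is an assumption, not the original code):
import urllib2

fh = urllib2.urlopen(image_url)                  # file-like handle, nothing read yet
content_type, width, height = getImageInfo(fh)   # reads only as many bytes as it needs
fh.close()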
Upvotes: 10
Reputation: 32716
It's not possible to do it directly, but there is a workaround. If the files are on a server you control, implement an API endpoint that takes an image name as an argument and returns its dimensions (a sketch is below).
But if the files are on a different server, you have no other option than to download them.
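A minimal sketch of such an endpoint, assuming Flask, Pillow, and a hypothetical images/ directory (none of these names come from the answer itself):
import os
from flask import Flask, jsonify, abort
from PIL import Image

app = Flask(__name__)
IMAGE_DIR = "images"  # hypothetical directory holding the files

@app.route("/image-size/<path:name>")
def image_size(name):
    path = os.path.join(IMAGE_DIR, name)
    if not os.path.isfile(path):
        abort(404)
    with Image.open(path) as img:   # Pillow only parses the header to get the size
        width, height = img.size
    return jsonify(width=width, height=height)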
Upvotes: 1