rufaidulk
rufaidulk

Reputation: 374

python code for downloading images from image-net.org for haar cascade training

I have some Python code for downloading images from "www.image-net.org" for Haar cascade training. Basically, it goes through each image URL in the list and downloads the image.

import urllib2  
import cv2
import numpy as np
import os
import urllib
import sys


# Python 2-only workaround: sys.setdefaultencoding is not reachable after
# interpreter start-up, so the sys module is reloaded to get it back, and the
# default string encoding is then switched to UTF-8.
# NOTE(review): reload() is not a builtin on Python 3 - confirm the target
# interpreter before reusing these two lines.
reload(sys)
sys.setdefaultencoding('utf8')


def store_raw_images():
    """Download the images of one ImageNet synset into the ``pos`` directory.

    The URL list for wnid ``n04154340`` is fetched as text, one URL per line.
    Each URL is downloaded to ``pos/<n>.jpg``, re-read as greyscale, resized
    to 100x100 and written back in place.  ``<n>`` only advances after a
    successful save, so the stored files are numbered without gaps.

    Entries that cannot be downloaded or decoded are reported on stdout and
    skipped; whatever was written for them is deleted so that ``pos`` never
    ends up containing non-image files.
    """
    pos_images_link = 'http://www.image-net.org/api/text/imagenet.synset.geturls?wnid=n04154340'
    pos_image_urls = urllib2.urlopen(pos_images_link).read().decode()

    if not os.path.exists('pos'):
        os.makedirs('pos')

    pic_num = 1
    for line in pos_image_urls.split('\n'):
        # Lines may end in '\r' and the list may contain blank lines; neither
        # is a usable URL, so normalise first and skip what is left empty.
        image_url = line.strip()
        if not image_url:
            continue

        image_path = "pos/" + str(pic_num) + ".jpg"
        saved = False
        try:
            print(image_url)
            urllib.urlretrieve(image_url, image_path)
            img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # imread signals "could not decode" by returning None, e.g.
                # when a dead link served an HTML page instead of a picture.
                print("skipping unreadable image: " + image_url)
            else:
                # should be larger than samples / pos pic (so we can place our image on it)
                resized_image = cv2.resize(img, (100, 100))
                cv2.imwrite(image_path, resized_image)
                saved = True
        except Exception as e:
            # Best effort: one bad URL must not stop the whole download run.
            print(str(e))

        if saved:
            pic_num += 1
        elif os.path.exists(image_path):
            # Remove the partial / non-image download so it cannot be
            # mistaken for a training sample later.
            os.remove(image_path)
# Script entry point: start the download as soon as the file is executed.
store_raw_images()

I copied and pasted the download URL into "pos_images_link", but the code only processes the URLs of 5 images and then stops running with this message in the terminal:

"terminate called after throwing an instance of 'std::out_of_range'
  what():  basic_string::substr: __pos (which is 140) > this->size() (which is 0)"

I am using OpenCV 3.1.0 and Python 2.7.12.

Upvotes: 3

Views: 1398

Answers (1)

caot
caot

Reputation: 3328

The following worked in Python 3 with OpenCV:

from urllib.request import Request, urlretrieve
import cv2
import numpy as np
import os
import urllib
import sys

def store_raw_images():
    """Download the images of one ImageNet synset into the ``pos`` directory.

    The URL list for wnid ``n04154340`` is fetched as UTF-8 text, one URL per
    line.  Each URL is downloaded to ``pos/<n>.jpg``, re-read as greyscale,
    resized to 100x100 and written back in place.  ``<n>`` only advances
    after a successful save, so the stored files are numbered without gaps.

    Entries that cannot be downloaded or decoded are reported on stdout and
    skipped; whatever was written for them is deleted so that ``pos`` never
    ends up containing non-image files.
    """
    url = 'http://www.image-net.org/api/text/imagenet.synset.geturls?wnid=n04154340'
    request = urllib.request.Request(url)
    # The context manager closes the HTTP response once the list is read.
    with urllib.request.urlopen(request) as response:
        urls = response.read().decode('utf-8')

    if not os.path.exists('pos'):
        os.makedirs('pos')

    pic_num = 1
    for line in urls.split('\n'):
        # Lines may end in '\r' and the list may contain blank lines; neither
        # is a usable URL, so normalise first and skip what is left empty.
        image_url = line.strip()
        if not image_url:
            continue

        image_path = "pos/" + str(pic_num) + ".jpg"
        saved = False
        try:
            print(image_url)
            urlretrieve(image_url, image_path)
            img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # imread signals "could not decode" by returning None, e.g.
                # when a dead link served an HTML page instead of a picture.
                print("skipping unreadable image: " + image_url)
            else:
                # should be larger than samples / pos pic (so we can place our image on it)
                resized_image = cv2.resize(img, (100, 100))
                cv2.imwrite(image_path, resized_image)
                saved = True
        except Exception as e:
            # Best effort: one bad URL must not stop the whole download run.
            print(str(e))

        if saved:
            pic_num += 1
        elif os.path.exists(image_path):
            # Remove the partial / non-image download so it cannot be
            # mistaken for a training sample later.
            os.remove(image_path)

# Script entry point: start the download as soon as the file is executed.
store_raw_images()

Upvotes: 1

Related Questions