Reputation: 622
Hu guys,
I'm new to python/anaconda/jupyter/numPy, panda, etc.... so please excuse me if it's a really stupid question. I'm trying to obtain MNIST database by using anaconda/jupyter. But everytime I get an HTTP error 500 at the end. Is it really a server problem (as 500 would suggest) or am I doing something wrong?
Input in jupyter:
from sklearn.datasets import fetch_mldata
mnist = fetch_mldata('MNIST original')
Result:
---------------------------------------------------------------------------
HTTPError Traceback (most recent call last)
<ipython-input-1-15dc285fb373> in <module>()
1 from sklearn.datasets import fetch_mldata
----> 2 mnist = fetch_mldata('MNIST original')
e:\ProgramData\Anaconda3\lib\site-packages\sklearn\datasets\mldata.py in fetch_mldata(dataname, target_name, data_name, transpose_data, data_home)
140 urlname = MLDATA_BASE_URL % quote(dataname)
141 try:
--> 142 mldata_url = urlopen(urlname)
143 except HTTPError as e:
144 if e.code == 404:
e:\ProgramData\Anaconda3\lib\urllib\request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
221 else:
222 opener = _opener
--> 223 return opener.open(url, data, timeout)
224
225 def install_opener(opener):
e:\ProgramData\Anaconda3\lib\urllib\request.py in open(self, fullurl, data, timeout)
530 for processor in self.process_response.get(protocol, []):
531 meth = getattr(processor, meth_name)
--> 532 response = meth(req, response)
533
534 return response
e:\ProgramData\Anaconda3\lib\urllib\request.py in http_response(self, request, response)
640 if not (200 <= code < 300):
641 response = self.parent.error(
--> 642 'http', request, response, code, msg, hdrs)
643
644 return response
e:\ProgramData\Anaconda3\lib\urllib\request.py in error(self, proto, *args)
562 http_err = 0
563 args = (dict, proto, meth_name) + args
--> 564 result = self._call_chain(*args)
565 if result:
566 return result
e:\ProgramData\Anaconda3\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
502 for handler in handlers:
503 func = getattr(handler, meth_name)
--> 504 result = func(*args)
505 if result is not None:
506 return result
e:\ProgramData\Anaconda3\lib\urllib\request.py in http_error_302(self, req, fp, code, msg, headers)
754 fp.close()
755
--> 756 return self.parent.open(new, timeout=req.timeout)
757
758 http_error_301 = http_error_303 = http_error_307 = http_error_302
e:\ProgramData\Anaconda3\lib\urllib\request.py in open(self, fullurl, data, timeout)
530 for processor in self.process_response.get(protocol, []):
531 meth = getattr(processor, meth_name)
--> 532 response = meth(req, response)
533
534 return response
e:\ProgramData\Anaconda3\lib\urllib\request.py in http_response(self, request, response)
640 if not (200 <= code < 300):
641 response = self.parent.error(
--> 642 'http', request, response, code, msg, hdrs)
643
644 return response
e:\ProgramData\Anaconda3\lib\urllib\request.py in error(self, proto, *args)
568 if http_err:
569 args = (dict, 'default', 'http_error_default') + orig_args
--> 570 return self._call_chain(*args)
571
572 # XXX probably also want an abstract factory that knows when it makes
e:\ProgramData\Anaconda3\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
502 for handler in handlers:
503 func = getattr(handler, meth_name)
--> 504 result = func(*args)
505 if result is not None:
506 return result
e:\ProgramData\Anaconda3\lib\urllib\request.py in http_error_default(self, req, fp, code, msg, hdrs)
648 class HTTPDefaultErrorHandler(BaseHandler):
649 def http_error_default(self, req, fp, code, msg, hdrs):
--> 650 raise HTTPError(req.full_url, code, msg, hdrs, fp)
651
652 class HTTPRedirectHandler(BaseHandler):
HTTPError: HTTP Error 500: INTERNAL SERVER ERROR
Upvotes: 13
Views: 17704
Reputation: 5
How about you go to this link:
https://anaconda.org/conda-forge/mnist
Follow the instructions:
I had the same problem too, I followed these instructions and I don't have any errors again. Hope this is helpful. Sorry if this doesn't solve your problem.
Upvotes: 0
Reputation: 187
I found this solution on https://github.com/ageron/handson-ml/issues/7 and this one was most useful for me. Just download the file from https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat
after that use this script:
from scipy.io import loadmat
mnist_path = "my/local/path/mnist-original.mat" #type the directory where you want to the file is located
mnist_raw = loadmat(mnist_path)
mnist = {
"data": mnist_raw["data"].T,
"target": mnist_raw["label"][0],
"COL_NAMES": ["label", "data"],
"DESCR": "mldata.org dataset: mnist-original",
}
print("Success!")
Upvotes: 0
Reputation: 400
Late to the party, but i had the same error and my simple solution was to run the two commands separately, like:
from sklearn import datasets
and make sure you run this in a separate line in jupyter notebook
mnist_data = datasets.fetch_mldata('MNIST original', data_home = 'datasets/')
Upvotes: 2
Reputation: 338
this is for python 3.6.*
import os
from urllib.request import urlretrieve
import numpy as np
def download(filename, source='http://yann.lecun.com/exdb/mnist/'):
print("Downloading %s" % filename)
urlretrieve(source + filename, filename)
# We then define functions for loading MNIST images and labels.
# For convenience, they also download the requested files if needed.
import gzip
def load_mnist_images(filename):
if not os.path.exists(filename):
download(filename)
# Read the inputs in Yann LeCun's binary format.
with gzip.open(filename, 'rb') as f:
data = np.frombuffer(f.read(), np.uint8, offset=16)
# The inputs are vectors now, we reshape them to monochrome 2D images,
# following the shape convention: (examples, channels, rows, columns)
data = data.reshape(-1, 1, 28, 28)
# The inputs come as bytes, we convert them to float32 in range [0,1].
# (Actually to range [0, 255/256], for compatibility to the version
# provided at http://deeplearning.net/data/mnist/mnist.pkl.gz.)
return data / np.float32(256)
def load_mnist_labels(filename):
if not os.path.exists(filename):
download(filename)
# Read the labels in Yann LeCun's binary format.
with gzip.open(filename, 'rb') as f:
data = np.frombuffer(f.read(), np.uint8, offset=8)
# The labels are vectors of integers now, that's exactly what we want.
return data
X_train = load_mnist_images('train-images-idx3-ubyte.gz')
y_train = load_mnist_labels('train-labels-idx1-ubyte.gz')
X_test = load_mnist_images('t10k-images-idx3-ubyte.gz')
y_test = load_mnist_labels('t10k-labels-idx1-ubyte.gz')
Upvotes: 0
Reputation: 116
from sklearn.datasets import fetch_mldata
try:
mnist = fetch_mldata('MNIST original')
except Exception as ex:
from six.moves import urllib
from scipy.io import loadmat
import os
mnist_path = os.path.join(".", "datasets", "mnist-original.mat")
# download dataset from github.
mnist_alternative_url = "https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat"
response = urllib.request.urlopen(mnist_alternative_url)
with open(mnist_path, "wb") as f:
content = response.read()
f.write(content)
mnist_raw = loadmat(mnist_path)
mnist = {
"data": mnist_raw["data"].T,
"target": mnist_raw["label"][0],
"COL_NAMES": ["label", "data"],
"DESCR": "mldata.org dataset: mnist-original",
}
print("Done!")
Upvotes: 4
Reputation: 1189
I also had the same error and had to turn off the firewall. On the Macbook, go System Preferences > Security & Privacy > Firewall > Turn Off Firewall.
Upvotes: 5
Reputation: 11
Found a good solution here: https://github.com/Lasagne/Lasagne/blob/master/examples/mnist.py
It downloads dataset from Yan LeCun's website (http://yann.lecun.com/exdb/mnist/).
import os
from urllib import urlretrieve
def download(filename, source='http://yann.lecun.com/exdb/mnist/'):
print("Downloading %s" % filename)
urlretrieve(source + filename, filename)
# We then define functions for loading MNIST images and labels.
# For convenience, they also download the requested files if needed.
import gzip
def load_mnist_images(filename):
if not os.path.exists(filename):
download(filename)
# Read the inputs in Yann LeCun's binary format.
with gzip.open(filename, 'rb') as f:
data = np.frombuffer(f.read(), np.uint8, offset=16)
# The inputs are vectors now, we reshape them to monochrome 2D images,
# following the shape convention: (examples, channels, rows, columns)
data = data.reshape(-1, 1, 28, 28)
# The inputs come as bytes, we convert them to float32 in range [0,1].
# (Actually to range [0, 255/256], for compatibility to the version
# provided at http://deeplearning.net/data/mnist/mnist.pkl.gz.)
return data / np.float32(256)
def load_mnist_labels(filename):
if not os.path.exists(filename):
download(filename)
# Read the labels in Yann LeCun's binary format.
with gzip.open(filename, 'rb') as f:
data = np.frombuffer(f.read(), np.uint8, offset=8)
# The labels are vectors of integers now, that's exactly what we want.
return data
X_train = load_mnist_images('train-images-idx3-ubyte.gz')
y_train = load_mnist_labels('train-labels-idx1-ubyte.gz')
X_test = load_mnist_images('t10k-images-idx3-ubyte.gz')
y_test = load_mnist_labels('t10k-labels-idx1-ubyte.gz')
Upvotes: 1
Reputation: 11
Here is an alternative location to download the MNIST dataset (referenced from https://github.com/ageron/handson-ml/blob/master/03_classification.ipynb)
from six.moves import urllib
from sklearn.datasets import fetch_mldata
try:
mnist = fetch_mldata('MNIST original')
except urllib.error.HTTPError as ex:
print("Could not download MNIST data from mldata.org, trying alternative...")
# Alternative method to load MNIST, if mldata.org is down
from scipy.io import loadmat
mnist_alternative_url = "https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat"
mnist_path = "./mnist-original.mat"
response = urllib.request.urlopen(mnist_alternative_url)
with open(mnist_path, "wb") as f:
content = response.read()
f.write(content)
mnist_raw = loadmat(mnist_path)
mnist = {
"data": mnist_raw["data"].T,
"target": mnist_raw["label"][0],
"COL_NAMES": ["label", "data"],
"DESCR": "mldata.org dataset: mnist-original",
}
print("Success!")
Upvotes: 1
Reputation: 19634
I also get the same error as you. Here are some possible solutions that do not require this server.
If you have tensorflow
installed, you can get MNIST data in the following way:
import tensorflow.examples.tutorials.mnist.input_data as input_data
m=input_data.read_data_sets("MNIST")
Then for example len(m.train.images)
is 55000.
If you don't have tensorflow, you can get this dataset using the instructions here.
Upvotes: 3