Reputation: 121
An error occurred when I was loading the MNIST data using the following code (Anaconda is already installed and I am running the code in an online Jupyter notebook).
from sklearn.datasets import fetch_mldata
mnist = fetch_mldata('MNIST original')
A TimeoutError appeared and I have no idea where I made a mistake. I have closed my VPN proxy, but it didn't work. Help!
TimeoutError Traceback (most recent call last)
<ipython-input-1-3ba7b9c02a3b> in <module>()
1 from sklearn.datasets import fetch_mldata
----> 2 mnist = fetch_mldata('MNIST original')
~\Anaconda3\lib\site-packages\sklearn\datasets\mldata.py in fetch_mldata(dataname, target_name, data_name, transpose_data, data_home)
152 urlname = MLDATA_BASE_URL % quote(dataname)
153 try:
--> 154 mldata_url = urlopen(urlname)
155 except HTTPError as e:
156 if e.code == 404:
~\Anaconda3\lib\urllib\request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
221 else:
222 opener = _opener
--> 223 return opener.open(url, data, timeout)
224
225 def install_opener(opener):
~\Anaconda3\lib\urllib\request.py in open(self, fullurl, data, timeout)
524 req = meth(req)
525
--> 526 response = self._open(req, data)
527
528 # post-process response
~\Anaconda3\lib\urllib\request.py in _open(self, req, data)
542 protocol = req.type
543 result = self._call_chain(self.handle_open, protocol, protocol +
--> 544 '_open', req)
545 if result:
546 return result
~\Anaconda3\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
502 for handler in handlers:
503 func = getattr(handler, meth_name)
--> 504 result = func(*args)
505 if result is not None:
506 return result
~\Anaconda3\lib\urllib\request.py in http_open(self, req)
1344
1345 def http_open(self, req):
-> 1346 return self.do_open(http.client.HTTPConnection, req)
1347
1348 http_request = AbstractHTTPHandler.do_request_
~\Anaconda3\lib\urllib\request.py in do_open(self, http_class, req, **http_conn_args)
1319 except OSError as err: # timeout error
1320 raise URLError(err)
-> 1321 r = h.getresponse()
1322 except:
1323 h.close()
~\Anaconda3\lib\http\client.py in getresponse(self)
1329 try:
1330 try:
-> 1331 response.begin()
1332 except ConnectionError:
1333 self.close()
~\Anaconda3\lib\http\client.py in begin(self)
295 # read until we get a non-100 response
296 while True:
--> 297 version, status, reason = self._read_status()
298 if status != CONTINUE:
299 break
~\Anaconda3\lib\http\client.py in _read_status(self)
256
257 def _read_status(self):
--> 258 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
259 if len(line) > _MAXLINE:
260 raise LineTooLong("status line")
~\Anaconda3\lib\socket.py in readinto(self, b)
584 while True:
585 try:
--> 586 return self._sock.recv_into(b)
587 except timeout:
588 self._timeout_occurred = True
TimeoutError: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond
I downloaded the MNIST dataset and tried to load the data myself instead. I copied the code used to load MNIST, but I failed to load the data again. I think I need to change some of the code rather than copy it completely from the Internet, but I don't know where to make the change (I'm just a beginner in Python). Below is the code I used to load the downloaded MNIST data. Is it because I put the data in the wrong folder?
import numpy as np
from struct import unpack

def loadmnist(imagefile, labelfile):
    # Open the image and label files in read binary mode
    images = open(imagefile, 'rb')
    labels = open(labelfile, 'rb')

    # Get metadata for images
    images.read(4)  # skip the magic number
    number_of_images = images.read(4)
    number_of_images = unpack('>I', number_of_images)[0]
    rows = images.read(4)
    rows = unpack('>I', rows)[0]
    cols = images.read(4)
    cols = unpack('>I', cols)[0]

    # Get metadata for labels
    labels.read(4)  # skip the magic number
    N = labels.read(4)
    N = unpack('>I', N)[0]

    # Get data
    x = np.zeros((N, rows*cols), dtype=np.uint8)  # initialize numpy array
    y = np.zeros(N, dtype=np.uint8)               # initialize numpy array
    for i in range(N):
        for j in range(rows*cols):
            tmp_pixel = images.read(1)  # just a single byte
            tmp_pixel = unpack('>B', tmp_pixel)[0]
            x[i][j] = tmp_pixel
        tmp_label = labels.read(1)
        y[i] = unpack('>B', tmp_label)[0]

    images.close()
    labels.close()
    return (x, y)
The part above runs fine.
train_img, train_lbl = loadmnist('data/train-images-idx3-ubyte'
                                 , 'data/train-labels-idx1-ubyte')
test_img, test_lbl = loadmnist('data/t10k-images-idx3-ubyte'
                               , 'data/t10k-labels-idx1-ubyte')
The error looks like this:
FileNotFoundError Traceback (most recent call last)
<ipython-input-5-b23a5078b5bb> in <module>()
1 train_img, train_lbl = loadmnist('data/train-images-idx3-ubyte'
----> 2 , 'data/train-labels-idx1-ubyte')
3 test_img, test_lbl = loadmnist('data/t10k-images-idx3-ubyte'
4 , 'data/t10k-labels-idx1-ubyte')
<ipython-input-4-967098b85f28> in loadmnist(imagefile, labelfile)
2
3 # Open the images with gzip in read binary mode
----> 4 images = open(imagefile, 'rb')
5 labels = open(labelfile, 'rb')
6
FileNotFoundError: [Errno 2] No such file or directory: 'data/train-images-idx3-ubyte'
The data I downloaded was put in a folder I just made.
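A quick way to check whether the notebook can actually see that folder (just a sanity check, assuming the folder is named data and sits next to the notebook):
import os

print(os.getcwd())             # the notebook's current working directory
print(os.path.exists('data'))  # does a folder named 'data' exist here?
if os.path.exists('data'):
    print(os.listdir('data'))  # which files are actually inside it?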
Upvotes: 2
Views: 9956
Reputation: 663
I faced this error while coding in Spyder (Python 3.7) installed locally with Anaconda. I tried many answers, and in the end I was only able to get around this error by specifying the path to the MNIST dataset file after downloading it.
from scipy.io import loadmat

mnist_path = r"C:\Users\duppa\Desktop\mnist-original.mat"
mnist_raw = loadmat(mnist_path)
mnist = {
    "data": mnist_raw["data"].T,
    "target": mnist_raw["label"][0],
    "COL_NAMES": ["label", "data"],
    "DESCR": "mldata.org dataset: mnist-original",
}
mnist
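As a quick check that the file loaded correctly (the shapes below are what the standard mnist-original.mat should give; treat them as assumptions):
print(mnist["data"].shape)    # expected (70000, 784)
print(mnist["target"].shape)  # expected (70000,)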
Upvotes: 0
Reputation: 16
You can load a digits dataset from the sklearn datasets module directly. Note that load_digits is the small 8x8 digits set bundled with scikit-learn, not the full 28x28 MNIST.
from sklearn import datasets
digits = datasets.load_digits()
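A quick look at what you get back (the shapes assume the dataset bundled with scikit-learn):
print(digits.data.shape)    # (1797, 64): 1797 images of 8x8 pixels, flattened
print(digits.target.shape)  # (1797,): one label per image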
Or you could load it using Keras.
from keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
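The first call downloads the data (it needs an internet connection once) and returns numpy arrays; a quick shape check, assuming the standard train/test split:
print(x_train.shape, y_train.shape)  # (60000, 28, 28) (60000,)
print(x_test.shape, y_test.shape)    # (10000, 28, 28) (10000,)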
Another option is to download the dataset as a CSV file and load it with something like pandas.
import pandas as pd

df = pd.read_csv('filename.csv')
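The common MNIST CSV files put the digit label in the first column and the 784 pixel values after it; splitting them could look like this (the column layout is an assumption about the particular file you downloaded):
y = df.iloc[:, 0].values   # first column: digit label (assumed layout)
X = df.iloc[:, 1:].values  # remaining 784 columns: pixel values (assumed layout)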
Upvotes: 0
Reputation: 11917
If you want to load the dataset directly from a library rather than downloading it yourself and then loading it, you can load it from Keras.
It can be done like this:
from keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
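If you need the flat (N, 784) layout that the loadmnist function in the question produces, reshaping after loading is enough; a minimal sketch:
X_train = X_train.reshape(len(X_train), -1)  # (60000, 28, 28) -> (60000, 784)
X_test = X_test.reshape(len(X_test), -1)     # (10000, 28, 28) -> (10000, 784)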
If you are a beginner in Machine Learning and Python and want to know more about it, I recommend taking a look at this excellent blog post.
Also, the file extension is required when passing the filename to the function, i.e. you have to call it like this:
train_img, train_lbl = loadmnist('mnist//train-images-idx3-ubyte.gz'
, 'mnist//train-labels-idx1-ubyte.gz')
test_img, test_lbl = loadmnist('mnist//t10k-images-idx3-ubyte.gz'
, 'mnist//t10k-labels-idx1-ubyte.gz')
The code you are using to load the data from the local disk throws an error because the file is not present at the given location. Make sure that the folder mnist is in the same folder as your notebook.
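If the files on disk are still gzip-compressed (which the .gz extension suggests), one option is to open them with the gzip module instead of plain open(). Below is a sketch, assuming the same IDX layout that loadmnist expects; loadmnist_gz is a hypothetical helper name, not part of the original code.
import gzip
import numpy as np
from struct import unpack

def loadmnist_gz(imagefile, labelfile):
    # Same IDX parsing as loadmnist, but reads gzip-compressed files directly
    with gzip.open(imagefile, 'rb') as images, gzip.open(labelfile, 'rb') as labels:
        images.read(4)                                       # skip the magic number
        number_of_images = unpack('>I', images.read(4))[0]
        rows = unpack('>I', images.read(4))[0]
        cols = unpack('>I', images.read(4))[0]
        labels.read(4)                                       # skip the magic number
        N = unpack('>I', labels.read(4))[0]
        # Read all pixel and label bytes at once instead of one byte at a time
        x = np.frombuffer(images.read(N * rows * cols), dtype=np.uint8).reshape(N, rows * cols)
        y = np.frombuffer(labels.read(N), dtype=np.uint8)
    return x, y

train_img, train_lbl = loadmnist_gz('mnist/train-images-idx3-ubyte.gz',
                                    'mnist/train-labels-idx1-ubyte.gz')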
Upvotes: 1