PScode

Reputation: 29

How to import a data file from an S3 bucket into a SageMaker notebook?

I have npz files that I want to import for my model training. Below is the code I have tried.

import s3fs
from PIL import Image  # needed for Image.open below

fs = s3fs.S3FileSystem()

# List 5 files in the accessible bucket
# fs.ls('s3://input_data/train_npz/')[:5]

# open it directly
with fs.open(f's3://input_data/train_npz/0.npz') as f:
    display(Image.open(f))
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input> in <module>
      7
      8 # open it directly
----> 9 with fs.open(f's3://input_data/train_npz/0.npz') as f:
     10     display(Image.open(f))

/opt/conda/lib/python3.6/site-packages/fsspec/spec.py in open(self, path, mode, block_size, cache_options, **kwargs)
    980                 autocommit=ac,
    981                 cache_options=cache_options,
--> 982                 **kwargs,
    983             )
    984         if not ac and "r" not in mode:

/opt/conda/lib/python3.6/site-packages/s3fs/core.py in _open(self, path, mode, block_size, acl, version_id, fill_cache, cache_type, autocommit, requester_pays, **kwargs)
    543             cache_type=cache_type,
    544             autocommit=autocommit,
--> 545             requester_pays=requester_pays,
    546         )
    547

/opt/conda/lib/python3.6/site-packages/s3fs/core.py in __init__(self, s3, path, mode, block_size, acl, version_id, fill_cache, s3_additional_kwargs, autocommit, cache_type, requester_pays)
   1822         self.version_id = self.details.get("VersionId")
   1823         super().__init__(
-> 1824             s3, path, mode, block_size, autocommit=autocommit, cache_type=cache_type
   1825         )
   1826         self.s3 = self.fs  # compatibility

/opt/conda/lib/python3.6/site-packages/fsspec/spec.py in __init__(self, fs, path, mode, block_size, autocommit, cache_type, cache_options, **kwargs)
   1302         if mode == "rb":
   1303             if not hasattr(self, "details"):
-> 1304                 self.details = fs.info(path)
   1305             self.size = self.details["size"]
   1306         self.cache = caches[cache_type](

/opt/conda/lib/python3.6/site-packages/fsspec/asyn.py in wrapper(*args, **kwargs)
     86     def wrapper(*args, **kwargs):
     87         self = obj or args[0]
---> 88         return sync(self.loop, func, *args, **kwargs)
     89
     90     return wrapper

/opt/conda/lib/python3.6/site-packages/fsspec/asyn.py in sync(loop, func, timeout, *args, **kwargs)
     67         raise FSTimeoutError
     68     if isinstance(result[0], BaseException):
---> 69         raise result[0]
     70     return result[0]
     71

/opt/conda/lib/python3.6/site-packages/fsspec/asyn.py in _runner(event, coro, result, timeout)
     23         coro = asyncio.wait_for(coro, timeout=timeout)
     24     try:
---> 25         result[0] = await coro
     26     except Exception as ex:
     27         result[0] = ex

/opt/conda/lib/python3.6/site-packages/s3fs/core.py in _info(self, path, bucket, key, refresh, version_id)
   1062         else:
   1063             try:
-> 1064                 out = await self._simple_info(path)
   1065             except PermissionError:
   1066                 # If the permissions aren't enough for scanning a prefix

/opt/conda/lib/python3.6/site-packages/s3fs/core.py in _simple_info(self, path)
    982             Delimiter="/",
    983             MaxKeys=1,
--> 984             **self.req_kw,
    985         )
    986         # This method either can return the info blob for the object if it

/opt/conda/lib/python3.6/site-packages/s3fs/core.py in _call_s3(self, method, *akwarglist, **kwargs)
    235
    236     async def _call_s3(self, method, *akwarglist, **kwargs):
--> 237         await self.set_session()
    238         s3 = await self.get_s3(kwargs.get("Bucket"))
    239         method = getattr(s3, method)

/opt/conda/lib/python3.6/site-packages/s3fs/core.py in set_session(self, refresh, kwargs)
    376
    377         conf = AioConfig(**config_kwargs)
--> 378         self.session = aiobotocore.AioSession(**self.kwargs)
    379
    380         for parameters in (config_kwargs, self.kwargs, init_kwargs, client_kwargs):

AttributeError: module 'aiobotocore' has no attribute 'AioSession'

Can anyone let me know where I made a mistake, or how to do this correctly?

Upvotes: 1

Views: 817

Answers (1)

rok

Reputation: 2775

s3fs needs credentials, see here. I think you have to provide credentials for the bucket using one of the methods listed in the doc I linked. However, there are many other ways to read S3 files into a SageMaker notebook; take a look here, for example.
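As a rough illustration of both points (a minimal sketch, not the answer's exact code): assuming the bucket/key from the question and that either the placeholder keys below or the notebook's execution role grant read access, you could pass credentials to s3fs explicitly, or fall back to boto3, and then load the .npz with numpy rather than PIL:

import io

import boto3
import numpy as np
import s3fs

# Option 1: s3fs with explicit credentials (placeholders below); by default it
# can also pick up credentials from the environment / the notebook's IAM role.
fs = s3fs.S3FileSystem(key='YOUR_ACCESS_KEY_ID', secret='YOUR_SECRET_ACCESS_KEY')
with fs.open('s3://input_data/train_npz/0.npz', 'rb') as f:
    data = np.load(f)   # .npz archives are read with numpy, not PIL
    print(data.files)   # names of the arrays stored in the archive

# Option 2: boto3 (preinstalled on SageMaker notebooks), which uses the
# execution role's credentials; the body is buffered so numpy can seek in it.
s3 = boto3.client('s3')
obj = s3.get_object(Bucket='input_data', Key='train_npz/0.npz')
data = np.load(io.BytesIO(obj['Body'].read()))
print(data.files)

Either way, the key point is that the client must be able to authenticate against the bucket; on SageMaker the simplest route is usually to attach an S3 read policy to the notebook's execution role so no keys have to be hard-coded.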

Upvotes: 0
