Reputation: 11
I am trying to extract data from a Databricks SQL Warehouse in an Azure Machine Learning notebook using the Databricks SQL Connector for Python (databricks-sql-connector), but I get the error shown below in the Jupyter notebook. I am using the following script:
from databricks import sql
import os
# Open the connection and the cursor as context managers so that both are
# closed even if execute() or fetchall() raises; with explicit close() calls
# at the end, any exception in between would skip the cleanup.
with sql.connect(
    server_hostname="adb-4285847157798999.17.azuredatabricks.net",
    http_path="/sql/1.0/warehouses/37667bb8d1fe23fa",
    access_token="<access-token>",
) as connection:
    with connection.cursor() as cursor:
        cursor.execute("SELECT * from range(10)")
        print(cursor.fetchall())
Below is the error I am getting. Please suggest how to solve this, as I am new to this.
gaierror Traceback (most recent call last)
File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connection.py:199, in HTTPConnection._new_conn(self)
198 try:
--> 199 sock = connection.create_connection(
200 (self._dns_host, self.port),
201 self.timeout,
202 source_address=self.source_address,
203 socket_options=self.socket_options,
204 )
205 except socket.gaierror as e:
File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/util/connection.py:60, in create_connection(address, timeout, source_address, socket_options)
58 raise LocationParseError(f"'{host}', label empty or too long") from None
---> 60 for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
61 af, socktype, proto, canonname, sa = res
File /anaconda/envs/nps/lib/python3.10/socket.py:955, in getaddrinfo(host, port, family, type, proto, flags)
954 addrlist = []
--> 955 for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
956 af, socktype, proto, canonname, sa = res
gaierror: [Errno -2] Name or service not known
The above exception was the direct cause of the following exception:
NameResolutionError Traceback (most recent call last)
File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connectionpool.py:789, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
788 # Make the request on the HTTPConnection object
--> 789 response = self._make_request(
790 conn,
791 method,
792 url,
793 timeout=timeout_obj,
794 body=body,
795 headers=headers,
796 chunked=chunked,
797 retries=retries,
798 response_conn=response_conn,
799 preload_content=preload_content,
800 decode_content=decode_content,
801 **response_kw,
802 )
804 # Everything went great!
File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connectionpool.py:490, in HTTPConnectionPool._make_request(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)
489 new_e = _wrap_proxy_error(new_e, conn.proxy.scheme)
--> 490 raise new_e
492 # conn.request() calls http.client.*.request, not the method in
493 # urllib3.request. It also calls makefile (recv) on the socket.
File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connectionpool.py:466, in HTTPConnectionPool._make_request(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)
465 try:
--> 466 self._validate_conn(conn)
467 except (SocketTimeout, BaseSSLError) as e:
File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connectionpool.py:1095, in HTTPSConnectionPool._validate_conn(self, conn)
1094 if conn.is_closed:
-> 1095 conn.connect()
1097 # TODO revise this, see https://github.com/urllib3/urllib3/issues/2791
File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connection.py:693, in HTTPSConnection.connect(self)
692 sock: socket.socket | ssl.SSLSocket
--> 693 self.sock = sock = self._new_conn()
694 server_hostname: str = self.host
File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connection.py:206, in HTTPConnection._new_conn(self)
205 except socket.gaierror as e:
--> 206 raise NameResolutionError(self.host, self, e) from e
207 except SocketTimeout as e:
NameResolutionError: <urllib3.connection.HTTPSConnection object at 0x7f065ff85ea0>: Failed to resolve 'dbstoragej5fibm47ntgqa.blob.core.windows.net' ([Errno -2] Name or service not known)
The above exception was the direct cause of the following exception:
MaxRetryError Traceback (most recent call last)
File /anaconda/envs/nps/lib/python3.10/site-packages/requests/adapters.py:667, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
666 try:
--> 667 resp = conn.urlopen(
668 method=request.method,
669 url=url,
670 body=request.body,
671 headers=request.headers,
672 redirect=False,
673 assert_same_host=False,
674 preload_content=False,
675 decode_content=False,
676 retries=self.max_retries,
677 timeout=timeout,
678 chunked=chunked,
679 )
681 except (ProtocolError, OSError) as err:
File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connectionpool.py:873, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
870 log.warning(
871 "Retrying (%r) after connection broken by '%r': %s", retries, err, url
872 )
--> 873 return self.urlopen(
874 method,
875 url,
876 body,
877 headers,
878 retries,
879 redirect,
880 assert_same_host,
881 timeout=timeout,
882 pool_timeout=pool_timeout,
883 release_conn=release_conn,
884 chunked=chunked,
885 body_pos=body_pos,
886 preload_content=preload_content,
887 decode_content=decode_content,
888 **response_kw,
889 )
891 # Handle redirect?
File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connectionpool.py:873, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
870 log.warning(
871 "Retrying (%r) after connection broken by '%r': %s", retries, err, url
872 )
--> 873 return self.urlopen(
874 method,
875 url,
876 body,
877 headers,
878 retries,
879 redirect,
880 assert_same_host,
881 timeout=timeout,
882 pool_timeout=pool_timeout,
883 release_conn=release_conn,
884 chunked=chunked,
885 body_pos=body_pos,
886 preload_content=preload_content,
887 decode_content=decode_content,
888 **response_kw,
889 )
891 # Handle redirect?
[... skipping similar frames: HTTPConnectionPool.urlopen at line 873 (2 times)]
File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connectionpool.py:873, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
870 log.warning(
871 "Retrying (%r) after connection broken by '%r': %s", retries, err, url
872 )
--> 873 return self.urlopen(
874 method,
875 url,
876 body,
877 headers,
878 retries,
879 redirect,
880 assert_same_host,
881 timeout=timeout,
882 pool_timeout=pool_timeout,
883 release_conn=release_conn,
884 chunked=chunked,
885 body_pos=body_pos,
886 preload_content=preload_content,
887 decode_content=decode_content,
888 **response_kw,
889 )
891 # Handle redirect?
File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connectionpool.py:843, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
841 new_e = ProtocolError("Connection aborted.", new_e)
--> 843 retries = retries.increment(
844 method, url, error=new_e, _pool=self, _stacktrace=sys.exc_info()[2]
845 )
846 retries.sleep()
File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/util/retry.py:519, in Retry.increment(self, method, url, response, error, _pool, _stacktrace)
518 reason = error or ResponseError(cause)
--> 519 raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
521 log.debug("Incremented Retry for (url='%s'): %r", url, new_retry)
MaxRetryError: HTTPSConnectionPool(host='dbstoragej5fibm47ntgqa.blob.core.windows.net', port=443): Max retries exceeded with url: /jobs/4285847157799137/sql/2024-12-19/14/results_2024-12-19T14:41:23Z_c0096a00-d0d4-450e-934f-f1ed607087ea?sig=vMkEoWgwiFZTQvCPve8bdNRwveQcepSL5GC8t8Biczc%3D&se=2024-12-19T14%3A56%3A24Z&sv=2019-02-02&spr=https&sp=r&sr=b (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x7f065ff85ea0>: Failed to resolve 'dbstoragej5fibm47ntgqa.blob.core.windows.net' ([Errno -2] Name or service not known)"))
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
File /anaconda/envs/nps/lib/python3.10/s
Upvotes: 1
Views: 55
Reputation: 11234
Follow the step-by-step process below to achieve your requirement.
Open Notebook in Azure ML workspace and install the databricks connector as shown below.
pip install databricks-sql-connector
After installing it, restart the kernel once so that the updated packages are picked up.
Then run the same code again; it worked for me.
The above error might arise when the databricks-connect
version conflicts with the PySpark version. You can try troubleshooting steps such as changing the compute, or uninstalling the current PySpark and then reinstalling the whole package.
Reference:
Troubleshooting Databricks Connect for Python
Upvotes: 0