vinicius.alves

Reputation: 11

How to properly use SQL/Hive variables in the new Databricks Connect

I'm testing the new Databricks Connect, and I often use SQL variables in my Python scripts on Databricks. However, I'm not able to use those variables through dbconnect. The example below works fine in Databricks but not in dbconnect:

from pyspark.sql import SparkSession
from pyspark.sql import SQLContext
import pandas as pd

spark = SparkSession.builder.getOrCreate()
sqlContext = SQLContext(spark)

# Register a small test table
df = spark.createDataFrame(pd.DataFrame({'a': [2, 5, 8], 'b': [3, 5, 5]}))
df.createOrReplaceTempView('test_view')

# Set a Hive variable, then reference it via ${...} substitution
sqlContext.sql("set a_value = 2")
sqlContext.sql("select * from test_view where a = ${a_value}")

In dbconnect I received the following:

---------------------------------------------------------------------------
ParseException                            Traceback (most recent call last)
<ipython-input-50-404f4c5b017c> in <module>
     10 
     11 sqlContext.sql("set a_value = 2")
---> 12 sqlContext.sql("select * from test_view where a = ${a_value}")

c:\users\pc\miniconda3\lib\site-packages\pyspark\sql\context.py in sql(self, sqlQuery)
    369         [Row(f1=1, f2=u'row1'), Row(f1=2, f2=u'row2'), Row(f1=3, f2=u'row3')]
    370         """
--> 371         return self.sparkSession.sql(sqlQuery)
    372 
    373     @since(1.0)

c:\users\pc\miniconda3\lib\site-packages\pyspark\sql\session.py in sql(self, sqlQuery)
    702         [Row(f1=1, f2=u'row1'), Row(f1=2, f2=u'row2'), Row(f1=3, f2=u'row3')]
    703         """
--> 704         return DataFrame(self._jsparkSession.sql(sqlQuery), self._wrapped)
    705 
    706     @since(2.0)

c:\users\pc\miniconda3\lib\site-packages\py4j\java_gateway.py in __call__(self, *args)
   1303         answer = self.gateway_client.send_command(command)
   1304         return_value = get_return_value(
-> 1305             answer, self.gateway_client, self.target_id, self.name)
   1306 
   1307         for temp_arg in temp_args:

c:\users\pc\miniconda3\lib\site-packages\pyspark\sql\utils.py in deco(*a, **kw)
    132                 # Hide where the exception came from that shows a non-Pythonic
    133                 # JVM exception message.
--> 134                 raise_from(converted)
    135             else:
    136                 raise

c:\users\pc\miniconda3\lib\site-packages\pyspark\sql\utils.py in raise_from(e)

ParseException: 
mismatched input '<EOF>' expecting {'(', 'COLLECT', 'CONVERT', 'DELTA', 'HISTORY', 'MATCHED', 'MERGE', 'OPTIMIZE', 'SAMPLE', 'TIMESTAMP', 'UPDATE', 'VERSION', 'ZORDER', 'ADD', 'AFTER', 'ALL', 'ALTER', 'ANALYZE', 'AND', 'ANTI', 'ANY', 'ARCHIVE', 'ARRAY', 'AS', 'ASC', 'AT', 'AUTHORIZATION', 'BETWEEN', 'BOTH', 'BUCKET', 'BUCKETS', 'BY', 'CACHE', 'CASCADE', 'CASE', 'CAST', 'CHANGE', 'CHECK', 'CLEAR', 'CLONE', 'CLUSTER', 'CLUSTERED', 'CODEGEN', 'COLLATE', 'COLLECTION', 'COLUMN', 'COLUMNS', 'COMMENT', 'COMMIT', 'COMPACT', 'COMPACTIONS', 'COMPUTE', 'CONCATENATE', 'CONSTRAINT', 'COPY', 'COPY_OPTIONS', 'COST', 'CREATE', 'CREDENTIALS', 'CROSS', 'CUBE', 'CURRENT', 'CURRENT_DATE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'CURRENT_USER', 'DATA', 'DATABASE', DATABASES, 'DAY', 'DBPROPERTIES', 'DEEP', 'DEFINED', 'DELETE', 'DELIMITED', 'DESC', 'DESCRIBE', 'DFS', 'DIRECTORIES', 'DIRECTORY', 'DISTINCT', 'DISTRIBUTE', 'DROP', 'ELSE', 'ENCRYPTION', 'END', 'ESCAPE', 'ESCAPED', 'EXCEPT', 'EXCHANGE', 'EXISTS', 'EXPLAIN', 'EXPORT', 'EXTENDED', 'EXTERNAL', 'EXTRACT', 'FALSE', 'FETCH', 'FIELDS', 'FILTER', 'FILEFORMAT', 'FILES', 'FIRST', 'FOLLOWING', 'FOR', 'FOREIGN', 'FORMAT', 'FORMAT_OPTIONS', 'FORMATTED', 'FROM', 'FULL', 'FUNCTION', 'FUNCTIONS', 'GLOBAL', 'GRANT', 'GROUP', 'GROUPING', 'HAVING', 'HOUR', 'IF', 'IGNORE', 'IMPORT', 'IN', 'INDEX', 'INDEXES', 'INNER', 'INPATH', 'INPUTFORMAT', 'INSERT', 'INTERSECT', 'INTERVAL', 'INTO', 'IS', 'ITEMS', 'JOIN', 'KEYS', 'LAST', 'LATERAL', 'LAZY', 'LEADING', 'LEFT', 'LIKE', 'LIMIT', 'LINES', 'LIST', 'LOAD', 'LOCAL', 'LOCATION', 'LOCK', 'LOCKS', 'LOGICAL', 'MACRO', 'MAP', 'MINUTE', 'MONTH', 'MSCK', 'NAMESPACE', 'NAMESPACES', 'NATURAL', 'NO', NOT, 'NULL', 'NULLS', 'OF', 'ON', 'ONLY', 'OPTION', 'OPTIONS', 'OR', 'ORDER', 'OUT', 'OUTER', 'OUTPUTFORMAT', 'OVER', 'OVERLAPS', 'OVERLAY', 'OVERWRITE', 'PARTITION', 'PARTITIONED', 'PARTITIONS', 'PATTERN', 'PERCENT', 'PIVOT', 'PLACING', 'POSITION', 'PRECEDING', 'PRIMARY', 'PRINCIPALS', 'PROPERTIES', 'PURGE', 'QUERY', 'RANGE', 'RECORDREADER', 'RECORDWRITER', 'RECOVER', 'REDUCE', 'REFERENCES', 'REFRESH', 'RENAME', 'REPAIR', 'REPLACE', 'RESET', 'RESTRICT', 'REVOKE', 'RIGHT', RLIKE, 'ROLE', 'ROLES', 'ROLLBACK', 'ROLLUP', 'ROW', 'ROWS', 'SCHEMA', 'SECOND', 'SELECT', 'SEMI', 'SEPARATED', 'SERDE', 'SERDEPROPERTIES', 'SESSION_USER', 'SET', 'MINUS', 'SETS', 'SHALLOW', 'SHOW', 'SKEWED', 'SOME', 'SORT', 'SORTED', 'START', 'STATISTICS', 'STORED', 'STRATIFY', 'STRUCT', 'SUBSTR', 'SUBSTRING', 'TABLE', 'TABLES', 'TABLESAMPLE', 'TBLPROPERTIES', TEMPORARY, 'TERMINATED', 'THEN', 'TO', 'TOUCH', 'TRAILING', 'TRANSACTION', 'TRANSACTIONS', 'TRANSFORM', 'TRIM', 'TRUE', 'TRUNCATE', 'TYPE', 'UNARCHIVE', 'UNBOUNDED', 'UNCACHE', 'UNION', 'UNIQUE', 'UNKNOWN', 'UNLOCK', 'UNSET', 'USE', 'USER', 'USING', 'VALUES', 'VIEW', 'VIEWS', 'WHEN', 'WHERE', 'WINDOW', 'WITH', 'YEAR', '+', '-', '*', 'DIV', '~', STRING, BIGINT_LITERAL, SMALLINT_LITERAL, TINYINT_LITERAL, INTEGER_VALUE, EXPONENT_VALUE, DECIMAL_VALUE, DOUBLE_LITERAL, BIGDECIMAL_LITERAL, IDENTIFIER, BACKQUOTED_IDENTIFIER}(line 1, pos 34)

== SQL ==
select * from test_view where a = 
----------------------------------^^^

So, has anyone managed to make these variables work?

Thanks

Upvotes: 1

Views: 1957

Answers (1)

CHEEKATLAPRADEEP

Reputation: 12768

You can pass parameters/arguments to your SQL statements by building the SQL string programmatically in Scala/Python and passing it to sqlContext.sql(string). For example, in Python:

sqlContext.sql("set a_value = 2")
sqlContext.sql("select * from test_view where a = ${a_value}").show()


Upvotes: 1
