Reputation: 11
I'm testing the new databricks connect and I often use sql variables in my python scripts on databricks, however I'm not able to use those variables through dbconnect. The example below works fine in databricks but not in dbconnect:
from pyspark.sql import SparkSession
from pyspark.sql import SQLContext
import pandas as pd
spark = SparkSession.builder.getOrCreate()
sqlContext = SQLContext(spark)
df = spark.createDataFrame(pd.DataFrame({'a':[2,5,8], 'b':[3,5,5]}))
df.createOrReplaceTempView('test_view')
sqlContext.sql("set a_value = 2")
sqlContext.sql("select * from test_view where a = ${a_value}")
In dbconnect I received the follow:
---------------------------------------------------------------------------
ParseException Traceback (most recent call last)
<ipython-input-50-404f4c5b017c> in <module>
10
11 sqlContext.sql("set a_value = 2")
---> 12 sqlContext.sql("select * from test_view where a = ${a_value}")
c:\users\pc\miniconda3\lib\site-packages\pyspark\sql\context.py in sql(self, sqlQuery)
369 [Row(f1=1, f2=u'row1'), Row(f1=2, f2=u'row2'), Row(f1=3, f2=u'row3')]
370 """
--> 371 return self.sparkSession.sql(sqlQuery)
372
373 @since(1.0)
c:\users\pc\miniconda3\lib\site-packages\pyspark\sql\session.py in sql(self, sqlQuery)
702 [Row(f1=1, f2=u'row1'), Row(f1=2, f2=u'row2'), Row(f1=3, f2=u'row3')]
703 """
--> 704 return DataFrame(self._jsparkSession.sql(sqlQuery), self._wrapped)
705
706 @since(2.0)
c:\users\pc\miniconda3\lib\site-packages\py4j\java_gateway.py in __call__(self, *args)
1303 answer = self.gateway_client.send_command(command)
1304 return_value = get_return_value(
-> 1305 answer, self.gateway_client, self.target_id, self.name)
1306
1307 for temp_arg in temp_args:
c:\users\pc\miniconda3\lib\site-packages\pyspark\sql\utils.py in deco(*a, **kw)
132 # Hide where the exception came from that shows a non-Pythonic
133 # JVM exception message.
--> 134 raise_from(converted)
135 else:
136 raise
c:\users\pc\miniconda3\lib\site-packages\pyspark\sql\utils.py in raise_from(e)
ParseException:
mismatched input '<EOF>' expecting {'(', 'COLLECT', 'CONVERT', 'DELTA', 'HISTORY', 'MATCHED', 'MERGE', 'OPTIMIZE', 'SAMPLE', 'TIMESTAMP', 'UPDATE', 'VERSION', 'ZORDER', 'ADD', 'AFTER', 'ALL', 'ALTER', 'ANALYZE', 'AND', 'ANTI', 'ANY', 'ARCHIVE', 'ARRAY', 'AS', 'ASC', 'AT', 'AUTHORIZATION', 'BETWEEN', 'BOTH', 'BUCKET', 'BUCKETS', 'BY', 'CACHE', 'CASCADE', 'CASE', 'CAST', 'CHANGE', 'CHECK', 'CLEAR', 'CLONE', 'CLUSTER', 'CLUSTERED', 'CODEGEN', 'COLLATE', 'COLLECTION', 'COLUMN', 'COLUMNS', 'COMMENT', 'COMMIT', 'COMPACT', 'COMPACTIONS', 'COMPUTE', 'CONCATENATE', 'CONSTRAINT', 'COPY', 'COPY_OPTIONS', 'COST', 'CREATE', 'CREDENTIALS', 'CROSS', 'CUBE', 'CURRENT', 'CURRENT_DATE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'CURRENT_USER', 'DATA', 'DATABASE', DATABASES, 'DAY', 'DBPROPERTIES', 'DEEP', 'DEFINED', 'DELETE', 'DELIMITED', 'DESC', 'DESCRIBE', 'DFS', 'DIRECTORIES', 'DIRECTORY', 'DISTINCT', 'DISTRIBUTE', 'DROP', 'ELSE', 'ENCRYPTION', 'END', 'ESCAPE', 'ESCAPED', 'EXCEPT', 'EXCHANGE', 'EXISTS', 'EXPLAIN', 'EXPORT', 'EXTENDED', 'EXTERNAL', 'EXTRACT', 'FALSE', 'FETCH', 'FIELDS', 'FILTER', 'FILEFORMAT', 'FILES', 'FIRST', 'FOLLOWING', 'FOR', 'FOREIGN', 'FORMAT', 'FORMAT_OPTIONS', 'FORMATTED', 'FROM', 'FULL', 'FUNCTION', 'FUNCTIONS', 'GLOBAL', 'GRANT', 'GROUP', 'GROUPING', 'HAVING', 'HOUR', 'IF', 'IGNORE', 'IMPORT', 'IN', 'INDEX', 'INDEXES', 'INNER', 'INPATH', 'INPUTFORMAT', 'INSERT', 'INTERSECT', 'INTERVAL', 'INTO', 'IS', 'ITEMS', 'JOIN', 'KEYS', 'LAST', 'LATERAL', 'LAZY', 'LEADING', 'LEFT', 'LIKE', 'LIMIT', 'LINES', 'LIST', 'LOAD', 'LOCAL', 'LOCATION', 'LOCK', 'LOCKS', 'LOGICAL', 'MACRO', 'MAP', 'MINUTE', 'MONTH', 'MSCK', 'NAMESPACE', 'NAMESPACES', 'NATURAL', 'NO', NOT, 'NULL', 'NULLS', 'OF', 'ON', 'ONLY', 'OPTION', 'OPTIONS', 'OR', 'ORDER', 'OUT', 'OUTER', 'OUTPUTFORMAT', 'OVER', 'OVERLAPS', 'OVERLAY', 'OVERWRITE', 'PARTITION', 'PARTITIONED', 'PARTITIONS', 'PATTERN', 'PERCENT', 'PIVOT', 'PLACING', 'POSITION', 'PRECEDING', 'PRIMARY', 'PRINCIPALS', 'PROPERTIES', 'PURGE', 'QUERY', 'RANGE', 'RECORDREADER', 'RECORDWRITER', 'RECOVER', 'REDUCE', 'REFERENCES', 'REFRESH', 'RENAME', 'REPAIR', 'REPLACE', 'RESET', 'RESTRICT', 'REVOKE', 'RIGHT', RLIKE, 'ROLE', 'ROLES', 'ROLLBACK', 'ROLLUP', 'ROW', 'ROWS', 'SCHEMA', 'SECOND', 'SELECT', 'SEMI', 'SEPARATED', 'SERDE', 'SERDEPROPERTIES', 'SESSION_USER', 'SET', 'MINUS', 'SETS', 'SHALLOW', 'SHOW', 'SKEWED', 'SOME', 'SORT', 'SORTED', 'START', 'STATISTICS', 'STORED', 'STRATIFY', 'STRUCT', 'SUBSTR', 'SUBSTRING', 'TABLE', 'TABLES', 'TABLESAMPLE', 'TBLPROPERTIES', TEMPORARY, 'TERMINATED', 'THEN', 'TO', 'TOUCH', 'TRAILING', 'TRANSACTION', 'TRANSACTIONS', 'TRANSFORM', 'TRIM', 'TRUE', 'TRUNCATE', 'TYPE', 'UNARCHIVE', 'UNBOUNDED', 'UNCACHE', 'UNION', 'UNIQUE', 'UNKNOWN', 'UNLOCK', 'UNSET', 'USE', 'USER', 'USING', 'VALUES', 'VIEW', 'VIEWS', 'WHEN', 'WHERE', 'WINDOW', 'WITH', 'YEAR', '+', '-', '*', 'DIV', '~', STRING, BIGINT_LITERAL, SMALLINT_LITERAL, TINYINT_LITERAL, INTEGER_VALUE, EXPONENT_VALUE, DECIMAL_VALUE, DOUBLE_LITERAL, BIGDECIMAL_LITERAL, IDENTIFIER, BACKQUOTED_IDENTIFIER}(line 1, pos 34)
== SQL ==
select * from test_view where a =
----------------------------------^^^
So, has anyone managed to make these variables work?
Thanks
Upvotes: 1
Views: 1957
Reputation: 12768
You can pass parameters/arguments to your SQL statements by programmatically creating the SQL string using Scala/Python and pass it to sqlContext.sql(string).
sqlContext.sql("set a_value = 2")
sqlContext.sql("select * from test_view where a = ${a_value}").show()
Upvotes: 1