Reputation: 159
I am trying to read data from a Parquet file using Spark SQL and copy it into another table, but while writing the data into the second table I get the error below. The Parquet file is pulled from Event Hub data, and the data contains one column whose type is an array of objects, e.g.:
[{MeassageTyep:string, Data:{Liftlink:int, MotionSensorLink:int}}]
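To double-check that column's type, the same file can be loaded directly with the DataFrame API (a minimal sketch using the same mount path as in the CREATE TABLE below):

# Sketch: load the file directly and print the inferred schema to confirm
# the array-of-struct column before creating the table.
df = spark.read.parquet(
    "/mnt/RobustelLanding/OtisOne/robustel/heartbeat/2020/06/30/16/"
    "-1761854110_24d31143a42f4b6f9cec9aa576e2ddac_1.parquet"
)
df.printSchema()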
This is the code I use to read the Parquet file into a table:
try:
    spark.sql("""DROP TABLE IF EXISTS stg_robustel.src_robustel_heartbeat""")
    spark.sql("""
        CREATE TABLE IF NOT EXISTS stg_robustel.src_robustel_heartbeat
        USING PARQUET
        LOCATION '/mnt/RobustelLanding/OtisOne/robustel/heartbeat/2020/06/30/16/-1761854110_24d31143a42f4b6f9cec9aa576e2ddac_1.parquet'
    """)
except Exception as e:
    print('Error: ' + str(e))
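Before attempting the copy, the external table itself can be sanity-checked (a sketch):

# Sketch: confirm the external table is queryable and inspect its schema.
spark.sql("SELECT * FROM stg_robustel.src_robustel_heartbeat LIMIT 5").show(truncate=False)
spark.table("stg_robustel.src_robustel_heartbeat").printSchema()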
And this is how I try to copy that data into another, plain table:
%sql
drop table if exists stg_robustel.src_robustel_heartbeat_test;
create table stg_robustel.src_robustel_heartbeat_test as
select * from stg_robustel.src_robustel_heartbeat
Error in SQL statement: SparkException: Job aborted.
com.databricks.backend.common.rpc.DatabricksExceptions$SQLExecutionException: org.apache.spark.SparkException: Job aborted.
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:201)
    at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:192)
    at org.apache.spark.sql.execution.datasources.DataSource.writeAndRead(DataSource.scala:558)
    at org.apache.spark.sql.execution.command.CreateDataSourceTableAsSelectCommand.saveDataIntoTable(createDataSourceTables.scala:216)
    at org.apache.spark.sql.execution.command.CreateDataSourceTableAsSelectCommand.run(createDataSourceTables.scala:175)
    ...
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 4.0 failed 4 times, most recent failure: Lost task 0.3 in stage 4.0 (TID 10, 10.139.64.5, executor 0): org.apache.spark.SparkException: Task failed while writing rows.
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:268)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
    ...
Caused by: java.lang.ArrayIndexOutOfBoundsException
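The root cause at the bottom of the trace is a java.lang.ArrayIndexOutOfBoundsException thrown while writing rows, which suggests the nested array column may be involved. One way to narrow down which column triggers it (a sketch; the array column name "body" and the target table names are hypothetical):

# Sketch: copy everything except the nested array first, then try the array
# on its own. "body" is a hypothetical name -- take the real one from printSchema().
from pyspark.sql.functions import explode_outer

df = spark.table("stg_robustel.src_robustel_heartbeat")
df.drop("body").write.mode("overwrite").saveAsTable("stg_robustel.src_robustel_heartbeat_scalar")
df.select(explode_outer("body").alias("element")) \
  .write.mode("overwrite").saveAsTable("stg_robustel.src_robustel_heartbeat_array")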
Help is appreciated
Upvotes: 2
Views: 8916
Reputation: 1
spark.sql("REFRESH TABLE delta.`dbfs:/mnt/hive/......./tablename`")
Upvotes: 0