user1744416
user1744416

Reputation: 301

Apache Sedona ST_KNN reports java.lang.NoClassDefFoundError

I use Apache Sedona to process map data at work. After reading the documentation on Sedona's KNN join, I wanted to try it out. However, I am not even able to run the example code from the docs.

Below is my main code.

    // Minimal reproduction: build a session through SedonaContext, create two
    // single-row point DataFrames, and join them using the ST_KNN predicate.
    val config = SedonaContext.builder()
      .master("local[*]") // Delete this if run in cluster mode
      .appName("readTestScala") // Change this to a proper name
      .getOrCreate()
    // All SQL below is issued through the object returned by SedonaContext.create.
    val sedona = SedonaContext.create(config)

    // Each side holds a single ST_Point(0.0, 0.0); both DataFrames are cached.
    val df1 = sedona.sql("SELECT ST_Point(0.0, 0.0) as geom1").cache()
    val df2 = sedona.sql("SELECT ST_Point(0.0, 0.0) as geom2").cache()
    df1.show()
    df2.show()
    // Join condition is ST_KNN(geom1, geom2, 1); the trailing 1 is presumably k,
    // the number of neighbours to return — confirm against the Sedona KNN docs.
    val df = df1.join(df2, expr("ST_KNN(geom1, geom2, 1)"))
    // Executes the KNN join. The stack trace in this post passes through the KNN
    // join exec and Dataset.show, so this is where the NoClassDefFoundError for
    // org/apache/commons/lang/NullArgumentException surfaces.
    df.show()

However, it doesn't work and reports a NoClassDefFoundError. Below is the error stack trace.

    Exception in thread "sbt-bg-threads-1" java.lang.NoClassDefFoundError: org/apache/commons/lang/NullArgumentException
    at org.apache.spark.sql.sedona_sql.strategy.join.TraitJoinQueryBase.toSpatialRDD(TraitJoinQueryBase.scala:47)
    at org.apache.spark.sql.sedona_sql.strategy.join.TraitJoinQueryBase.toSpatialRDD$(TraitJoinQueryBase.scala:46)
    at org.apache.spark.sql.sedona_sql.strategy.join.BroadcastQuerySideKNNJoinExec.toSpatialRDD(BroadcastQuerySideKNNJoinExec.scala:35)
    at org.apache.spark.sql.sedona_sql.strategy.join.BroadcastQuerySideKNNJoinExec.leftToSpatialRDD(BroadcastQuerySideKNNJoinExec.scala:86)
    at org.apache.spark.sql.sedona_sql.strategy.join.BroadcastQuerySideKNNJoinExec.toSpatialRddPair(BroadcastQuerySideKNNJoinExec.scala:69)
    at org.apache.spark.sql.sedona_sql.strategy.join.TraitKNNJoinQueryExec.executeKNNJoin(TraitKNNJoinQueryExec.scala:92)
    at org.apache.spark.sql.sedona_sql.strategy.join.TraitKNNJoinQueryExec.doExecute(TraitKNNJoinQueryExec.scala:57)
    at org.apache.spark.sql.sedona_sql.strategy.join.TraitKNNJoinQueryExec.doExecute$(TraitKNNJoinQueryExec.scala:55)
    at org.apache.spark.sql.sedona_sql.strategy.join.BroadcastQuerySideKNNJoinExec.doExecute(BroadcastQuerySideKNNJoinExec.scala:35)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:195)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:246)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:243)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:191)
    at org.apache.spark.sql.execution.InputAdapter.inputRDD(WholeStageCodegenExec.scala:527)
    at org.apache.spark.sql.execution.InputRDDCodegen.inputRDDs(WholeStageCodegenExec.scala:455)
    at org.apache.spark.sql.execution.InputRDDCodegen.inputRDDs$(WholeStageCodegenExec.scala:454)
    at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:498)
    at org.apache.spark.sql.execution.ProjectExec.inputRDDs(basicPhysicalOperators.scala:51)
    at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:751)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:195)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:246)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:243)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:191)
    at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:364)
    at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:445)
    at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.$anonfun$executeCollect$1(AdaptiveSparkPlanExec.scala:390)
    at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.withFinalPlanUpdate(AdaptiveSparkPlanExec.scala:418)
    at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.executeCollect(AdaptiveSparkPlanExec.scala:390)
    at org.apache.spark.sql.Dataset.collectFromPlan(Dataset.scala:4333)
    at org.apache.spark.sql.Dataset.$anonfun$head$1(Dataset.scala:3316)
    at org.apache.spark.sql.Dataset.$anonfun$withAction$2(Dataset.scala:4323)
    at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:546)
    at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:4321)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:125)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:201)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:108)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:66)
    at org.apache.spark.sql.Dataset.withAction(Dataset.scala:4321)
    at org.apache.spark.sql.Dataset.head(Dataset.scala:3316)
    at org.apache.spark.sql.Dataset.take(Dataset.scala:3539)
    at org.apache.spark.sql.Dataset.getRows(Dataset.scala:280)
    at org.apache.spark.sql.Dataset.showString(Dataset.scala:315)
    at org.apache.spark.sql.Dataset.show(Dataset.scala:838)
    at org.apache.spark.sql.Dataset.show(Dataset.scala:797)
    at com.example.poc.App$.example(main.scala:81)
    at com.example.poc.App$.main(main.scala:29)

Does anyone know what the problem might be?

Upvotes: 2

Views: 40

Answers (1)

Jia Yu - Apache Sedona
Jia Yu - Apache Sedona

Reputation: 304

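This is likely to be a dependency issue: the missing class, `org.apache.commons.lang.NullArgumentException`, belongs to Apache Commons Lang 2.x (the `commons-lang:commons-lang` artifact), so that jar is probably not on your runtime classpath. Which Sedona jar did you use? What environment are you using?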

Upvotes: 0

Related Questions