Reputation: 323
I'm experiencing a problem with Maven (I tried sbt as well, with the same result) and Google's Guava, which I'm new to.
I found a lot of questions of this kind on SO, but none of the solutions worked for me (I searched for transitive dependencies using `mvn dependency:tree | less`, excluded Guava from everywhere, deleted my local `.m2`, reset the cache in IntelliJ, and tried all of the Guava versions starting from 22.0). No matter what, I keep getting:
Exception in thread "main" java.lang.RuntimeException: java.lang.reflect.InvocationTargetException
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:135)
at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3302)
at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:124)
at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:3352)
at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:3320)
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:479)
at org.apache.hadoop.fs.Path.getFileSystem(Path.java:361)
at org.apache.spark.sql.execution.streaming.FileStreamSink.<init>(FileStreamSink.scala:137)
at org.apache.spark.sql.execution.datasources.DataSource.createSink(DataSource.scala:320)
at org.apache.spark.sql.streaming.DataStreamWriter.createV1Sink(DataStreamWriter.scala:385)
at org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:363)
at org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:269)
at com.appsflyer.Main$.main(Main.scala:49)
at com.appsflyer.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:928)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1007)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1016)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:133)
... 25 more
Caused by: java.lang.NoSuchMethodError: com.google.common.base.Preconditions.checkArgument(ZLjava/lang/String;JJ)V
at com.google.cloud.hadoop.gcsio.cooplock.CooperativeLockingOptions$Builder.build(CooperativeLockingOptions.java:58)
at com.google.cloud.hadoop.gcsio.cooplock.CooperativeLockingOptions.<clinit>(CooperativeLockingOptions.java:31)
at com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemConfiguration.<clinit>(GoogleHadoopFileSystemConfiguration.java:392)
at com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.<init>(GoogleHadoopFileSystemBase.java:235)
at com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem.<init>(GoogleHadoopFileSystem.java:58)
... 30 more
Here is my pom.xml (some things are commented out to show what I've tried):
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<url>http://maven.apache.org</url>
<groupId>com.example</groupId>
<artifactId>spark-scala-12-maven</artifactId>
<packaging>jar</packaging>
<version>1.0</version>
<!-- Shared version and compiler settings; the ${...} placeholders used by the dependency declarations of this POM resolve against these entries. -->
<properties>
<spark.version>3.0.1</spark.version>
<!-- Java 8 source and target level. -->
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<encoding>UTF-8</encoding>
<!-- scala.version is applied to scala-library; scala.binary.version is the suffix of the Spark artifact ids. -->
<scala.version>2.12.11</scala.version>
<scala.binary.version>2.12</scala.binary.version>
<!-- Applied to the hadoop-common dependency of this POM. -->
<hadoop.version>2.7.7</hadoop.version>
</properties>
<!-- Every third-party declaration below excludes com.google.guava:guava; Guava is then declared once, explicitly, as the last active dependency. -->
<dependencies>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
<!-- NOTE(review): third.party.scope is not defined in the <properties> of this POM; confirm it is supplied elsewhere (profile, settings, or a -D flag). -->
<scope>${third.party.scope}</scope>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-core -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<!-- provided: needed to compile, expected to be supplied by the runtime environment. -->
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql-kafka-0-10 -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql-kafka-0-10_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
<exclusions>
<!-- kafka-clients is excluded here and declared explicitly (2.6.0) further down. -->
<exclusion>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.kafka/kafka-clients -->
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>2.6.0</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/com.google.cloud.bigdataoss/gcs-connector -->
<dependency>
<groupId>com.google.cloud.bigdataoss</groupId>
<artifactId>gcs-connector</artifactId>
<!-- NOTE(review): the version string names a hadoop3 build while hadoop.version in this POM is 2.7.7; confirm the combination is intended. -->
<version>hadoop3-2.1.6</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- <!– https://mvnrepository.com/artifact/com.google.api-client/google-api-client –>-->
<!-- <dependency>-->
<!-- <groupId>com.google.api-client</groupId>-->
<!-- <artifactId>google-api-client</artifactId>-->
<!-- <version>1.30.10</version>-->
<!-- </dependency>-->
<!-- https://mvnrepository.com/artifact/com.google.guava/guava -->
<!-- The only explicit Guava declaration in this POM; it comes after all the declarations that exclude Guava. -->
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>29.0-jre</version>
</dependency>
<!-- <!– https://mvnrepository.com/artifact/org.apache.httpcomponents/httpcore –>-->
<!-- <dependency>-->
<!-- <groupId>org.apache.httpcomponents</groupId>-->
<!-- <artifactId>httpcore</artifactId>-->
<!-- <version>4.4.13</version>-->
<!-- </dependency>-->
</dependencies>
<!-- Remote repositories consulted when resolving the dependencies of this POM. -->
<repositories>
<repository>
<id>maven-repo</id>
<name>Maven Repository</name>
<url>https://repo.maven.apache.org/maven2</url>
<!-- Releases are enabled for this repository, snapshots are disabled. -->
<releases>
<enabled>true</enabled>
</releases>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
<repository>
<id>apache-repo</id>
<name>Apache release repo</name>
<!-- NOTE(review): this URL looks like a GitHub tree page rather than a repository root; confirm it actually serves artifacts. -->
<url>https://github.com/adatao/mvnrepos/tree/master/releases/</url>
</repository>
</repositories>
<build>
<!-- Main sources are read from src/main/scala, test sources from src/test/, and resources from src/main/resources. -->
<sourceDirectory>src/main/scala</sourceDirectory>
<testSourceDirectory>src/test/</testSourceDirectory>
<resources>
<resource>
<directory>src/main/resources</directory>
</resource>
</resources>
<plugins>
<!-- Java compiler pinned to source/target 1.8, the same level as the maven.compiler.* properties of this POM. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.1</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<!-- <plugin>-->
<!-- <groupId>com.google.code.sbt-compiler-maven-plugin</groupId>-->
<!-- <artifactId>sbt-compiler-maven-plugin</artifactId>-->
<!-- <version>1.0.0</version>-->
<!-- <executions>-->
<!-- <execution>-->
<!-- <id>default-sbt-compile</id>-->
<!-- <goals>-->
<!-- <goal>compile</goal>-->
<!-- <goal>testCompile</goal>-->
<!-- </goals>-->
<!-- </execution>-->
<!-- </executions>-->
<!-- </plugin>-->
<!-- Scala compilation: add-source and compile run in the compile phase, add-source and testCompile in the test-compile phase. -->
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>4.3.1</version>
<executions>
<execution>
<id>compile-scala</id>
<phase>compile</phase>
<goals>
<goal>add-source</goal>
<goal>compile</goal>
</goals>
</execution>
<execution>
<id>test-compile-scala</id>
<phase>test-compile</phase>
<goals>
<goal>add-source</goal>
<goal>testCompile</goal>
</goals>
</execution>
</executions>
<configuration>
<!-- Entry point configured for the plugin. -->
<mainClass>com.example.Main</mainClass>
<!-- <scalaVersion>${scala.binary.version}</scalaVersion>-->
</configuration>
</plugin>
<!-- <plugin>-->
<!-- <groupId>org.codehaus.mojo</groupId>-->
<!-- <artifactId>exec-maven-plugin</artifactId>-->
<!-- <version>1.6.0</version>-->
<!-- <executions>-->
<!-- <execution>-->
<!-- <goals>-->
<!-- <goal>exec</goal>-->
<!-- </goals>-->
<!-- </execution>-->
<!-- </executions>-->
<!-- <configuration>-->
<!-- <executable>java</executable>-->
<!-- <includeProjectDependencies>true</includeProjectDependencies>-->
<!-- <includePluginDependencies>false</includePluginDependencies>-->
<!-- <classpathScope>compile</classpathScope>-->
<!-- <mainClass>testConf</mainClass>-->
<!-- </configuration>-->
<!-- </plugin>-->
<!-- Builds a jar-with-dependencies archive in the package phase, with com.example.Main as the manifest main class. -->
<plugin>
<!-- NOTE: We don't need a groupId specification because the group is
org.apache.maven.plugins ...which is assumed by default.
-->
<artifactId>maven-assembly-plugin</artifactId>
<version>3.3.0</version>
<configuration>
<archive>
<manifest>
<mainClass>com.example.Main</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>make-assembly</id> <!-- this is used for inheritance merges -->
<phase>package</phase> <!-- bind to the packaging phase -->
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
P.S. The goal is to write a Spark DataFrame to GCS.
P.P.S. I see that `spark-core_2.12` 3.0.1 bundles Hadoop 2.7.4 and depends on Guava 14.0.1. Is there any way to use a newer Guava? (I have excluded Guava from spark-core, but it does not help.)
Help me, Obi-Wan Kenobi. You're my only hope.
Upvotes: 2
Views: 3332
Reputation: 323
The solution was to place `guava` at the very beginning of `<dependencies>`, remove Hadoop as an independent dependency, switch to hadoop2 (instead of 3) and Java 8 (instead of 11), and add `maven-shade-plugin`.
The resulting pom.xml:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<url>http://maven.apache.org</url>
<groupId>com.example</groupId>
<artifactId>spark-scala-12-maven</artifactId>
<packaging>jar</packaging>
<version>1.0</version>
<!-- Shared version and compiler settings; the ${...} placeholders used by the dependency declarations of this POM resolve against these entries. -->
<properties>
<spark.version>3.0.1</spark.version>
<!-- Java 8 source and target level. -->
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<encoding>UTF-8</encoding>
<!-- scala.version is applied to scala-library; scala.binary.version is the suffix of the Spark artifact ids. -->
<scala.version>2.12.11</scala.version>
<scala.binary.version>2.12</scala.binary.version>
</properties>
<!-- Dependency list of the working build. Declaration order is significant here: the accompanying answer text names "Guava first" as part of the fix. -->
<dependencies>
<!-- https://mvnrepository.com/artifact/com.google.guava/guava -->
<!-- Kept as the first entry on purpose (see the note at the top of this list). -->
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>29.0-jre</version>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
<scope>compile</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-core -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<!-- provided: needed to compile, expected to be supplied by the runtime environment. -->
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql-kafka-0-10 -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql-kafka-0-10_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
<exclusions>
<!-- kafka-clients is excluded here and declared explicitly (2.6.0) right below. -->
<exclusion>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.kafka/kafka-clients -->
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>2.6.0</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/com.google.cloud.bigdataoss/gcs-connector -->
<dependency>
<groupId>com.google.cloud.bigdataoss</groupId>
<artifactId>gcs-connector</artifactId>
<!-- NOTE(review): the answer text says the fix included switching to hadoop2, but this version string names the hadoop3 build; confirm which one was actually used. -->
<version>hadoop3-2.1.6</version>
<exclusions>
<!-- conflicts with Spark dependencies -->
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
</exclusion>
<!-- conflicts with Spark dependencies -->
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-storage</artifactId>
<version>1.113.6</version>
</dependency>
</dependencies>
<!-- Remote repositories consulted when resolving the dependencies of this POM. -->
<repositories>
<repository>
<id>maven-repo</id>
<name>Maven Repository</name>
<url>https://repo.maven.apache.org/maven2</url>
<!-- Releases are enabled for this repository, snapshots are disabled. -->
<releases>
<enabled>true</enabled>
</releases>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
<repository>
<id>apache-repo</id>
<name>Apache release repo</name>
<!-- NOTE(review): this URL looks like a GitHub tree page rather than a repository root; confirm it actually serves artifacts. -->
<url>https://github.com/adatao/mvnrepos/tree/master/releases/</url>
</repository>
</repositories>
<build>
<!-- Main sources are read from src/main/scala, test sources from src/test/, and resources from src/main/resources. -->
<sourceDirectory>src/main/scala</sourceDirectory>
<testSourceDirectory>src/test/</testSourceDirectory>
<resources>
<resource>
<directory>src/main/resources</directory>
</resource>
</resources>
<plugins>
<!-- Java compiler pinned to source/target 1.8, the same level as the maven.compiler.* properties of this POM. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.1</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<!-- Scala compilation: add-source and compile run in the compile phase, add-source and testCompile in the test-compile phase. -->
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>4.3.1</version>
<executions>
<execution>
<id>compile-scala</id>
<phase>compile</phase>
<goals>
<goal>add-source</goal>
<goal>compile</goal>
</goals>
</execution>
<execution>
<id>test-compile-scala</id>
<phase>test-compile</phase>
<goals>
<goal>add-source</goal>
<goal>testCompile</goal>
</goals>
</execution>
</executions>
<configuration>
<!-- Entry point configured for the plugin. -->
<mainClass>com.example.Main</mainClass>
<!-- <scalaVersion>${scala.binary.version}</scalaVersion>-->
</configuration>
</plugin>
<!-- Shades the Google artifacts into the project jar during the package phase and relocates their packages (see <relocations>), presumably so they cannot collide with other com.google classes already on the runtime classpath. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.2.1</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<!-- Drop classes the project does not reference; the first filter below asks for the full content of com.google.** artifacts to be kept anyway. -->
<minimizeJar>true</minimizeJar>
<filters>
<filter>
<artifact>com.google.**:*</artifact>
<includes>
<include>**</include>
</includes>
</filter>
<filter>
<artifact>com.google.cloud.bigdataoss:gcs-connector</artifact>
<excludes>
<!-- Register a provider with the shaded name instead-->
<exclude>META-INF/services/org.apache.hadoop.fs.FileSystem</exclude>
</excludes>
</filter>
<!-- Strip signature files from every artifact. -->
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
<artifactSet>
<includes>
<include>com.google.*:*</include>
</includes>
<!-- Single exclusion list: <excludes> used to be declared twice in this artifactSet, and a repeated element is not merged, so one of the two lists was silently lost. -->
<excludes>
<exclude>com.google.code.findbugs:jsr305</exclude>
<exclude>junit:junit</exclude>
<exclude>org.apache.maven:lib:tests</exclude>
<exclude>log4j:log4j:jar:</exclude>
<exclude>slf4j</exclude>
<exclude>org.scala-lang</exclude>
<exclude>org.apache.hadoop</exclude>
<exclude>org.apache.curator</exclude>
<exclude>org.apache.spark</exclude>
<exclude>javax.activation</exclude>
<exclude>javax.annotation</exclude>
<exclude>javax.inject</exclude>
<exclude>org.apache.commons</exclude>
<exclude>javax.servlet</exclude>
<exclude>javax.xml</exclude>
<exclude>io.netty</exclude>
<exclude>io.dropwizard.metrics</exclude>
<exclude>com.twitter</exclude>
<exclude>org.joda</exclude>
<exclude>com.fasterxml.jackson.core</exclude>
</excludes>
</artifactSet>
<!-- Rewrite every com.google package (and the references to it) to com.shaded.google inside the shaded jar. -->
<relocations>
<relocation>
<pattern>com.google</pattern>
<shadedPattern>com.shaded.google</shadedPattern>
</relocation>
</relocations>
</configuration>
</execution>
</executions>
</plugin>
<!-- Builds a jar-with-dependencies archive in the package phase, with com.example.Main as the manifest main class. -->
<plugin>
<!-- NOTE: We don't need a groupId specification because the group is
org.apache.maven.plugins ...which is assumed by default.
-->
<artifactId>maven-assembly-plugin</artifactId>
<version>3.3.0</version>
<configuration>
<archive>
<manifest>
<mainClass>com.example.Main</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>make-assembly</id> <!-- this is used for inheritance merges -->
<phase>package</phase> <!-- bind to the packaging phase -->
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
Upvotes: 3