Reputation: 536
I wrote this pipeline, but when I run it as a jar it cannot find the direct runner, even though I have it specified in my build.gradle. The same happens when I try to pass the parameter --runner=direct or
--runner=Directrunner. Below are my code and my build.gradle file. I run the Gradle task fatJar to create the jar, navigate to my build/libs folder, run the jar, and see this error. This is the command I am using: java -jar filepipeline-all-1.0-SNAPSHOT.jar --input="../testdata" --output="./manifest.json" --runner=DirectRunner
Any help with this issue would be greatly appreciated!
My folder structure looks like this: --src --main --java --com.pipeline --BeamPipeline.java
build.gradle
// Build script for the pipeline project: applies the Java plugin, defines a
// hand-written fat-jar task, applies the application plugin, and declares dependencies.
plugins {
id 'java'
}
group 'com.dustin'
version '1.0-SNAPSHOT'
sourceCompatibility = 1.8
repositories {
mavenCentral()
}
// Builds "<project.name>-all": the contents of the regular jar task plus every
// entry of the 'compile' configuration (directories copied as-is, archives unpacked).
// Only configurations.compile is collected here, so dependencies declared with
// 'runtime' in the dependencies block below are not part of this collection.
task fatJar(type: Jar) {
manifest {
attributes 'Implementation-Title': 'Gradle Jar File',
'Implementation-Version': version,
'Main-Class': 'com.pipeline.BeamPipeline'
}
baseName = project.name + '-all'
from { configurations.compile.collect { it.isDirectory() ? it : zipTree(it) } }
with jar
}
apply plugin: 'application'
// NOTE(review): this value differs from the manifest's Main-Class above
// ('com.pipeline.BeamPipeline') and looks like a source path rather than a
// fully qualified class name — confirm which one is intended.
mainClassName = 'src.main.java.com.pipeline.BeamPipeline'
dependencies {
// Declared as 'runtime', unlike the Beam SDK core on the next line which is 'compile'.
runtime group: 'org.apache.beam', name: 'beam-runners-direct-java', version:'2.8.0'
compile group: 'org.apache.beam', name: 'beam-sdks-java-core', version:'2.8.0'
runtime group: 'org.slf4j', name: 'slf4j-jdk14', version:'1.7.25'
testCompile group: 'junit', name: 'junit', version: '4.12'
compile group: 'org.apache.commons', name: 'commons-lang3', version: '3.0'
compile group: 'commons-io', name: 'commons-io', version: '2.6'
compile group: 'commons-codec', name:'commons-codec', version:'1.12'
compileOnly 'org.projectlombok:lombok:1.18.6'
compile group: 'com.google.code.gson', name: 'gson', version: '2.7'
compile group: 'org.json', name: 'json', version: '20180813'
annotationProcessor 'org.projectlombok:lombok:1.18.6'
}
Pipeline:
package com.pipeline;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineRunner;
import org.apache.beam.sdk.io.FileIO;
import org.apache.beam.sdk.options.*;
import org.apache.beam.sdk.transforms.*;
import org.apache.beam.sdk.values.KV;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
public class BeamPipeline {
private static final Logger log = LoggerFactory.getLogger(BeamPipeline.class);
public static interface MyOptions extends PipelineOptions {
@Validation.Required
@Description("Input Path(with gs:// prefix)")
String getInput();
void setInput(String value);
@Validation.Required
@Description("Output Path (with gs:// prefix)")
String getOutput();
void setOutput(String value);
}
public static void main(String[] args) {
MyOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(MyOptions.class);
Pipeline p = Pipeline.create(options);
File dir = new File(options.getInput());
String output = options.getOutput();
for (File file : dir.listFiles()) {
String inputString = file.toString();
p
.apply("Match Files", FileIO.match().filepattern(inputString))
.apply("Read Files", FileIO.readMatches())
.apply(MapElements.via(new SimpleFunction<FileIO.ReadableFile, KV<String, String>>() {
public KV<String, String> apply(FileIO.ReadableFile file) {
String temp = null;
try {
temp = file.readFullyAsUTF8String();
} catch (IOException e) {
}
String sha256hex = org.apache.commons.codec.digest.DigestUtils.sha256Hex(temp);
return KV.of(file.getMetadata().resourceId().toString(), sha256hex);
}
}))
.apply("Print", ParDo.of(new DoFn<KV<String, String>, Void>() {
@ProcessElement
public void processElement(ProcessContext c) throws IOException {
FileWriter fileWriter = new FileWriter(output,true);
JSONObject obj = new JSONObject();
obj.put(c.element().getKey(), c.element().getValue());
fileWriter.write(obj.toString());
fileWriter.close();
log.info(String.format("File: %s, SHA-256 %s", c.element().getKey(), c.element().getValue()));
}
}));
}
p.run().waitUntilFinish();
}
}
Upvotes: 0
Views: 2669
Reputation: 779
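beam-runners-direct-java
is declared as a runtime dependency, and the fatJar task only bundles configurations.compile, so the runner is not added to the fat jar.
You can declare beam-runners-direct-java
as a compile dependency instead, so that it is packaged into the fat jar and can be used.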
// Build script variant that packages the DirectRunner into the fat jar by
// declaring it as a 'compile' dependency.
plugins {
    id 'java'
}

group 'com.dustin'
version '1.0-SNAPSHOT'
sourceCompatibility = 1.8

repositories {
    mavenCentral()
}

// Builds "<project.name>-all": the contents of the regular jar task plus every
// entry of the 'compile' configuration (directories copied as-is, archives unpacked).
task fatJar(type: Jar) {
    manifest {
        attributes 'Implementation-Title': 'Gradle Jar File',
                'Implementation-Version': version,
                'Main-Class': 'com.pipeline.BeamPipeline'
    }
    baseName = project.name + '-all'
    from { configurations.compile.collect { it.isDirectory() ? it : zipTree(it) } }
    with jar
}

apply plugin: 'application'
// Must be the fully qualified class name (package com.pipeline), not a source
// path; kept identical to the manifest's Main-Class above.
mainClassName = 'com.pipeline.BeamPipeline'

dependencies {
    // 'compile' (not 'runtime') so that fatJar, which collects configurations.compile, bundles the runner.
    compile group: 'org.apache.beam', name: 'beam-runners-direct-java', version: '2.8.0'
    compile group: 'org.apache.beam', name: 'beam-sdks-java-core', version: '2.8.0'
    // Still 'runtime', so fatJar does not bundle this SLF4J binding; switch it to
    // 'compile' as well if log output is needed when running the fat jar.
    runtime group: 'org.slf4j', name: 'slf4j-jdk14', version: '1.7.25'
    testCompile group: 'junit', name: 'junit', version: '4.12'
    compile group: 'org.apache.commons', name: 'commons-lang3', version: '3.0'
    compile group: 'commons-io', name: 'commons-io', version: '2.6'
    compile group: 'commons-codec', name: 'commons-codec', version: '1.12'
    compileOnly 'org.projectlombok:lombok:1.18.6'
    compile group: 'com.google.code.gson', name: 'gson', version: '2.7'
    compile group: 'org.json', name: 'json', version: '20180813'
    annotationProcessor 'org.projectlombok:lombok:1.18.6'
}
Alternatively, if you do not want to package DirectRunner
with the fat jar and only want to use it for testing, you can create a separate DirectRunner
jar and add it to the classpath when running the pipeline.
// Build script variant that keeps the DirectRunner out of the main fat jar and
// produces a second jar that is put on the classpath only when needed.
plugins {
    id 'java'
}

group 'com.dustin'
version '1.0-SNAPSHOT'
sourceCompatibility = 1.8

repositories {
    mavenCentral()
}

// Builds "<project.name>-all": the contents of the regular jar task plus every
// entry of the 'compile' configuration (directories copied as-is, archives unpacked).
// Dependencies declared as 'runtime' are not collected here.
task fatJar(type: Jar) {
    manifest {
        attributes 'Implementation-Title': 'Gradle Jar File',
                'Implementation-Version': version,
                'Main-Class': 'com.pipeline.BeamPipeline'
    }
    baseName = project.name + '-all'
    from { configurations.compile.collect { it.isDirectory() ? it : zipTree(it) } }
    with jar
}

// Builds "<project.name>-runtime" from the 'runtime' configuration, which is
// where the DirectRunner is declared below.
// NOTE(review): with the legacy java plugin 'runtime' extends 'compile', so this
// jar presumably also repeats the compile dependencies — confirm if jar size matters.
task directrunnerjar(type: Jar) {
    manifest {
        attributes 'Implementation-Title': 'Gradle Jar File',
                'Implementation-Version': version,
                'Main-Class': 'com.pipeline.BeamPipeline'
    }
    baseName = project.name + '-runtime'
    from { configurations.runtime.collect { it.isDirectory() ? it : zipTree(it) } }
    with jar
}

apply plugin: 'application'
// Must be the fully qualified class name (package com.pipeline), not a source
// path; kept identical to the manifest's Main-Class above.
mainClassName = 'com.pipeline.BeamPipeline'

dependencies {
    // 'runtime' keeps the runner out of fatJar; it is packaged by directrunnerjar instead.
    runtime group: 'org.apache.beam', name: 'beam-runners-direct-java', version: '2.8.0'
    compile group: 'org.apache.beam', name: 'beam-sdks-java-core', version: '2.8.0'
    runtime group: 'org.slf4j', name: 'slf4j-jdk14', version: '1.7.25'
    testCompile group: 'junit', name: 'junit', version: '4.12'
    compile group: 'org.apache.commons', name: 'commons-lang3', version: '3.0'
    compile group: 'commons-io', name: 'commons-io', version: '2.6'
    compile group: 'commons-codec', name: 'commons-codec', version: '1.12'
    compileOnly 'org.projectlombok:lombok:1.18.6'
    compile group: 'com.google.code.gson', name: 'gson', version: '2.7'
    compile group: 'org.json', name: 'json', version: '20180813'
    annotationProcessor 'org.projectlombok:lombok:1.18.6'
}
java -cp "libs/filepipeline-runtime-1.0-SNAPSHOT.jar:libs/filepipeline-all-1.0-SNAPSHOT.jar" com.pipeline.BeamPipeline --input="../testdata" --output="./manifest.json" --runner=DirectRunner
Upvotes: 1