Reputation: 51
I'm trying to set the Java system property java.util.Arrays.useLegacyMergeSort
in my Reducer, to force the JVM to use the JDK 6 implementation of the Arrays.sort method instead of the JDK 8 one.
package scoring.devicestatus;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.log4j.Logger;
import domain.DeviceEvent;
import domain.DeviceStatus;
import domain.EndOfPeriodEvent;
import domain.PiidBoundaryEvent;
import util.DateUtils;
public class DeviceStatusReducer extends Reducer<Text, Text, Text, NullWritable> {
public static final Logger logger = Logger.getLogger(DeviceStatusReducer.class);
/**
 * Reducer entry point: processes all values grouped under one key and writes the
 * resulting records to the context. Parts of the body are elided in this listing
 * (shown as lines containing only dots).
 *
 * @param key     the reduce key; logged as the VIN if processing fails
 * @param values  the values grouped under the key
 * @param context the reducer context; supplies the job Configuration and receives the output
 * @throws RuntimeException wrapping any Throwable raised inside the try block
 */
@Override
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
// Job configuration from the context. NOTE(review): the leading "-->" appears to be
// the author's marker highlighting this line, not Java syntax.
-->Configuration conf = context.getConfiguration();
. .
.
try{
.
.
writeToContext(key, records, context);
}catch(Throwable t){
// Log the failing key together with the cause, then rethrow unchecked with the cause preserved.
logger.error("Error processing VIN: " + key.toString(), t);
throw new RuntimeException(t);
}
}
/**
 * Overload that works on the raw values directly. Only the first step is visible in
 * this listing: the values are converted into a sorted event list via createEvents.
 * The remainder of the body is elided (shown as dots).
 *
 * @param values  raw text values for one key
 * @param endDate passed through unchanged to createEvents
 * @return a list of DeviceStatus (how it is derived is not shown here)
 */
List<DeviceStatus> reduce(Iterable<Text> values, String endDate) {
List<DeviceEvent> events = createEvents(values, endDate);
.
.
}
/**
 * Converts the raw values into DeviceEvent objects, drops events whose program
 * instance id equals Constants.ORPHAN_PIID, and sorts the remainder.
 *
 * @param values  raw text values; each may yield several events via DeviceEvent.getInstance
 * @param endDate not used in the visible part of this method
 * @return the retained events, sorted by their natural ordering (DeviceEvent.compareTo)
 */
List<DeviceEvent> createEvents(Iterable<Text> values, String endDate) {
// Initial capacity of 10; the list grows as needed.
List<DeviceEvent> events = new ArrayList<DeviceEvent>(10);
for(Text text : values){
List<DeviceEvent> instances = DeviceEvent.getInstance(text);
for(DeviceEvent instance : instances){
// Keep everything except orphan events.
if(!Constants.ORPHAN_PIID.equals(instance.getProgramInstanceId())){
events.add(instance);
}
}
}
// Debug output comparing the JVM system property with the Hadoop configuration value.
// NOTE(review): "conf" is not declared in this method in the visible code, and the
// leading "-->" appears to be the author's marker, not Java syntax.
--> System.out.println("Reducer Class:"+System.getProperty("java.util.Arrays.useLegacyMergeSort")+" "+conf.get("java.util.Arrays.useLegacyMergeSort"));
// Sorts with DeviceEvent.compareTo; this is the call that appears in the stack trace
// with "Comparison method violates its general contract!".
Collections.sort(events);
return events;
}
/**
 * Emits one output record per device status.
 *
 * <p>Each status is rendered with {@code toCsvString}, using the key's string form as
 * the VIN, and written as the output key with a {@link NullWritable} value.
 *
 * @param key            reduce key whose string form is used as the VIN
 * @param deviceStatuses statuses to emit, written in list order
 * @param context        reducer context that receives the records
 * @throws IOException          if the context fails to write a record
 * @throws InterruptedException if the write is interrupted
 */
public void writeToContext(Text key, List<DeviceStatus> deviceStatuses, Context context) throws IOException, InterruptedException {
    final String vehicleId = key.toString();
    for (final DeviceStatus deviceStatus : deviceStatuses) {
        final Text csvRecord = new Text(deviceStatus.toCsvString(vehicleId));
        context.write(csvRecord, NullWritable.get());
    }
}
}
package domain;
public abstract class DeviceEvent implements Comparable<DeviceEvent> {
.
.
.
/**
 * Orders events chronologically by timestamp. Events with equal timestamps are
 * ordered by their installed flag: a non-installed event sorts before an installed one.
 *
 * <p>The tie-break compares the flags of <em>both</em> events. Looking only at this
 * event's flag would make {@code a.compareTo(b)} and {@code b.compareTo(a)} return the
 * same sign for two events with equal timestamps and equal flags, and would never
 * return 0 for an event compared with itself. That violates the {@link Comparable}
 * contract and makes TimSort throw
 * "Comparison method violates its general contract!".
 *
 * @param arg0 the event to compare against
 * @return a negative value, zero, or a positive value if this event sorts before,
 *         equal to, or after {@code arg0}
 */
@Override
public int compareTo(DeviceEvent arg0) {
    int comparison = getTimeStamp().compareTo(arg0.getTimeStamp());
    if (comparison == 0) {
        // Antisymmetric tie-break: false (not installed) sorts before true (installed),
        // and two events with the same flag compare as equal.
        comparison = Boolean.compare(isInstalled(), arg0.isInstalled());
    }
    return comparison;
}
.
.
.
}
I ran the jar from the command line, passing the property with the -D option:
hadoop jar jarname.jar classname -Djava.util.Arrays.useLegacyMergeSort=true args
The property does reach the reducer's configuration, but the error is still not fixed. I print the property using both System.getProperty and conf.get, with System.out.println("Reducer Class:" +System.getProperty("java.util.Arrays.useLegacyMergeSort")+ " " +conf.get("java.util.Arrays.useLegacyMergeSort"));
It looks like this:
stdout:
Reducer Class:null true
Reducer Class:null true
Reducer Class:null true
Reducer Class:null true
Reducer Class:null true
Reducer Class:null true
Below is the stack trace:
SysLog:
2016-10-05 16:25:21,032 ERROR [main] DeviceStatusReducer: Error processing VIN: 19XXXXXXXXXXX
java.lang.IllegalArgumentException: Comparison method violates its general contract!
at java.util.ComparableTimSort.mergeLo(ComparableTimSort.java:744)
at java.util.ComparableTimSort.mergeAt(ComparableTimSort.java:481)
at java.util.ComparableTimSort.mergeCollapse(ComparableTimSort.java:406)
at java.util.ComparableTimSort.sort(ComparableTimSort.java:213)
at java.util.Arrays.sort(Arrays.java:1312)
at java.util.Arrays.sort(Arrays.java:1506)
at java.util.ArrayList.sort(ArrayList.java:1454)
at java.util.Collections.sort(Collections.java:141)
at DeviceStatusReducer.createEvents(DeviceStatusReducer.java:139)
at DeviceStatusReducer.reduce(DeviceStatusReducer.java:43)
at DeviceStatusReducer.reduce(DeviceStatusReducer.java:34)
at DeviceStatusReducer.reduce(DeviceStatusReducer.java:24)
at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:171)
at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:627)
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:389)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
I know for sure that java.util.Arrays.useLegacyMergeSort
works, because it worked in Eclipse (a single JVM) when I set it in the main class with System.setProperty("java.util.Arrays.useLegacyMergeSort","true"),
but it doesn't work on the Hadoop cluster (multiple JVMs). I also tried setting it directly in the Reducer with System.setProperty("java.util.Arrays.useLegacyMergeSort","true"),
but that didn't help either.
How do I get the property to propagate to all the JVMs? Or, how can I turn a configuration property into a system property? Thanks.
Upvotes: 1
Views: 390
Reputation: 51
It works when the jar is run as follows:
hadoop jar JarName.jar MainClassName -D mapreduce.reduce.java.opts=-Djava.util.Arrays.useLegacyMergeSort=true args
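Note that there is a space after the -D that sets a Hadoop property and no space after the -D that sets a JVM system property. Here the first -D sets the Hadoop property mapreduce.reduce.java.opts, whose value is passed as JVM options to each reduce task's JVM; the second -D, inside that value, is the JVM flag that defines the system property.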
Upvotes: 1