Michael.P

Reputation: 1

Hadoop - MapReduce

I've been trying to solve a simple Map/Reduce problem: counting words from some input files and carrying each word's frequency and length together as a composite value. The mapper emits a 1 every time a word is read from the file, and the reducer then groups all occurrences of the same word together to get its final count. As output I'd like to see, for each word length, which word is the most frequent.

This is as far as my team and I have gotten. This is the WordCountMapper class:

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class WordCountMapper extends MapReduceBase implements
        Mapper<LongWritable, Text, Text, CompositeGroupKey> {

    private final Text word = new Text();

    public void map(LongWritable key, Text value,
            OutputCollector<Text, CompositeGroupKey> output, Reporter reporter)
            throws IOException {

        String line = value.toString();
        StringTokenizer itr = new StringTokenizer(line.toLowerCase());
        while (itr.hasMoreTokens()) {
            word.set(itr.nextToken());
            // Emit a count of 1 plus the word's length for every token
            CompositeGroupKey gky = new CompositeGroupKey(1, word.getLength());
            output.collect(word, gky);
        }
    }
}

This is the WordCountReducer class:

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class WordCountReducer extends MapReduceBase
    implements Reducer<Text, CompositeGroupKey, Text, CompositeGroupKey> {

    @Override
    public void reduce(Text key, Iterator<CompositeGroupKey> values,
            OutputCollector<Text, CompositeGroupKey> output, Reporter reporter)
            throws IOException {
        int sum = 0;
        int length = 0;
        while (values.hasNext()) {
            CompositeGroupKey value = values.next();
            sum += value.getCount();     // add up the per-word counts
            length = value.getLength();  // the word length travels with each value
        }
        CompositeGroupKey cgk = new CompositeGroupKey(sum, length);
        output.collect(key, cgk);
    }
}

This is the WordCount driver class:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class WordCount {

  public static void main(String[] args) {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(WordCount.class);

    // specify output types
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(CompositeGroupKey.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(CompositeGroupKey.class);

    // specify input and output dirs
    FileInputFormat.addInputPath(conf, new Path("input"));
    FileOutputFormat.setOutputPath(conf, new Path("output16"));

    // specify a mapper
    conf.setMapperClass(WordCountMapper.class);

    // specify a reducer, also used as the combiner
    conf.setReducerClass(WordCountReducer.class);
    conf.setCombinerClass(WordCountReducer.class);

    client.setConf(conf);
    try {
      JobClient.runJob(conf);
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
}

And this is the CompositeGroupKey class:

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableUtils;

public class CompositeGroupKey implements WritableComparable<CompositeGroupKey> {
    int count;
    int length;

    public CompositeGroupKey(int c, int l) {
        this.count = c;
        this.length = l;
    }

    public void write(DataOutput out) throws IOException {
        WritableUtils.writeVInt(out, count);
        WritableUtils.writeVInt(out, length);
    }

    public void readFields(DataInput in) throws IOException {
        this.count = WritableUtils.readVInt(in);
        this.length = WritableUtils.readVInt(in);
    }

    // Comparison not implemented yet; the framework never calls it here,
    // since CompositeGroupKey is only used as a value, not as a key
    public int compareTo(CompositeGroupKey pop) {
        return 0;
    }

    public int getCount() {
        return this.count;
    }

    public int getLength() {
        return this.length;
    }

}

Right now I get this error:

java.lang.RuntimeException: java.lang.NoSuchMethodException: CompositeGroupKey.<init>()  
  at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:80)  
  at org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:62)
  at org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:40)
  at org.apache.hadoop.mapred.Task$ValuesIterator.readNextValue(Task.java:738)
  at org.apache.hadoop.mapred.Task$ValuesIterator.next(Task.java:678)
  at org.apache.hadoop.mapred.Task$CombineValuesIterator.next(Task.java:757)
  at WordCountReducer.reduce(WordCountReducer.java:24)
  at WordCountReducer.reduce(WordCountReducer.java:1)
  at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.combineAndSpill(MapTask.java:904)
  at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.sortAndSpill(MapTask.java:785)
  at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.flush(MapTask.java:698)
  at org.apache.hadoop.mapred.MapTask.run(MapTask.java:228)
  at org.apache.hadoop.mapred.TaskTracker$Child.main(TaskTracker.java:2209)
Caused by: java.lang.NoSuchMethodException: CompositeGroupKey.<init>()
  at java.lang.Class.getConstructor0(Unknown Source)
  at java.lang.Class.getDeclaredConstructor(Unknown Source)
  at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:74)

I know the code isn't that good, but right now we have no idea where we went wrong, so any help would be welcome!

Upvotes: 0

Views: 470

Answers (2)

Aniruddha Sinha

Reputation: 799

Whenever you see an exception like the one given below

java.lang.RuntimeException: java.lang.NoSuchMethodException: CompositeGroupKey.<init>()  

it points to a problem with object instantiation: one of the constructors the framework needs is missing, either the default (no-argument) constructor or a parameterised constructor. The moment you write a parameterised constructor, the compiler no longer generates the default constructor unless you declare it explicitly.

The answer given by Ruslan Ostafiichuk is enough to answer your query; I just added some more points to make things clearer.
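
For illustration, here is a minimal, self-contained sketch of the failure mode; the class names OnlyParameterised and ReflectionDemo are made up for the example:

import java.lang.reflect.Constructor;

class OnlyParameterised {
    int count;

    // Declaring this constructor means the compiler no longer
    // generates the implicit no-argument constructor.
    OnlyParameterised(int count) {
        this.count = count;
    }
}

public class ReflectionDemo {
    public static void main(String[] args) throws Exception {
        // Hadoop's ReflectionUtils.newInstance does essentially this:
        // look up the no-argument constructor, then invoke it.
        // The lookup below throws NoSuchMethodException, because
        // OnlyParameterised only declares an int constructor.
        Constructor<OnlyParameterised> ctor =
                OnlyParameterised.class.getDeclaredConstructor();
        ctor.newInstance();
    }
}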

Upvotes: 1

Ruslan Ostafiichuk

Reputation: 4692

You have to provide an empty default constructor in your CompositeGroupKey class. Hadoop needs it to instantiate the object reflectively when deserializing values.

Just add:

public CompositeGroupKey() {
}
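
With that change in place, the constructor section of CompositeGroupKey would look roughly like this (a sketch showing only the constructors; the rest of the class stays as posted):

public class CompositeGroupKey implements WritableComparable<CompositeGroupKey> {
    int count;
    int length;

    // No-argument constructor: Hadoop creates the instance reflectively
    // during deserialization and then fills it in via readFields()
    public CompositeGroupKey() {
    }

    public CompositeGroupKey(int c, int l) {
        this.count = c;
        this.length = l;
    }

    // write(), readFields(), compareTo() and the getters stay unchanged
}

The framework calls readFields(DataInput) on the freshly created empty instance, which is why the fields do not need to be initialised in the constructor.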

Upvotes: 3
