
Reputation: 31

Hadoop Not Finding Map Class

I am using hadoop-1.2.1 and trying to run a simple RowCount HBase job using ToolRunner. However, no matter what I seem to try, hadoop cannot find the map class. The jar file is being copied correctly into hdfs, but I can't seem to figure out where it is going wrong. Please help!

Here is the code:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;

import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class HBaseRowCountToolRunnerTest extends Configured implements Tool

    // What to copy.
    public static final String JAR_NAME = "myJar.jar";
    public static final String LOCAL_JAR = <path_to_jar> + JAR_NAME;
    public static final String REMOTE_JAR = "/tmp/"+JAR_NAME;

    public static void main(String[] args) throws Exception 
        Configuration config = HBaseConfiguration.create();

//All connection configs set here -- omitted to post the code 

        config.set("tmpjars", REMOTE_JAR);

        FileSystem dfs = FileSystem.get(config);

        System.out.println("pathString = " + (new Path(LOCAL_JAR)).toString() + " \n");

        // Copy jar file to remote.
        dfs.copyFromLocalFile(new Path(LOCAL_JAR), new Path(REMOTE_JAR));

        // Get rid of jar file when we're done.
        dfs.deleteOnExit(new Path(REMOTE_JAR));

        // Run the job.
        System.exit(ToolRunner.run(config, new HBaseRowCountToolRunnerTest(), args));

    public int run(String[] args) throws Exception 
        Job job = new RowCountJob(getConf(), "testJob", "myLittleHBaseTable");

        return job.waitForCompletion(true) ? 0 : 1;

    public static class RowCountJob extends Job

        RowCountJob(Configuration conf, String jobName, String tableName) throws IOException
            super(conf, RowCountJob.class.getCanonicalName() + "_" + jobName);


            Scan scan = new Scan();
            scan.setFilter(new FirstKeyOnlyFilter());


            TableMapReduceUtil.initTableMapperJob(tableName, scan,
                    RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, this);



    }//end public static class RowCountJob extends Job

    //Mapper that runs the count
    //TableMapper -- TableMapper<KEYOUT, VALUEOUT> (*OUT by type)
    public static class RowCounterMapper extends TableMapper<ImmutableBytesWritable, Result> 

        //Counter enumeration to count the actual rows
        public static enum Counters {ROWS}

         * Maps the data.
         * @param row  The current table row key.
         * @param values  The columns.
         * @param context  The current context.
         * @throws IOException When something is broken with the data.
         * @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN,
         *   org.apache.hadoop.mapreduce.Mapper.Context)
        public void map(ImmutableBytesWritable row, Result values, Context context) throws IOException 
            // Count every row containing data times 2, whether it's in qualifiers or values

    }//end public static class RowCounterMapper extends TableMapper<ImmutableBytesWritable, Result> 

}//end public static void main(String[] args) throws Exception

Upvotes: 1

Views: 685

Answers (2)

Jing He
Jing He

Reputation: 914

I had the same problem today. Finally, I found it was because I forgot to insert the following statement in the driver class...


Upvotes: 0


Reputation: 31

Ok- I found a workaround to the problem and thought that I would share for all others having similar issues...

As it turns out, I abandoned the tmpjars configuration option and just copied the jar file directly into the DistributedCache from the code itself. Here is what it looks like:

// Stage the local jar on the remote file system.
FileSystem dfs = FileSystem.get(conf);
Path localJar = new Path(LOCAL_JAR);
Path remoteJar = new Path(REMOTE_JAR);
dfs.copyFromLocalFile(localJar, remoteJar);

// Have the staged copy removed once we are finished.
dfs.deleteOnExit(remoteJar);

// Add the staged jar to the classpath through the distributed cache.
DistributedCache.addFileToClassPath(remoteJar, conf, dfs);

Perhaps it doesn't solve what is going on with tmpjars, but it does work.

Upvotes: 1

Related Questions