user3343620

Reputation: 31

PIG UDF throwing error

I am getting an error when I run the Pig script below.

PIG SCRIPT:

REGISTER /var/lib/hadoop-hdfs/udf.jar;

REGISTER /var/lib/hadoop-hdfs/udf2.jar;

INPUT_LINES = Load 'hdfs:/Inputdata/DATA_GOV_US_Farmers_Market_DataSet.csv' using PigStorage(',') AS (FMID:chararray, MarketName:chararray, Website:chararray, Street:chararray, City:chararray, County:chararray, State:chararray, Zip:chararray, Schedule:chararray, X:chararray, Y:chararray, Location:chararray, Credit:chararray, WIC:chararray, WICcash:chararray, SFMNP:chararray, SNAP:chararray, Bakedgoods:chararray, Cheese:chararray, Crafts:chararray, Flowers:chararray, Eggs:chararray, Seafood:chararray, Herbs:chararray, Vegetables:chararray, Honey:chararray, Jams:chararray, Maple:chararray, Meat:chararray, Nursery:chararray, Nuts:chararray, Plants:chararray, Poultry:chararray, Prepared:chararray, Soap:chararray, Trees:chararray, Wine:chararray);

FILTERED_COUNTY = FILTER INPUT_LINES BY County=='Los Angeles';

REQUIRED_COLUMNS = FOREACH FILTERED_COUNTY GENERATE FMID,MarketName,$12..;

PER = FOREACH REQUIRED_COLUMNS GENERATE FMID,MarketName,fm($2..) AS Percentage;

STATUS = FOREACH PER GENERATE FMID,MarketName,Percentage,status(Percentage) AS Stat;

UDF1:

import java.io.IOException;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.Tuple;
public class fm extends EvalFunc<Integer>
{
    String temp;
    int per;
    int count = 0;

    public Integer exec(Tuple input) throws IOException {
        if (input == null || input.size() == 0)
            return -1;
        try
        {
            for (int i = 0; i < 25; i++)
            {
                if (input.get(i) == "" || input.get(i) == null)
                    return -1;

                temp = (String) input.get(i);
                if (temp.equals("Y"))
                    count++;
            }
            per = count * 4;
            count = 0;
            return per;
        }
        catch (Exception e)
        {
            throw new IOException("Caught exception processing input row ", e);
        }
    }
}

UDF2:

import java.io.IOException;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.Tuple;
public class status extends EvalFunc<String>
{
    public String exec(Tuple input) throws IOException
    {
        if (input == null || input.size() == 0)
            return null;
        try
        {
            String str = (String) input.get(0);
            int i = Integer.parseInt(str);
            if (i >= 60)
                return "HIGH";
            else if (i <= 40)
                return "LOW";
            else
                return "MEDIUM";
        }
        catch (Exception e)
        {
            throw new IOException("Caught exception processing input row ", e);
        }
    }
}

Dataset:

https://onedrive.live.com/redir?resid=7F81451078F4DBE8%21113

ERROR:

Pig Stack Trace

ERROR 2078: Caught error from UDF: status [Caught exception processing input row ]

org.apache.pig.impl.logicalLayer.FrontendException: ERROR 1066: Unable to open iterator for alias STATUS. Backend error : Caught error from UDF: status [Caught exception processing input row ]
    at org.apache.pig.PigServer.openIterator(PigServer.java:828)
    at org.apache.pig.tools.grunt.GruntParser.processDump(GruntParser.java:696)
    at org.apache.pig.tools.pigscript.parser.PigScriptParser.parse(PigScriptParser.java:320)
    at org.apache.pig.tools.grunt.GruntParser.parseStopOnError(GruntParser.java:194)
    at org.apache.pig.tools.grunt.GruntParser.parseStopOnError(GruntParser.java:170)
    at org.apache.pig.tools.grunt.Grunt.run(Grunt.java:69)
    at org.apache.pig.Main.run(Main.java:538)
    at org.apache.pig.Main.main(Main.java:157)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
    at java.lang.reflect.Method.invoke(Method.java:597)
    at org.apache.hadoop.util.RunJar.main(RunJar.java:208)
Caused by: org.apache.pig.backend.executionengine.ExecException: ERROR 2078: Caught error from UDF: status [Caught exception processing input row ]
    at org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POUserFunc.getNext(POUserFunc.java:365)
    at org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POUserFunc.getNext(POUserFunc.java:434)
    at org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator.getNext(PhysicalOperator.java:340)
    at org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POForEach.processPlan(POForEach.java:372)
    at org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POForEach.getNext(POForEach.java:297)
    at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.runPipeline(PigGenericMapBase.java:283)
    at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.map(PigGenericMapBase.java:278)

Upvotes: 2

Views: 2034

Answers (1)

matterhayes

Reputation: 458

It appears that the problem is that you are casting the input to a String in your status UDF, while your fm UDF actually returns an Integer. Instead you should have:

Integer i = (Integer)input.get(0);

This will definitely cause a problem unless you fix it. Without the original error message I can't say whether some other problem occurs earlier.
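
Putting that together, a corrected status might look something like this (a sketch, assuming Percentage is always the Integer produced by fm; the extra null check on the field is my addition):

import java.io.IOException;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.Tuple;

public class status extends EvalFunc<String>
{
    public String exec(Tuple input) throws IOException
    {
        if (input == null || input.size() == 0)
            return null;
        try
        {
            // fm returns an Integer, so read the field as an Integer instead of casting to String
            Integer i = (Integer) input.get(0);
            if (i == null)          // guard against a null Percentage field
                return null;
            if (i >= 60)
                return "HIGH";
            else if (i <= 40)
                return "LOW";
            else
                return "MEDIUM";
        }
        catch (Exception e)
        {
            throw new IOException("Caught exception processing input row " + e.getMessage(), e);
        }
    }
}

Note that once you read the value as an Integer there is no need for Integer.parseInt at all.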

I would have expected your stack trace to include the original exception message, which would help you debug this issue; it's strange that it doesn't. Without it, all you can go on is analyzing the code.

This might help with debugging in the future:

throw new IOException("Caught exception processing input row " + e.getMessage(), e);

For the fm UDF, I also recommend making the variables temp, per, and count local to the exec method instead of fields of the class, because they don't need to be shared between calls. This probably isn't causing the error, but it is better coding practice.
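
For example, a sketch of fm restructured that way (temp and per fall away entirely once count is local; I also compare the empty string with equals rather than ==, since == only compares references):

import java.io.IOException;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.Tuple;

public class fm extends EvalFunc<Integer>
{
    public Integer exec(Tuple input) throws IOException
    {
        if (input == null || input.size() == 0)
            return -1;
        try
        {
            int count = 0;                       // local, so no state is carried between calls
            for (int i = 0; i < 25; i++)
            {
                Object field = input.get(i);
                if (field == null || "".equals(field))
                    return -1;

                if ("Y".equals(field))           // count the columns flagged "Y"
                    count++;
            }
            return count * 4;                    // 25 columns, so each "Y" is 4 percent
        }
        catch (Exception e)
        {
            throw new IOException("Caught exception processing input row " + e.getMessage(), e);
        }
    }
}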

Upvotes: 3
