Reputation:
I'm writring a lua script to realize Decision Tree.But it always throws exception in a function when creating the tree.I can't find where it is wrong even if "assert".And sorry for my poor English:P.
DTree.lua:
-- Decision Tree (ID3)
-- by Darksun2010
function calcShannonEnt(dataSet)
local labelScore={};
for _,v in pairs(dataSet) do
local curLabel=v[#v];
if (labelScore[curLabel]) then
labelScore[curLabel]=labelScore[curLabel]+1;
else
labelScore[curLabel]=1;
end
end
local result=0.0;
for _,v in pairs(labelScore) do
local prob=v/#dataSet;
result=result-prob*math.log(prob,2);
end
return result;
end
function splitDataSet(dataSet,axis,value)
local reDataSet={};
for _,v in pairs(dataSet) do
if v[axis]==value then
local reSet=table.pack(table.unpack(v,1,axis-1),
table.unpack(v,axis+1,#v));
if (reSet[1]==nil) then
table.remove(reSet,1);
end
if (reSet[#reSet]==nil) then
table.remove(reSet);
end
table.insert(reDataSet,reSet);
end
end
return reDataSet;
end
function chooseBest(dataSet)
local numFeaures=#dataSet[1]-1;
local baseEnt=calcShannonEnt(dataSet);
local maxGain,bestFeature=0,-1;
for i=1,numFeaures do
local featSet={};
local newGain=0;
for _,v in pairs(dataSet) do
featSet[v[i]]=1;
end
for k,_ in pairs(featSet) do
local subDataSet=splitDataSet(dataSet,i,k);
local prob=#subDataSet/#dataSet;
newGain=newGain+prob*calcShannonEnt(subDataSet);
end
local infoGain=baseEnt-newGain;
if (infoGain>maxGain) then
maxGain=infoGain;
bestFeature=i;
end
end
return bestFeature;
end
function majorityCnt(classList)
local classCount={};
for _,v in pairs(classList) do
if (classCount[v]) then
classCount[v]=classCount[v]+1;
else
classCount[v]=1;
end
end
local maxN,maxK=0,nil;
for k,v in pairs(classCount) do
if (v>maxN) then
maxN,maxK=v,k;
end
end
return maxK;
end
function makeDTree(dataSet,labels)
local classList={};
for _,v in pairs(dataSet) do
table.insert(classList,v[#v]);
end
local endFlag=true;
for _v in pairs(classList) do
if (not endFlag) then
break;
end
endFlag=endFlag and (v==classList[1]);
end
if (#dataSet[1]==1) then
return majorityCnt(classList);
end
local bestFeat=chooseBest(dataSet);
local bestFeatLabel=labels[bestFeat];
local myTree,featSet={},{};
for _,v in pairs(dataSet) do
featSet[v[bestFeat]]=1;
end
for k,_ in pairs(featSet) do
local subLabels=labels;
local subTree=makeDTree(splitDataSet(dataSet,bestFeat,k),subLabels);
myTree[bestFeatLabel][k]=subTree;
end
return myTree;
end
I call it as this:
>labels={'no surfacing','flippers'}
>dataSet={{1,1,'yes'},{1,1,'yes'},{1,0,'no'},{0,1,'no'},{0,1,'no'}}
>makeDTree(dataSet,labels)
DTree.lua:106: attempt to index a nil value (field '?')
stack traceback:
DTree.lua:106: in function 'makeDTree'
DTree.lua:105: in function 'makeDTree'
(...tail calls...)
[C]: in ?
Upvotes: 0
Views: 700
Reputation: 72312
Line 106 is myTree[bestFeatLabel][k]=subTree;
but myTree
is empty and so myTree[bestFeatLabel]
is nil.
Upvotes: 1