Скачиваний:
29
Добавлен:
01.05.2014
Размер:
4.51 Кб
Скачать
unit EntropyCalculator;

interface

uses

DistributionClass,
Utils,
Math;

var

// Divisor applied by logFunc to its logarithm term.
// Assigned Math.Log2(2) in this unit's initialization section.
log2: double;


function oldEnt(bags: Distribution): double;

// Computes entropy of distribution after splitting.
function newEnt(bags: Distribution): double;

function logFunc(num: double): double;

// This method computes the information gain in the same way C4.5 does.
function splitCritValue(bags: Distribution; totalNoInst: double): double;

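// This method computes the information gain in the same way C4.5 does,
// using a caller-supplied entropy value in place of oldEnt(bags).
// @param bags the distribution
// @param totalnoInst the weight of ALL instances
// @param oldEnt the entropy value from which newEnt(bags) is subtracted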
// function splitCritValueN(bags: Distribution; totalnoInst: double; numerator: double): double;
function splitCritValueN(bags: Distribution; totalnoInst: double; oldEnt: double): double;

//Computes entropy after splitting without considering the class values.
// function splitEnt(bags: Distribution): double;

// Help method for computing the split entropy.
// function splitEnt(bags: Distribution; totalnoInst: double): double;



implementation

// Sums logFunc over the per-class weights of bags and subtracts that sum
// from logFunc of the total weight.
// @param bags the distribution to evaluate
// @return logFunc(bags.total()) minus the summed per-class logFunc values
function oldEnt(bags: Distribution): double;
var
  classSum: double;
  classIdx, classCount: integer;
begin
  classSum := 0;
  classCount := bags.numClasses();
  classIdx := 0;
  while classIdx < classCount do
  begin
    classSum := classSum + logFunc(bags.perClass(classIdx));
    Inc(classIdx);
  end;
  Result := logFunc(bags.total()) - classSum;
end;


// Computes the entropy of the distribution after splitting.
// For every bag, the logFunc values of its per-class weights are added to a
// running total and the logFunc of the bag's own weight is subtracted; the
// negated total is returned.
// @param bags the distribution to evaluate
function newEnt(bags: Distribution): double;
var
  acc: double;
  bagIdx, classIdx: integer;
  lastBag, lastClass: integer;
begin
  acc := 0;
  lastBag := bags.numBags() - 1;
  for bagIdx := 0 to lastBag do
  begin
    lastClass := bags.numClasses() - 1;
    for classIdx := 0 to lastClass do
      acc := acc + logFunc(bags.perClassPerBag(bagIdx, classIdx));
    acc := acc - logFunc(bags.perBag(bagIdx));
  end;
  Result := -acc;
end;


// Returns num * Math.Log2(num) divided by the unit-level variable log2.
// Inputs below the threshold yield 0; the threshold constant is hard coded
// for efficiency reasons.
// @param num the value to transform
function logFunc(num: double): double;
const
  Threshold = 1e-6;
begin
  if num < Threshold then
    Result := 0
  else
    Result := num * Math.Log2(num) / log2;
end;


// Computes the information gain in the same way C4.5 does.
// The raw gain oldEnt(bags) - newEnt(bags) is scaled by (1 - unknownRate),
// where unknownRate is the share of totalNoInst not accounted for by
// bags.total(), and the result is divided by bags.total().
// @param bags the distribution
// @param totalNoInst the weight of ALL instances
// @return 0 when totalNoInst is 0 or when the scaled gain equals 0 according
//         to Utils.eq; otherwise the scaled gain divided by bags.total()
function splitCritValue(bags: Distribution; totalNoInst: double): double;
var
  numerator: double;
  noUnknown: double;
  unknownRate: double;
begin
  // With no instance weight at all the unknown rate is undefined: computing
  // it would divide by zero. Such a split carries no gain.
  if totalNoInst = 0 then
  begin
    Result := 0;
    Exit;
  end;

  noUnknown := totalNoInst - bags.total();
  unknownRate := noUnknown / totalNoInst;
  numerator := oldEnt(bags) - newEnt(bags);
  numerator := (1 - unknownRate) * numerator;

  // Splits with no gain are useless.
  if Utils.eq(numerator, 0) then
    Result := 0
  else
    Result := numerator / bags.total();
end;


// Computes the information gain in the same way C4.5 does, using a
// caller-supplied entropy value instead of calling the oldEnt function.
// The difference oldEnt - newEnt(bags) is scaled by (1 - unknownRate), where
// unknownRate is the share of totalnoInst not accounted for by bags.total(),
// and the result is divided by bags.total().
// @param bags the distribution
// @param totalnoInst the weight of ALL instances
// @param oldEnt the entropy value from which newEnt(bags) is subtracted
//        (this parameter hides the unit-level function of the same name)
// @return 0 when totalnoInst is 0 or when the scaled gain equals 0 according
//         to Utils.eq; otherwise the scaled gain divided by bags.total()
function splitCritValueN(bags: Distribution; totalnoInst: double; oldEnt: double): double;
var
  numerator: double;
  noUnknown: double;
  unknownRate: double;
begin
  // With no instance weight at all the unknown rate is undefined: computing
  // it would divide by zero. Such a split carries no gain.
  if totalnoInst = 0 then
  begin
    Result := 0;
    Exit;
  end;

  noUnknown := totalnoInst - bags.total();
  unknownRate := noUnknown / totalnoInst;
  numerator := oldEnt - newEnt(bags);
  numerator := (1 - unknownRate) * numerator;

  // Splits with no gain are useless.
  if Utils.eq(numerator, 0) then
    Result := 0
  else
    Result := numerator / bags.total();
end;


// Help method for computing the split entropy.
{ function splitEnt(bags: Distribution; totalnoInst: double): double;
var
returnValue: double;
noUnknown: double;
i: integer;

begin

returnValue := 0;
noUnknown := totalnoInst-bags.total();

if (Utils.gr(bags.total(),0))
then
begin
for i :=0 to bags.numBags()-1 do
returnValue := returnValue-logFunc(bags.perBag(i));
returnValue := returnValue-logFunc(noUnknown);
returnValue := returnValue+logFunc(totalnoInst);
end;
result := returnValue;
end;}


{ function EntropyCalcularorClass.splitEnt(Distribution bags): double;
var
i: integer;
returnValue: double;
begin

returnValue := 0;

for i := 0 to bags.numBags()
do
returnValue := returnValue + logFunc(bags.perBag(i));
result := logFunc(bags.total()) - returnValue;
end;}


initialization

// Set the divisor that logFunc applies to its logarithm term.
log2 := Math.Log2(2);



end.
Соседние файлы в папке j48