Скачиваний:
28
Добавлен:
01.05.2014
Размер:
13.63 Кб
Скачать
unit DistributionClass;

interface

uses

DmmTypes,
Instances,
FastVector,
Utils;


// Weight bookkeeping for a set of instances split into bags (subsets):
// keeps the summed instance weight per class per bag, per bag, per class,
// and in total.
type Distribution = class

private

//Weight of instances per class per bag (indexed [bag][class]).
m_PerClassPerBag: D2DArray;

//Weight of instances per bag.
m_PerBag: DArray;

//Weight of instances per class.
m_PerClass: DArray;

//Total weight of instances.
m_Total: double;


// Adds the instance to every bag, scaled by weights[bag].
procedure addWeights(instance: TDMInstance; weights: DArray);

public

// Creates a distribution with only one bag from the instances in source.
constructor CreateI(source: TDMInstances);

// Creates an empty distribution with the given numbers of bags and classes.
constructor Create(numBags: integer; numClasses: integer);

// Creates a distribution from source, assigning instances to bags with the
// given model (cast to J48Split in the implementation).
constructor CreateM(source: TDMInstances; modelToUse: TObject);

// Creates a two-bag distribution: bag 0 is bag `index` of toMerge,
// bag 1 holds the remaining weight.
constructor createDI(toMerge: Distribution; index: integer);

// Copy constructor
constructor CreateC(distr: Distribution);

// Resets the total and resizes the internal arrays.
procedure setNums(numBags: integer; numClasses: integer);

//Adds given instance to given bag.
procedure add(bagIndex: integer; instance: TDMInstance);

// Adds all instances with indices startIndex..lastPlusOne-1 to given bag.
procedure addRange(bagIndex: integer; source: TDMInstances; startIndex: integer; lastPlusOne: integer);

//Shifts all instances in given range from one bag to another one.
procedure shiftRange(from: integer; too: integer ; source: TDMInstances; startIndex: integer; lastPlusOne: integer);

//Returns number of classes.
function numClasses: integer;

//Returns total
function total: double;

// Returns number of bags.
function numBags(): integer;

// Returns the weight stored for the given class in the given bag.
function perClassPerBag(bagIndex: integer; classIndex: integer): double;

//Returns number of (possibly fractional) instances in given bag.
function perBag(bagIndex: integer):double;

//Returns number of (possibly fractional) instances of given class.
function perClass(index: integer):double;


// Checks if at least two bags contain a minimum number of instances.
function check(minNoObj: double): boolean;

// Returns the index of the class with the largest weight (-1 if no classes).
function maxClass(): integer;

// Distributes instances whose attribute attIndex is missing over all bags.
procedure addInstWithUnknown(source: TDMInstances; attIndex: integer);

// Returns the weight of the majority class.
function numCorrect():double;

// Returns the weight of the majority class within bag n.
function numCorrectI(n: integer):double;

// Returns the total weight minus the weight of the majority class.
function numIncorrect():double;

// Returns the weight of bag n minus the weight of its majority class.
function numIncorrectI(n: integer):double;

// Returns the index of the bag with the largest weight (-1 if no bags).
function maxBag(): integer;

// Returns the index of the class with the largest weight in the given bag.
function maxClassI(index: integer): integer;


end;

implementation

uses
J48SplitClass;



// Copy constructor: builds an independent copy of distr.
//
// Every bag row of the per-class-per-bag table is duplicated, so the copy
// has the same number of bags and classes as distr and its three tables
// stay mutually consistent. The previous version copied only row 0 while
// still copying all per-bag weights, which left numBags() larger than the
// number of rows actually allocated.
//
// NOTE(review): the leftover "= new double[1][0]" remark in the old body
// suggests this may have been ported from a constructor that merges all
// bags into one. Confirm that callers expect a plain copy.
constructor Distribution.CreateC(distr: Distribution);
var
  bag: integer;
begin
  m_Total := distr.total;

  // Copy() returns a fresh dynamic array, so no prior SetLength is needed.
  m_PerClass := Copy(distr.m_PerClass, 0, Length(distr.m_PerClass));
  m_PerBag := Copy(distr.m_PerBag, 0, Length(distr.m_PerBag));

  // Duplicate each row; assigning the outer array alone would share rows.
  SetLength(m_PerClassPerBag, Length(distr.m_PerClassPerBag));
  for bag := 0 to Length(m_PerClassPerBag) - 1 do
    m_PerClassPerBag[bag] := Copy(distr.m_PerClassPerBag[bag], 0,
      Length(distr.m_PerClassPerBag[bag]));
end;

// Creates an empty distribution sized for numBags bags and numClasses
// classes. The total weight starts at zero.
constructor Distribution.Create(numBags: integer; numClasses: integer);
var
  bag: integer;
begin
  SetLength(m_PerBag, numBags);
  SetLength(m_PerClass, numClasses);

  // One row per bag, one column per class.
  SetLength(m_PerClassPerBag, numBags);
  for bag := numBags - 1 downto 0 do
    SetLength(m_PerClassPerBag[bag], numClasses);

  m_Total := 0;
end;

// Creates a distribution from the instances in source, using the given
// split model to decide which bag each instance belongs to.
//   source     - instances to count
//   modelToUse - must be a J48Split (the "as" cast raises otherwise)
// An instance for which the model reports subset -1 is spread over all
// bags using the weights returned by the model.
constructor Distribution.CreateM(source: TDMInstances; modelToUse: TObject);
var
  splitModel: J48Split;
  instances: TDMInstanceEnumeration;
  current: TDMInstance;
  bag: integer;
  subset: integer;
begin
  splitModel := modelToUse as J48Split;

  SetLength(m_PerBag, splitModel.numSubsets());
  SetLength(m_PerClass, source.numClasses());
  SetLength(m_PerClassPerBag, splitModel.numSubsets());
  for bag := 0 to splitModel.numSubsets() - 1 do
    SetLength(m_PerClassPerBag[bag], source.numClasses());
  m_Total := 0;

  instances := source.enumerateInstances();
  while instances.hasMoreElements() do
  begin
    current := instances.nextElement();
    subset := splitModel.whichSubset(current);
    if subset = -1 then
      // No single bag: distribute according to the model's weights.
      addWeights(current, splitModel.getWeights(current))
    else
      add(subset, current);
  end;
end;


// Adds the given instance to all bags, weighting it in bag i by
// instance.weight() * weights[i]. The per-class and total sums grow by the
// same amounts.
procedure Distribution.addWeights(instance: TDMInstance; weights: DArray);
var
  cls: integer;
  bag: integer;
  share: double;
begin
  cls := Trunc(instance.classValue());
  for bag := 0 to Length(m_PerBag) - 1 do
  begin
    share := instance.weight() * weights[bag];
    m_PerClassPerBag[bag][cls] := m_PerClassPerBag[bag][cls] + share;
    m_PerBag[bag] := m_PerBag[bag] + share;
    m_PerClass[cls] := m_PerClass[cls] + share;
    m_Total := m_Total + share;
  end;
end;

// Resets the total weight to zero and resizes the internal tables for
// numBags bags and numClasses classes.
//
// The per-class-per-bag table is indexed [bag][class], so its outer
// dimension must be numBags. It was previously sized with numClasses,
// which made the row loop below index past the end of the table whenever
// numBags > numClasses.
procedure Distribution.setNums(numBags: integer; numClasses: integer);
var
  bag: integer;
begin
  m_Total := 0;
  SetLength(m_PerClass, numClasses);
  SetLength(m_PerBag, numBags);
  SetLength(m_PerClassPerBag, numBags);
  for bag := 0 to numBags - 1 do
    SetLength(m_PerClassPerBag[bag], numClasses);
end;


// Returns the index of the bag holding the largest weight, or -1 when no
// bag passes the comparison. Ties go to the later bag because the
// comparison is Utils.grOrEq.
function Distribution.maxBag(): integer;
var
  best: double;
  bag: integer;
begin
  Result := -1;
  best := 0;
  for bag := 0 to Length(m_PerBag) - 1 do
    if Utils.grOrEq(m_PerBag[bag], best) then
    begin
      best := m_PerBag[bag];
      Result := bag;
    end;
end;


// Adds the given instance, with its full weight, to the given bag and
// updates the per-class and total sums accordingly.
procedure Distribution.add(bagIndex: integer; instance: TDMInstance);
var
  cls: integer;
  w: double;
begin
  w := instance.weight;
  // The class value is stored as a float; truncate it to get the index.
  cls := Trunc(instance.classValue());

  m_Total := m_Total + w;
  m_PerClass[cls] := m_PerClass[cls] + w;
  m_PerBag[bagIndex] := m_PerBag[bagIndex] + w;
  m_PerClassPerBag[bagIndex][cls] := m_PerClassPerBag[bagIndex][cls] + w;
end;


// Adds the instances source[startIndex] .. source[lastPlusOne - 1] to the
// given bag. Per-class sums are updated per instance; the bag and total
// sums are updated once with the accumulated weight of the range.
procedure Distribution.addRange(bagIndex: integer; source: TDMInstances; startIndex: integer; lastPlusOne: integer);
var
  rangeWeight: double;
  w: double;
  cls: integer;
  current: TDMInstance;
  pos: integer;
begin
  rangeWeight := 0;
  for pos := startIndex to lastPlusOne - 1 do
  begin
    current := source.instance(pos) as TDMInstance;
    cls := Trunc(current.classValue());
    w := current.weight();
    rangeWeight := rangeWeight + w;
    m_PerClassPerBag[bagIndex][cls] := m_PerClassPerBag[bagIndex][cls] + w;
    m_PerClass[cls] := m_PerClass[cls] + w;
  end;
  m_PerBag[bagIndex] := m_PerBag[bagIndex] + rangeWeight;
  m_Total := m_Total + rangeWeight;
end;


// Number of classes, taken from the length of the per-class weight array.
function Distribution.numClasses: integer;
begin
  Result := Length(m_PerClass);
end;


// Total weight of all instances recorded in this distribution.
function Distribution.total: double;
begin
  Result := m_Total;
end;


// Number of bags, taken from the length of the per-bag weight array.
function Distribution.numBags(): integer;
begin
  Result := Length(m_PerBag);
end;

// Weight recorded for class classIndex inside bag bagIndex.
// No bounds checking is done here.
function Distribution.perClassPerBag(bagIndex: integer; classIndex: integer): double;
begin
  Result := m_PerClassPerBag[bagIndex][classIndex];
end;


// Moves the instances source[startIndex] .. source[lastPlusOne - 1] from
// bag `from` to bag `too`. Only the per-bag and per-class-per-bag sums
// change; per-class sums and the total are left as they are.
procedure Distribution.shiftRange(from: integer; too: integer ; source: TDMInstances; startIndex: integer; lastPlusOne: integer);
var
  cls: integer;
  w: double;
  current: TDMInstance;
  pos: integer;
begin
  for pos := startIndex to lastPlusOne - 1 do
  begin
    current := source.instance(pos) as TDMInstance;
    cls := Trunc(current.classValue());
    w := current.weight();

    // Take the weight out of the source bag ...
    m_PerClassPerBag[from][cls] := m_PerClassPerBag[from][cls] - w;
    m_PerBag[from] := m_PerBag[from] - w;

    // ... and put it into the destination bag.
    m_PerClassPerBag[too][cls] := m_PerClassPerBag[too][cls] + w;
    m_PerBag[too] := m_PerBag[too] + w;
  end;
end;

// Weight of the (possibly fractional) instances in the given bag.
// A negative bagIndex yields 0; the upper bound is not checked.
function Distribution.perBag(bagIndex: integer):double;
begin
  if bagIndex < 0 then
    Result := 0
  else
    Result := m_PerBag[bagIndex];
end;

// Weight of the (possibly fractional) instances of the given class.
// A negative index yields 0; the upper bound is not checked.
function Distribution.perClass(index: integer):double;
begin
  if index < 0 then
    Result := 0
  else
    Result := m_PerClass[index];
end;


// Returns the index of the class with the largest total weight.
// Returns -1 when there are no classes, and 0 when no class weight is
// greater than zero according to Utils.gr. Ties go to the earlier class.
function Distribution.maxClass(): integer;
var
  best: double;
  cls: integer;
  classCount: integer;
begin
  classCount := numClasses();
  if classCount < 1 then
  begin
    Result := -1;
    Exit;
  end;

  Result := 0;
  best := 0;
  for cls := 0 to classCount - 1 do
    if Utils.gr(m_PerClass[cls], best) then
    begin
      best := m_PerClass[cls];
      Result := cls;
    end;
end;

// Returns true when at least two bags each hold a weight of minNoObj or
// more (compared with Utils.grOrEq).
function Distribution.check(minNoObj: double): boolean;
var
  qualifying: integer;
  bag: integer;
begin
  qualifying := 0;
  for bag := 0 to Length(m_PerBag) - 1 do
    if Utils.grOrEq(m_PerBag[bag], minNoObj) then
      Inc(qualifying);
  Result := qualifying > 1;
end;


// Creates a distribution with only one bag according to the instances in
// source.
//
// The per-class-per-bag table gets exactly one row (the single bag) with
// one column per class. The previous version allocated numClasses rows and
// then sized rows 0 and 1 unconditionally, which indexed past the end of
// the table when source had a single class.
constructor Distribution.createI(source: TDMInstances);
var
  enu: TDMInstanceEnumeration;
begin
  m_Total := 0;
  SetLength(m_PerBag, 1);
  SetLength(m_PerClass, source.numClasses());
  SetLength(m_PerClassPerBag, 1);
  SetLength(m_PerClassPerBag[0], source.numClasses());

  // Every instance goes into bag 0.
  enu := source.enumerateInstances();
  while enu.hasMoreElements() do
    add(0, enu.nextElement());
end;

// Weight of the instances belonging to the majority class, or 0 when
// maxClass() reports no class (-1).
function Distribution.numCorrect():double;
var
  majority: integer;
begin
  majority := maxClass();
  if majority <= -1 then
    Result := 0
  else
    Result := m_PerClass[majority];
end;

// Total weight minus the weight of the majority class.
function Distribution.numIncorrect():double;
begin
  Result := m_Total - numCorrect();
end;


// Adds every instance of source whose attribute attIndex is missing to all
// bags, splitting its weight across the bags.
//
// The split proportions are fixed before any instance is added: bag j gets
// m_PerBag[j] / total, or an equal share 1 / numBags when the total is
// zero.
//
// Fixes over the previous version:
//  * the equal share divided by maxIndex(probs) (the position of the
//    largest element) instead of the number of bags;
//  * the total was updated with trunc(total) + trunc(weight), discarding
//    fractional weights and drifting away from the per-bag and per-class
//    sums.
procedure Distribution.addInstWithUnknown(source: TDMInstances; attIndex: integer);
var
  probs: DArray;
  weight, newWeight: double;
  classIndex: integer;
  instance: TDMInstance;
  j: integer;
  enu: TDMInstanceEnumeration;
begin
  // Proportion of each bag, computed from the current state.
  SetLength(probs, Length(m_PerBag));
  for j := 0 to Length(probs) - 1 do
  begin
    if Utils.eq(m_Total, 0) then
      probs[j] := 1.0 / Length(probs)
    else
      probs[j] := m_PerBag[j] / m_Total;
  end;

  enu := source.enumerateInstances();
  while enu.hasMoreElements() do
  begin
    instance := enu.nextElement();
    if instance.isMissing(attIndex) then
    begin
      classIndex := Trunc(instance.classValue());
      weight := instance.weight();
      m_PerClass[classIndex] := m_PerClass[classIndex] + weight;
      m_Total := m_Total + weight;

      // Spread the instance over the bags according to the proportions.
      for j := 0 to Length(m_PerBag) - 1 do
      begin
        newWeight := probs[j] * weight;
        m_PerClassPerBag[j][classIndex] :=
          m_PerClassPerBag[j][classIndex] + newWeight;
        m_PerBag[j] := m_PerBag[j] + newWeight;
      end;
    end;
  end;
end;

// Returns the index of the class with the largest weight inside bag
// `index`. When the bag's weight is not greater than zero (Utils.gr), the
// overall majority class from maxClass() is returned instead.
function Distribution.maxClassI(index: integer): integer;
var
  best: double;
  cls: integer;
begin
  if not Utils.gr(m_PerBag[index], 0) then
  begin
    // Empty bag: fall back to the majority class of the whole distribution.
    Result := maxClass();
    Exit;
  end;

  Result := 0;
  best := 0;
  for cls := 0 to Length(m_PerClass) - 1 do
    if Utils.gr(m_PerClassPerBag[index][cls], best) then
    begin
      best := m_PerClassPerBag[index][cls];
      Result := cls;
    end;
end;


// Weight in bag n that does not belong to that bag's majority class.
function Distribution.numIncorrectI(n: integer): double;
var
  bagWeight: double;
begin
  bagWeight := m_PerBag[n];
  Result := bagWeight - numCorrectI(n);
end;

// Weight in bag n that belongs to the class chosen by maxClassI(n).
function Distribution.numCorrectI(n: integer): double;
var
  majority: integer;
begin
  majority := maxClassI(n);
  Result := m_PerClassPerBag[n][majority];
end;


// Creates a distribution with two bags from toMerge: bag 0 is a copy of
// bag `index` of toMerge, and bag 1 holds everything else (per-class
// weight minus bag 0, total minus bag 0). Per-class sums and the total are
// taken over unchanged.
constructor Distribution.createDI(toMerge: Distribution; index: integer);
var
  cls: integer;
  classCount: integer;
begin
  classCount := toMerge.numClasses();
  m_Total := toMerge.total;

  SetLength(m_PerClass, classCount);
  SetLength(m_PerClassPerBag, 2);
  SetLength(m_PerClassPerBag[0], classCount);
  SetLength(m_PerClassPerBag[1], classCount);

  for cls := 0 to classCount - 1 do
  begin
    m_PerClass[cls] := toMerge.m_PerClass[cls];
    m_PerClassPerBag[0][cls] := toMerge.m_PerClassPerBag[index][cls];
    // Remainder of this class outside the selected bag.
    m_PerClassPerBag[1][cls] := toMerge.m_PerClass[cls] - m_PerClassPerBag[0][cls];
  end;

  SetLength(m_PerBag, 2);
  m_PerBag[0] := toMerge.m_PerBag[index];
  m_PerBag[1] := m_Total - m_PerBag[0];
end;



end.
Соседние файлы в папке j48