Скачиваний:
28
Добавлен:
01.05.2014
Размер:
16.76 Кб
Скачать
unit PriorEstimation;

interface
uses
Instances,
uContainers,
uConsts,
dmmTypes,
SpecialFunctions,
DoubleObject,
math,
ItemSet,
RuleItem,
SysUtils,
associator,
Windows;

const
//параметр используемый в качестве шага при генерации правил
SEED = 0;

//максимальное число атрибутов для которых может быть вычислена априорная точность
MAX_N = 1024;


type TDMPriorEstimation = class

protected
//число случайных правил
m_numRandRules : integer;

//число интервалов
m_numIntervals : integer;

//вектора
m_instances : TDMInstances ;

//поиск стандартных/для класса ассоциативных правил
m_CARs : boolean;

//хэш таблица хранит значений точности сгенерированных правил
m_distribution : TStringHashtable ;

//хэш таблица хранит значения априорных оценок
m_priors : TStringHashtable ;

//сумма достоверности правил заданной длины
m_sum : double;

//точка разбиения интервалов
m_midPoints : DArray;

public
constructor Create(instances : TDMInstances; numRules : integer; numIntervals : integer; car : boolean);
//оценка априорных вероятностей
function estimatePrior() : TStringHashtable;
function calculatePriorSum(weighted : boolean; mPoint : double ) : double;
procedure generateDistribution();
procedure midPoints();
function midPoint(size : double; number : integer) : double ;
//генерация случайного ассоциативного правила заданной длины
function randomRule(maxLength : integer; actualLength : integer) : IArray;
//разбаение набора на правую и левую часть
function splitItemSet (premiseLength : integer; itemArray : IArray) : TDMRuleItem;
//генерация наборов заданной длины
function randomCARule(maxLength : integer; actualLength : integer) : IArray;
function addCons (itemArray : IArray) : TDMRuleItem;
//пересчитать значение поддержки
procedure updateCounters(var itemSet : TDMItemSet );
//пересчитать распределение достоверности
procedure buildDistribution(conf : double; length : double);
function findIntervall(conf : double ) : double;
function getMidPoints() : DArray;
destructor Destroy; override;
end;
//log2 для биномиальных коэффициентов
function logbinomialCoefficient(upperIndex : integer; lowerIndex : integer) : double ;

implementation

constructor TDMPriorEstimation.Create(instances : TDMInstances; numRules : integer; numIntervals : integer; car : boolean);
begin
m_instances := TDMInstances.Create(instances);
m_CARs := car;
m_numRandRules := numRules;
m_numIntervals := numIntervals;
RandSeed := m_instances.getRandomNumberGenerator(SEED);
m_distribution := nil ;
m_priors := nil ;
m_midPoints := nil;
end;

function logbinomialCoefficient(upperIndex : integer; lowerIndex : integer) : double ;
var
curResult : double;
begin
cuRresult := 1.0;
if(upperIndex = lowerIndex) or (lowerIndex = 0) then
begin
result := curResult;
exit;
end;
curResult := SpecialFunctions.log2Binomial(upperIndex, lowerIndex);
result := curResult;
end;

procedure TDMPriorEstimation.midPoints();
var
i : integer;
begin
SetLength(m_midPoints,m_numIntervals);
i := 0;
while(i < m_numIntervals) do
begin
m_midPoints[i] := midPoint(1.0/m_numIntervals, i);
inc(i);
end;
end;

function TDMPriorEstimation.midPoint(size : double; number : integer) : double ;
begin
Result := (size * number) + (size / 2.0);
end;

function TDMPriorEstimation.randomRule(maxLength : integer; actualLength : integer) : IArray;
var
itemArray : IArray;
k,h : integer;
help : integer;
mark : integer;
begin
SetLEngth(itemArray,maxLength);
k := 0;
while (k < length(itemArray)) do
begin
itemArray[k] := -1;
inc(k);
end;

help := actualLength;
if(help = maxLength) then
begin
help := 0;
h := 0;
while(h < length(itemArray)) do
begin
itemArray[h] := randomRange(0,(m_instances.attribute(h)).numValues());
inc(h);
end;
end;
while(help > 0)do
begin
mark := randomRange(0,maxLength);
if(itemArray[mark] = -1) then
begin
dec(help);
itemArray[mark] := randomRAnge(0,(m_instances.attribute(mark)).numValues());
end;
end;
result := itemArray;
end;

function TDMPriorEstimation.splitItemSet (premiseLength : integer; itemArray : IArray) : TDMRuleItem;
var
cons : IArray;
i,help : integer;
mark : integer;
premise,consequence : TDMItemSet;
current : TDMRuleItem;
begin
SetLEngth(cons,m_instances.numAttributes());
cons :=Copy(itemArray, 0, length(itemArray));
help := premiseLength;
while(help > 0) do
begin
mark := randomRange(0,length(itemArray));
if(cons[mark] <> -1) then
begin
dec(help);
cons[mark] :=-1;
end;
end;

if(premiseLength = 0)then
begin
i := 0;
while(i < length(itemArray)) do
begin
itemArray[i] := -1;
inc(i);
end;
end else
begin
i := 0;
while(i < length(itemArray)) do
begin
if(cons[i] <> -1) then
itemArray[i] := -1;
inc(i);
end;
end;
premise := TDMItemSet.Create(itemArray);
consequence := TDMItemSet.Create(cons);
current := TDMRuleItem.Create(premise, consequence, true);

result := current;
end;

function TDMPriorEstimation.randomCARule(maxLength : integer; actualLength : integer) : IArray;
var
itemArray : IArray;
help : integer;
k, h : integer;
mark : integer;
begin
SetLength(itemArray,maxLength);
k := 0;
while(k < length(itemArray)) do
begin
itemArray[k] := -1;
inc(k);
end;
if(actualLength = 1) then
begin
result := itemArray;
exit;
end;

help := actualLength-1;
if(help = maxLength-1) then
begin
help := 0;
h := 0;
while(h < length(itemArray)) do
begin
if(h <> m_instances.classIndex()) then
itemArray[h] := randomRange(0, (m_instances.attribute(h)).numValues());
inc(h);
end;
end;
while(help > 0) do
begin
mark := randomRange(0,maxLength);
if(itemArray[mark] = -1) and (mark <> m_instances.classIndex()) then
begin
dec(help);
itemArray[mark] := randomRange(0,(m_instances.attribute(mark)).numValues());
end;
end;
result := itemArray;
end;

function TDMPriorEstimation.addCons (itemArray : IArray) : TDMRuleItem;
var
premise : TDMItemSet;
cons : IArray;
i : integer;
consequence : TDMItemSet;
current : TDMRuleItem;
begin
premise := TDMItemSet.Create(itemArray);
SetLength(cons,length(itemArray));

i := 0;
while(i < length(itemArray)) do
begin
cons[i] := -1;
inc(i);
end;
cons[m_instances.classIndex()] := randomRange(0,(m_instances.attribute(m_instances.classIndex())).numValues());
consequence := TDMItemSet.Create(cons);
current := TDMRuleItem.Create(premise,consequence, true);

result := current;
end;

procedure TDMPriorEstimation.updateCounters(var itemSet : TDMItemSet );
var
i : integer;
begin
i := 0;
while (i < m_instances.numInstances()) do
begin
itemSet.upDateCounter(m_instances.instance(i));
inc(i);
end;
end;

function TDMPriorEstimation.findIntervall(conf : double ) : double;
var
endInt : integer;
startInt : integer;
mid : integer;
begin
if(conf = 1.0) then
begin
result := m_midPoints[length(m_midPoints)-1];
exit;
end;
endInt := length(m_midPoints)-1;
startInt := 0;
while (abs(endInt-startInt) > 1) do
begin
mid := (startInt + endInt) div 2;
if (conf > m_midPoints[mid]) then
startInt := mid+1;
if (conf < m_midPoints[mid]) then
endInt := mid-1;
if(conf = m_midPoints[mid]) then
begin
result := m_midPoints[mid];
exit;
end;
end;
if(abs(conf-m_midPoints[startInt]) <= abs(conf-m_midPoints[endInt])) then
result := m_midPoints[startInt]
else
result := m_midPoints[endInt];
end;

procedure TDMPriorEstimation.buildDistribution(conf : double; length : double);
var
mPoint : double;
key : AnsiString;
mPointValue : AnsiString;
lengthValue : AnsiString;
curTDMDoubleObject : TDMDoubleObject;

begin
key :='';
mPoint := findIntervall(conf);
Str(mPoint:4:3,mPointValue);
Str(length:4:1,lengthValue);

key := mPointValue + lengthValue;
m_sum := m_sum + conf;

if m_distribution.Contains(key) then
begin
curTDMDoubleObject := m_distribution.Items[key];
if(curTDMDoubleObject <> nil) then
begin
conf := conf + curTDMDoubleObject.doubleValue();
end;
m_distribution.OwnValues := True;
m_distribution.remove(key);
m_distribution.OwnValues := false;
end;

m_distribution.Add(key,TDMDoubleObject.Create(conf));

end;


procedure TDMPriorEstimation.generateDistribution();
var
jump : boolean;
i,maxLength, count, ruleCounter : integer;
itemArray : IArray;
current : TDMRuleItem;

h,j,k,w : integer;
ruleItem : IArray;
rule : TDMItemSet;
key : AnsiString;
mPointVal : AnsiString;
iVal : AnsiString;
curTDMDoubleObject : TDMDoubleObject;
conf : double;
doublei : double;
begin
maxLength := m_instances.numAttributes();
count := 0;

m_distribution := TStringHashtable.Create(false,maxLength*m_numIntervals);

if(m_instances.numAttributes() = 0) then
raise Exception.Create('Нет атрибутов');
if(m_instances.numAttributes() >= MAX_N) then
raise Exception.Create('Слишком много аттрибутов для априорной оценки');
if(m_instances.numInstances() = 0) then
raise Exception.Create('Нет векторов');

h := 0;
while ( h < maxLength) do
begin
if (m_instances.attribute(h).isNumeric()) then
raise Exception.Create('Количественные атрибуты не поддерживаются');
inc(h);
end;
if(m_numIntervals = 0) or (m_numRandRules = 0) then
raise Exception.Create('Невозможна априорная инициализация');

midPoints();

//генерация случайных правил длиной i и вычисление поддержки;
//если поддержка >0 вычисляется достоверность

i := 1;
while(i <= maxLength) do
begin
if (terminated) then
begin
ExitThread(1);
destroy;
end;
m_sum := 0;
j := 0;
count := 0;

while(j < m_numRandRules) do
begin
inc(count);
jump := false;
if(not m_CARs) then
begin
itemArray := randomRule(maxLength,i);
current := splitItemSet(randomRange(0,i), itemArray);
end else
begin
itemArray := randomCARule(maxLength,i);
current := addCons(itemArray);
end;

SetLength(ruleItem,maxLength);

k := 0;
while (k < length(itemArray)) do
begin
if(current.m_premise.m_items[k] <> -1) then
ruleItem[k] := current.m_premise.m_items[k]
else
if(current.m_consequence.m_items[k] <> -1) then
ruleItem[k] := current.m_consequence.m_items[k]
else
ruleItem[k] := -1;
inc(k);
end;
rule := TDMItemSet.Create(ruleItem);
updateCounters(rule);
ruleCounter := rule.m_counter;
FreeandNil(rule);
if(ruleCounter > 0) then
jump := true;
updateCounters(current.m_premise);
inc(j);
if(jump) then
buildDistribution(ruleCounter/current.m_premise.m_counter,i);
current.OwnValues := true;
FreeandNil(current);
end;

//нормализация
if(m_sum > 0) then
begin
w := 0;
while ( w < length(m_midPoints)) do
begin
key :='';
Str(m_midPoints[w]:4:3,mPointVal);
doublei := i;
Str(doublei:4:1,iVal);

key := mPointVal + iVal;

if(m_distribution.Contains(key) = false )then
begin
m_distribution.Add(key,TDMDoubleObject.Create(1.0/m_numIntervals));
m_sum := m_sum + 1.0/m_numIntervals;
end;
inc(w);
end;

w := 0;
while (w < length(m_midPoints)) do
begin
conf := 0;
key :='';
Str(m_midPoints[w]:4:3,mPointVal);
doublei := i;
Str(doublei:4:1,iVal);
key := mPointVal + iVal;

if(m_distribution.Contains(key) = true) then
begin
curTDMDoubleObject := m_distribution.Items[key];
if (curTDMDoubleObject <> nil) then
begin
conf := curTDMDoubleObject.doubleValue / m_sum;
m_distribution.OwnValues := true;
m_distribution.remove(key);
m_distribution.OwnValues := false;
m_distribution.add(key,TDMDoubleObject.Create(conf));
end;
end;
inc(w);
end;
end else
begin
w := 0;
while(w < length(m_midPoints)) do
begin
key :='';
Str(m_midPoints[w]:4:3,mPointVal);
doublei := i;
Str(doublei:4:1,iVal);
key := mPointVal + iVal;
m_distribution.add(key,TDMDoubleObject.Create(1.0/m_numIntervals));
inc(w);
end;
end;
inc(i);
end;

end;

function TDMPriorEstimation.estimatePrior() : TStringHashtable;
var
prior, denominator, mPoint : double;
m_priors : TStringHashtable;
i : integer;
mPointVal : AnsiString;
j : integer;

begin
m_priors := TStringHashtable.Create(false,m_numIntervals);
denominator := calculatePriorSum(false,1.0);
generateDistribution();

i := 0;
while(i < m_numIntervals) do
begin
mPoint := m_midPoints[i];
prior := calculatePriorSum(true,mPoint) / denominator;
Str(mPoint,mPointVal);
m_priors.add(mPointVAl, TDMDoubleObject.Create(prior));
inc(i);
end;
result := m_priors;
end;

function TDMPriorEstimation.calculatePriorSum(weighted : boolean; mPoint : double ) : double;
var
distr, sum, max : double;
i : integer;
key : AnsiString;
mPointValue : ansiString;
iValue : AnsiString;
curTDMDoubleObject : TDMDoubleObject;
addend : double;
doublei : double;
begin
sum := 0;
max := logbinomialCoefficient(m_instances.numAttributes(),trunc(m_instances.numAttributes()/2));

i := 1;
while(i <= m_instances.numAttributes()) do
begin
if(weighted) then
begin
key := '';
Str(mPoint:4:3,mPointValue);
doublei := i;
Str(doublei:4:1,iValue);

key := mPointValue + iValue;
curTDMDoubleObject := m_distribution.Items[key];

if(curTDMDoubleObject <> nil) then
begin
distr := curTDMDoubleObject.doubleValue;
end
else
distr := 0;
if(distr <> 0) then
begin
addend := log2(distr) - max + log2((power(2,i)-1)) + logbinomialCoefficient(m_instances.numAttributes(),i);
sum := sum + power(2,addend);
end;
end else
begin
addend := log2((power(2,i)-1)) - max + logbinomialCoefficient(m_instances.numAttributes(),i);
sum := sum + power(2,addend);
end;
inc(i);
end;
result := sum;
end;

function TDMPriorEstimation.getMidPoints() : DArray;
begin
result := m_midPoints;
end;

destructor TDMPriorEstimation.Destroy;
begin
FreeAndNil(m_instances);

m_distribution.OwnValues := true;
FreeAndNil(m_distribution);

FreeAndNil(m_priors) ;

m_midPoints := nil;
end;
end.
Соседние файлы в папке DMAssociations