Скачиваний:
28
Добавлен:
01.05.2014
Размер:
13.45 Кб
Скачать
unit ItemSet;

interface
uses
StandardObject,
Instances,
dmmTypes,
FastVector,
uContainers,
doubleObject,
SysUtils,
math;

type

TDMItemSet = class (TDMStandardObject)

public
m_id : integer;

//количество векторов, которые содержат данный набор
m_counter : integer;

//общее число векторов
m_totalTransactions : integer;


m_OwnValues : boolean;

public
//набор
m_items : IArray;
//условия (юзать аккуратно и хорошо подумав ;) )
m_condidtions : IArray;

public
property OwnValues: Boolean read m_OwnValues write m_OwnValues;

constructor Create(totalTrans : integer); overload;
constructor Create(itemsArray : IArray); overload;
constructor Create(totalTrans : integer; itemsArray : IArray; conditionValues : IArray); overload;
constructor Create(itemsArray : IArray; conditionValues : IArray);overload;
function copyObject : TObject;override;

procedure setCounter(counter : integer);
//проверка содержит ли вектор данный набор
function containedBy(instance : TDMInstance) : boolean;
//проверка являются ли два набора одинаковыми
function equals(itemSet : TObject ) : boolean;
function hashCode() : AnsiString;
//пересчет поддержки на основе заданного вектора
procedure upDateCounter(instance : TDMInstance);
procedure pruneRules(var rules : TDMFastVectorArray; minConfidence : double);
function support() : integer;
function toString(instances : TDMInstances) : AnsiString;

destructor Destroy; override;
end;

//пересчет поддержки
procedure upDateCounters(var itemSets : TDMFastVector; instances : TDMInstances);
//удаление наборов у которых поддержка меньше минимальной
function deleteItemSets( var itemSets : TDMFastVector; minSupport : integer; maxSupport : integer) : TDMFastVector;
//заполнить хэш таблицу
function getHashtable( itemSets : TDMFastVector; initialSize : integer) : TStringHashtable;
//обрезать наборы длиной k
function pruneItemSets(var toPrune : TDMFastVector; kMinusOne : TStringHashtable) : TDMFastVector;
//наборы из обного элемента
function getSingletons(instances : TDMInstances) : TDMFastVector;
//формирование k-элементных наборов из k-1-элементных наборов
function mergeAllItemSets( itemSets : TDMFastVector; size : integer; totalTrans : integer) : TDMFastVector;


implementation

var
curId : integer;

constructor TDMItemSet.Create(totalTrans : integer);
begin
m_totalTransactions := totalTrans;
m_id := curId;
inc(curId);
m_items := nil;
m_condidtions := nil;
m_OwnValues := true;
m_condidtions := nil;
end;

constructor TDMItemSet.Create(totalTrans : integer; itemsArray : IArray; conditionValues : IArray);
begin
m_totalTransactions := totalTrans;
m_items := itemsArray;
m_condidtions := conditionValues;
m_counter :=1;
m_OwnValues := true;
end;

constructor TDMItemSet.Create(itemsArray : IArray; conditionValues : IArray);
begin
m_items := itemsArray;
m_condidtions := conditionValues;
m_counter := 0;
m_OwnValues := true;
end;

constructor TDMItemSet.Create(itemsArray : IArray);
begin
m_items := itemsArray;
m_counter := 0;
m_OwnValues := true;
end;


function TDMItemSet.copyObject : TObject;
var
rItemSet : TDMItemSet;
begin
rItemSet := TDMItemSet.Create;
rItemSet.m_totalTransactions := m_totalTransactions;
rItemSet.m_id := m_id;
rItemSet.m_items := Copy(m_items);
rItemSet.m_condidtions := Copy(m_items);
rItemSet.m_counter := m_counter;
result := rItemSet;
end;

procedure TDMItemSet.setCounter(counter : integer);
begin
m_counter := counter;
end;

//переписать; предварительно посоветоваться с Наташей
function TDMItemSet.containedBy(instance : TDMInstance) : boolean;
var
i : integer;
begin
i := 0;
while ( i < instance.numAttributes()) do
begin
if (m_items[i] > -1) then
begin
if (instance.isMissing(i)) then
begin
result := false;
Exit;
end;
if (m_items[i] <> instance.value(i)) then
begin
result := false;
Exit;
end;
end;
inc(i);
end;
result := true;
end;

//шо це токе?
function deleteItemSets(var itemSets : TDMFastVector; minSupport : integer;
maxSupport : integer) : TDMFastVector;
var
newVector : TDMFastVector;
current : TDMItemSet;
i : integer;
begin
newVector := TDMFastVector.Create(itemSets.size());

i := 0;
while (i < itemSets.size()) do
begin
current := itemSets.elementAt(i) as TDMItemSet;
if ((current.m_counter >= minSupport) and (current.m_counter <= maxSupport)) then
newVector.addElement(current)
else
FreeAndNil(current);
inc(i);
end;
itemSets.removeAllElements;
itemSets := nil;
result := newVector;
end;

function TDMItemSet.equals(itemSet : TObject ) : boolean;
var
i : integer;
begin
if ((itemSet = nil) or not(itemSet.ClassType()=(self.ClassType()))) then
begin
result := false;
exit;
end;
if (length(m_items) <> length(m_items)) then
begin
result := false;
exit;
end;

i := 0;
while (i < length(m_items)) do
begin
if (m_items[i] <> (itemSet as TDMItemSet).m_items[i]) then
begin
result := false;
exit;
end;
inc(i);
end;
result := true;
end;

function TDMItemSet.hashCode() : AnsiString;
var
current : AnsiString;
i : integer;
begin
current := '';
i := length(m_items)-1;
while (i >= 0) do
begin
current := current + chr(m_items[i]);
dec(i);
end;
result := current;
end;

//что делает и зачем?
function getHashtable( itemSets : TDMFastVector; initialSize : integer) : TStringHashtable;
var
setsHashtable : TStringHashtable;
current: TDMItemSet;

i : integer;
PTDMItemSet : ^TDMItemSet;
begin
PTDMItemSet := nil;
setsHashtable := TStringHashtable.Create(false,initialSize);

i := 0;
while (i < itemSets.size()) do
begin
current := itemSets.elementAt(i) as TDMItemSet;

setsHashtable.Add(current.hashCode(),current);
PTDMItemSet := nil;

inc(i);
end;

result := setsHashtable;
end;

procedure TDMItemSet.upDateCounter(instance : TDMInstance);
begin
if (containedBy(instance)) then
inc(m_counter);
end;

procedure upDateCounters(var itemSets : TDMFastVector; instances : TDMInstances);
var
i : integer;
enu : TDMFastVectorEnumeration;
begin
i := 0;
while (i < instances.numInstances()) do
begin
enu := TDMFastVectorEnumeration.Create(itemSets);
while (enu.hasMoreElements) do
(enu.nextElement() as TDMItemSet).upDateCounter(instances.instance(i));
inc(i);
FreeandNil(enu);
end;
end;

//аналогично
function pruneItemSets(var toPrune : TDMFastVector; kMinusOne : TStringHashtable) : TDMFastVector;
var
newVector : TDMFastVector;
help, i, j : integer;
current : TDMItemSet;

begin
newVector := TDMFastVector.Create(toPrune.size());
i := 0;

while (i < toPrune.size()) do
begin
current := toPrune.elementAt(i) as TDMItemSet;
j := 0;
while ( j < length(current.m_items)) do
begin
if (current.m_items[j] <> -1) then
begin
help := current.m_items[j];
current.m_items[j] := -1;
if (kMinusOne.Items[current.hashCode] = nil) then
begin
current.m_items[j] := help;
break;
end
else
current.m_items[j] := help;
end;
inc(j);
end;
if (j = length(current.m_items)) then
newVector.addElement(current)
else
FreeAndNil(current);
inc(i);
end;
toPrune.removeAllElements;
toPrune := nil;
result := newVector;
end;

procedure TDMItemSet.pruneRules(var rules : TDMFastVectorArray; minConfidence : double);
var
newPremises : TDMFastVector;
newConsequences : TDMFastVector;
newConf : TDMFastVector;
i : integer;

begin
newPremises := TDMFastVector.Create(rules[0].size());
newConsequences := TDMFastVector.Create(rules[1].size());
newConf := TDMFastVector.Create(rules[2].size());

i := 0;
while (i < rules[0].size()) do
begin
if (not(((rules[2].elementAt(i)) as TDMDoubleObject).doubleValue() < minConfidence)) then
begin
newPremises.addElement(rules[0].elementAt(i));
newConsequences.addElement(rules[1].elementAt(i));
newConf.addElement(rules[2].elementAt(i));
end
else
begin

rules[0].removeAndClearElementAt(i);
rules[1].removeAndClearElementAt(i);
rules[2].removeAndClearElementAt(i);
end;
inc(i);
end;

rules[0].removeAllElements();
rules[1].removeAllElements();
rules[2].removeAllElements();

rules[0] := newPremises;
rules[1] := newConsequences;
rules[2] := newConf;
end;

function TDMItemSet.support() : integer;
begin
result := m_counter;
end;

function TDMItemSet.toString(instances : TDMInstances) : AnsiString;
var
text : AnsiString;
i : integer;
begin
text :='';
i := 0;
while (i < instances.numAttributes()) do
begin
if (m_items[i] <> -1) then
begin
text := text + instances.attribute(i).name()+'=';
text := text + instances.attribute(i).value(m_items[i])+' ';
end;
inc(i);
end;
text := text + IntToStr(m_counter);
result := text;
end;

function getSingletons(instances : TDMInstances) : TDMFastVector;
var
setOfItemSets : TDMFastVector;
current : TDMItemSet;
i,j,k : integer;
begin
setOfItemSets := TDMFastVector.Create();

i := 0;
while ( i < instances.numAttributes())do
begin
if (instances.attribute(i).isNumeric()) then
raise Exception.Create('Не поддерживаются количествекнные атрибуты!');

j := 0;
while (j < instances.attribute(i).numValues()) do
begin
current := TDMItemSet.Create(instances.numInstances());
SetLength(current.m_items,instances.numAttributes());

k := 0;
while (k < instances.numAttributes()) do
begin
current.m_items[k] := -1;
inc(k);
end;
current.m_items[i] := j;
setOfItemSets.addElement(current);
inc(j);
end;
inc(i);
end;
result := setOfItemSets;
end;

function mergeAllItemSets(itemSets : TDMFastVector; size : integer; totalTrans : integer) : TDMFastVector;
var
newVector : TDMFastVector;
curResult : TDMItemSet;
numFound, k : integer;
i,j : integer;
first, second : TDMItemSet;
exitWhile : boolean;

begin
newVector := TDMFastVector.Create();
i := 0;
while ( i < itemSets.size()) do
begin
first := itemSets.elementAt(i) as TDMItemSet;
j := i+1;
while (j < itemSets.size()) do
begin
exitWhile := false;
second := itemSets.elementAt(j) as TDMItemSet;
curResult := TDMItemSet.Create(totalTrans);
SetLength(curResult.m_items, length(first.m_items));

numFound := 0;
k := 0;
while (numFound < size) do
begin
if (first.m_items[k] = second.m_items[k]) then
begin
if (first.m_items[k] <> -1) then
inc(numFound);
curResult.m_items[k] := first.m_items[k];
end
else
begin
exitWhile := true;
FreeAndNil(curResult);
break;
end;
inc(k);
end;

if exitWhile then
begin
FreeAndNil(curResult);
break;
end;

while (k < length(first.m_items)) do
begin
if ((first.m_items[k] <> -1) and (second.m_items[k] <> -1)) then
begin
FreeAndNil(curResult);
break
end
else
begin
if (first.m_items[k] <> -1) then
curResult.m_items[k] := first.m_items[k]
else
curResult.m_items[k] := second.m_items[k];
end;
inc(k);
end;

if (k = length(first.m_items)) then
begin
curResult.m_counter := 0;
newVector.addElement(curResult);
end
else
FreeAndNil(curResult);
inc(j);
end;
inc(i);
end;


first := nil;
second := nil;
result:= newVector;
end;

destructor TDMItemSet.Destroy;
begin
if (m_OwnValues) then
SetLength(m_items, 0);
end;


initialization
curId := 0;
end.
Соседние файлы в папке DMAssociations