Скачиваний:
28
Добавлен:
01.05.2014
Размер:
21.65 Кб
Скачать
unit ClusterAlg;

interface
uses SysUtils, Math, Matrix, Instances, Attribute;
type
{Matrix of prototypes}
TPrototypeMatrix = array of array of double;
{Array of covariance matrix}
TCovarianceMatrixArray = array of MatrixPtr;
{Array of data description}
TDataDescr = array of string;
var
{Pointer to data matrix}
M : MatrixPtr;
{
w - Fuzzy indicator (usually 2 or 1.25)
e - Termination tolerance (i.e. 0.001)
c - Number of prototypes
n - Dim of vectors
d - Number of vectors
}
c,d,n : integer;
w,e: double;
{Log file descriptor}
Log : text;
{Pointer to partition matrix}
U : MatrixPtr;
{Exception indicator}
Exception_Hdld : Boolean;
{Data Description}
DataDescr : TDataDescr;

procedure Throw(msg : string);
procedure OpenLog(var Log : text);
procedure CloseLog(var Log : text);
function PrintMatrix(MPtr : MatrixPtr;_Int,_Frac : byte) : boolean;
procedure RunGKAlg(inst1:TDMInstances;c1 : integer; w1,e1: double;var k:string;var institog:TDMInstances);
procedure FillDataMatrix(inst1:TDMInstances;var M1: MatrixPtr; d1,n1 :integer);
function GetProbability() : double;
procedure FillMatrixProbability(U : MatrixPtr);
function GetPMElem(Matr : MatrixPtr;i,j:integer;w:double):double;
procedure Solve(M : MatrixPtr;var U : MatrixPtr;n_dim,c_dim,d : integer; w,e : double);
procedure OutputResults(OutputFile : string;U,M : MatrixPtr;DD : TDataDescr);
procedure OutputResultsInst(var inst1: TDMInstances;U,M : MatrixPtr);
implementation

procedure OpenLog(var Log : text);
begin
Assign(Log,'GKCluster.log');
{$I-}
Rewrite(Log);
{$I+}
if (IOResult <> 0) then Throw('[OpenLog] Невозможно создать файл протокола GKCluster.log');
writeln(Log,'Starting new session of GK algorithm');
end;

procedure CloseLog(var Log : text);
begin
writeln(Log,'Closing session of GK algorithm');
{$I-}
Close(Log);
{$I+}
if (IOResult <> 0) then Throw('[CloseLog] Невозможно закрыть файл протокола GKCluster.log');
end;

{
Description:
This procedure throws exception
Params:
Exception message
}
procedure Throw(msg : string);
begin
Raise Exception.Create(msg);
end;

function PrintMatrix(MPtr : MatrixPtr;_Int,_Frac : byte) : boolean;
var
i,j : byte;
begin
if MPtr = nil then PrintMatrix:= FALSE
else with MPtr^ do begin
for i:= 1 to MatrixRow do begin
for j:= 1 to MatrixCol do
write(Log,GetMatrixElement(MPtr,i,j) : _Int : _Frac, ' ');
writeln(Log);
end;
PrintMatrix:= TRUE;
end;
end;

procedure RunGKAlg(inst1:TDMInstances;c1 : integer; w1,e1: double;var k:string; var institog:TDMInstances);
var inst,instcopy: TDMInstances;
ins: TDMInstance;
t1: integer;
outp : text;
begin
Assign(outp,'outp.log');
Exception_Hdld := false;
try
inst:=inst1;
c:=c1;
w:=w1;
e:=e1;
d:=inst.numInstances();
n:=inst.numAttributes();
instcopy:=TDMInstances.Create(inst);
k:= 'Расчет окончен';
{Create data matrix}
M:=CreateMatrix(d,n);
{Fill data matrix}
FillDataMatrix(inst,M,d,n);
{Open log file}
OpenLog(Log);
writeln(Log,'Was received following params:');
writeln(Log,' Dimesion of space = ',n);
writeln(Log,' Number of clusters = ',c);
writeln(Log,' Number of vectors = ',d);
writeln(Log,' Weighting factor = ', w:2:3);
writeln(Log,' Termination tolerance = ',e:2:3);
writeln(Log,'Data matrix:');
PrintMatrix(M,3,2);
U:=CreateMatrix(c,d);
FillMatrixProbability(U);
writeln(Log,'Partition matrix U0:');
PrintMatrix(U,3,2);
{Solve problem}
Solve(M,U,n-1,c,d,w,e);
{Output results}
OutputResultsInst(instcopy,U,M);
institog:=TDMInstances.Create(instcopy);
{Free data description array}
{DataDescr:=nil;}
except
on E : Exception do
begin
writeln(Log,'Error : ',E.Message);
CloseLog(Log);
Exception_Hdld := true;
Raise;
end;
end;
if not Exception_Hdld then
begin
CloseLog(Log);
end;
end;

{
Description:
This procedure fills data matrix from instances
Params:
Name of instances
Pointer to the data matrix
d1-number of instances(number of vectors)
n1-number of attributes(dimension of space)
}
procedure FillDataMatrix(inst1:TDMInstances;var M1:MatrixPtr; d1,n1: integer);
var
dim, num: integer;
row,col: integer;
elem: double;
begin
dim := d1;
num := n1;
for row := 0 to dim-1 do
begin
for col := 0 to (num-1) do
begin
elem:=inst1.Instance(row).value(col);
SetMatrixElement(M1,row+1,col+1,elem);
end;
end;
end;

{
Description:
This function returns element of partition matrix (U[i,j]) in power w
Params:
Pointer to the partition matrix
i,j - row and col
w - fuzzy indicator
Return:
(Matr[i,j])^w
}
function GetPMElem(Matr : MatrixPtr;i,j:integer;w:double):double;
begin
Result:=Power(GetMatrixElement(Matr,i,j),w);
end;

{---------------------------------------------------------------------------------------------------------------}
{
Description:
This procedure solves clustering problem. After return, U - partiton matrix (solution)
Params:
Pointer to the data matrix
Pointer to the partition matrix (transit)
n_dim,c_dim,d - n,c,d
w,e
}
procedure Solve(M : MatrixPtr;var U : MatrixPtr;n_dim,c_dim,d : integer; w,e : double);
var
C : TPrototypeMatrix;
{Array of pointers to covariance matrix}
CovMatrixArr : TCovarianceMatrixArray;
Dist : MatrixPtr;
Old_U : MatrixPtr;
DiffNorm : double;
{}
Stage : integer;
tmp1,tmp2,tmp3 : MatrixPtr;
{///////////////////////////////////////////////////////////////////////////////}
{
Description:
This procedure computes prototypes matrix
Params:
Pointer to the data matrix
Pointer to the partition matrix
Pointer to the prototipes matrix
d
w
}
procedure ComputePrototypes(M,U : MatrixPtr; var C : TPrototypeMatrix; d : integer; w : double);
var
i,j,k : integer;
denom : double;
begin
writeln(Log,'Computing prototypes matrix...');
{Zero prototypes}
for i:=Low(C) to High(C) do
for j:=Low(C[i]) to High(C[i]) do C[i,j]:=0;
{Calculate prototypes}
for i:=Low(C) to High(C) do
begin
denom:=0;
for j:=1 to d do
begin
denom := denom + GetPMElem(U,i+1,j,w);
for k:=Low(C[i]) to High(C[i]) do
begin
C[i,k]:=C[i,k] + GetPMElem(U,i+1,j,w)*GetMatrixElement(M,j,k+1);
{writeln(Log,'C[',i+1,', ',k+1,'] = ',C[i,k]:5:5, ' u[i,j] = ',GetPMElem(U,i+1,j,w):5:5,' M[i,j] = ',GetMatrixElement(M,j,k+1):5:5);}
end;

end;
{writeln(Log,'[debug] denom on stage ',i+1, ' = ',denom:2:3); }
for k:=Low(C[i]) to High(C[i]) do
if (denom<>0) then C[i,k]:=C[i,k]/denom
else Throw('[ComputePrototypes] Обнаружена коллизия в данных : матрица разбиения имеет нулевую строку');
end;
{Print prototypes matrix to log}
writeln(Log,'Prototype matrix:');
for i:=Low(C) to High(C) do
begin
for j:=Low(C[i]) to High(C[i]) do write(Log,C[i,j]:2:3,' ');
writeln(Log);
end;
end;{ComputePrototypes}
{///////////////////////////////////////////////////////////////////////////////}
{
Description:
This procedure computes covariance matrices
Params:
Pointer to the data matrix
Pointer to the partition matrix
Pointer to the prototipes matrix
d
w
n_dim - n
Array of pointer to covariance matrices (transit)
}
procedure CalculateCovarianceMatrix(M : MatrixPtr; U : MatrixPtr; C : TPrototypeMatrix; d : integer;w : double; n_dim : integer;var CMArr : TCovarianceMatrixArray);
var
i,j,k : integer;
denom : double;
V,V_T,F : MatrixPtr;
begin
writeln(Log,'Computing covariance matrix...');
{Loop by prototypes}
for i:=Low(C) to High(C) do
begin
{--- Compute denominator ---}
denom:=0;
for j:=1 to d do denom:=denom+GetPMElem(U,i+1,j,w);
{writeln(Log,'[debug] denom = ',denom:5:5,' on stage ',i+1);}
{writeln(Log,'Denominator on stage ',i+1,' = ',denom:2:3);}
{--- Compute current covariance matrix ---}
{Vectors}
V:=CreateMatrix(n_dim,1);
V_T:=CreateMatrix(1,n_dim);
{Current stage matrix}
F:=CreateMatrix(n_dim,n_dim);
FillMatrix(F,0);

{Loop by all data vectors}
for j:=1 to d do
begin
{Fill vectors V and V_T}
for k:=1 to n_dim do
begin
SetMatrixElement(V,k,1,GetMatrixElement(M,j,k)-C[i,k-1]);
SetMatrixElement(V_T,1,k,GetMatrixElement(M,j,k)-C[i,k-1]);
end;
{writeln(Log,'----- Vector V on stage ',i+1);
PrintMatrix(V,2,3);
writeln(Log,'----- Vector V_T on stage ',i+1);
PrintMatrix(V_T,2,3); }
{Multiple vectors : V * V_T = tmp1 }
tmp1:=MultipleMatrixOnMatrix(V,V_T);
{writeln(Log,'----- V * V_T on stage ',i+1);
PrintMatrix(tmp1,2,3); }
{Multiple matrix on U[i,j]}
tmp2:=MultipleMatrixOnNumber(tmp1,GetPMElem(U,i+1,j,w));
{writeln(Log,'----- Nominator on stage ',i+1,', ',j);
PrintMatrix(tmp2,2,3); }
{Add old F and new F (e.g. tpm2)}
tmp3:=AddMatrixOnMatrix(F,tmp2);
{writeln(Log,'----- F on stage ',i+1,', ',j);
PrintMatrix(tmp3,2,3); }
{Free unused memory}
if not DeleteMatrix(tmp1) then Throw('[CalculateCovarianceMatrix] Невозможно удалить временную матрицу');
if not DeleteMatrix(tmp2) then Throw('[CalculateCovarianceMatrix] Невозможно удалить временную матрицу');
if not DeleteMatrix(F) then Throw('[CalculateCovarianceMatrix] Невозможно удалить временную матрицу');
{Define new current matrix F}
F:=tmp3;
end;{loop by data vectors}

{--- Compute F[i] ---}
if (denom <> 0) then CMArr[i]:=MultipleMatrixOnNumber(F,1/denom)
else Throw('[CalculatePrototypesMatrix] Обнаружена коллизия в данных : матрица разбиения имеет нулевую строку');
writeln(Log,'----- F[',i+1,'] -----');
PrintMatrix(CMArr[i],15,15);
{Free memory}
if not DeleteMatrix(F) then Throw('[CalculateCovarianceMatrix] Невозможно удалить временную матрицу F');
if not DeleteMatrix(V) then Throw('[CalculateCovarianceMatrix] Невозможно удалить временную матрицу V');
if not DeleteMatrix(V_T) then Throw('[CalculateCovarianceMatrix] Невозможно удалить временную матрицу V_T');
end;{loop by prototypes}
end;{CalculateCovarianceMatrix}

{///////////////////////////////////////////////////////////////////////////////}
{
Description:
This procedure computes distances between each cluser and data vertor
Params:
Pointer to the data matrix
Pointer to the prototipes matrix
n_dim - n
d
Pointer to distances matrix (transit)
}
procedure ComputeDistanceMatrix(M : MatrixPtr; C : TPrototypeMatrix; n_dim,d : integer; var Dist : MatrixPtr);
var
i,j,k : integer;
V,V_T : MatrixPtr;
begin
writeln(Log,'Computing distances...');
{Vectors}
V:=CreateMatrix(n_dim,1);
V_T:=CreateMatrix(1,n_dim);
{Loop by all prototypes}
for i:=Low(C) to High(C) do
begin
{Compute norming matrix}
if DetMatrix(CovMatrixArr[i]) < 0.0000001 then Throw('[ComputeDistances] Получена критическая ошибка! Опеделитель ковариационной матрицы = 0. Алгоритм не устойчив по отношению к входным данным');
tmp1:=ReverseMatrix(CovMatrixArr[i]);
{writeln(Log,'----- inv(F[',i+1,']) -----');
PrintMatrix(tmp1,5,5);
PrintMatrix(CovMatrixArr[i],5,5); }
if tmp1 = nil then Throw('[ComputeDistances] Невозможно найти обратную матрицу для CovMatrixArr[i]');
tmp2:=MultipleMatrixOnNumber(tmp1,Power(DetMatrix(CovMatrixArr[i]),1/n_dim));
{writeln(Log,'----- inv(F[',i+1,']) * det^(1/n) -----');
PrintMatrix(tmp2,5,5);
writeln(Log,'[debug] Determinant = ',DetMatrix(CovMatrixArr[i]):5:5);}
if tmp2 = nil then Throw('[ComputeDistances] Невозможно умножить матрицу на число');
if not DeleteMatrix(tmp1) then Throw('[ComputeDistances] Невозможно удалить матрицу');
{Loop by all data vectors}
for j:=1 to d do
begin
{Fill vectors V and V_T}
for k:=1 to n_dim do
begin
SetMatrixElement(V,k,1,C[i,k-1]-GetMatrixElement(M,j,k));
SetMatrixElement(V_T,1,k,C[i,k-1]-GetMatrixElement(M,j,k));
end;
tmp3:=MultipleMatrixOnMatrix(V_T,tmp2);
if tmp3 = nil then Throw('[ComputeDistances] Невозможно умножить матрицу на матрицу');
tmp1:=MultipleMatrixOnMatrix(tmp3,V);
if tmp1 = nil then Throw('[ComputeDistances] Невозможно умножить матрицу на матрицу');
if not DeleteMatrix(tmp3) then Throw('[ComputeDistances] Невозможно удалить матрицу');

{tmp1:= MultipleMatrixOnMatrix(V_T,V);}

SetMatrixElement(Dist,i+1,j,GetMatrixElement(tmp1,1,1));
DeleteMatrix(tmp1);
end;{loop by data vectors}
if not DeleteMatrix(tmp2) then Throw('[ComputeDistances] Невозможно удалить матрицу');
end;{loop by prototypes}
DeleteMatrix(V);
DeleteMatrix(V_T);
writeln(Log,'Distance matrix:');
PrintMatrix(Dist,2,3);
end;{ComputeDistanceMatrix}
{///////////////////////////////////////////////////////////////////////////////}
{
Description:
This procedure updates partition matrix
Params:
Pointer to the partition matrix (transit)
c_dim - c
d
w
}
procedure UpdatePartitionMatrix(var U : MatrixPtr; c_dim,d : integer; w : double);
var
i,j,k : integer;
num : double;
begin
for i:=1 to c_dim do
begin
for j:=1 to d do
begin
num:=0;
for k:=1 to c_dim do num:=num+Power(GetMatrixElement(Dist,i,j)/GetMatrixElement(Dist,k,j),1/(w-1));
SetMatrixElement(U,i,j,1/num);
end;
end;
writeln(Log,'Updated partition matrix:');
PrintMatrix(U,2,3);
end;{UpdatePartitionMatrix}
{///////////////////////////////////////////////////////////////////////////////}
{
Description:
This procedure allocates memory for matrices
Params:
Pointer to the prototipes matrix
Array of pointer to the covariance matrices
Pointer to the distance matrix
c_dim - c
n_dim - n
}
procedure Init(var C : TPrototypeMatrix; var CovMatrixArr : TCovarianceMatrixArray; var Dist : MatrixPtr; c_dim,n_dim : integer);
var
i : integer;
begin
{Allocate memory for prototypes matrix}
SetLength(C,c_dim,n_dim);

{Allocate memory for array of pointers to MatrixPtr}
SetLength(CovMatrixArr,c_dim);
for i:=Low(CovMatrixArr) to High(CovMatrixArr) do CovMatrixArr[i]:=nil;
{Allocate memory for distanse matrix}
Dist:=CreateMatrix(c_dim,d);
end;
{///////////////////////////////////////////////////////////////////////////////}
{
Description:
This procedure frees memory
Params:
Pointer to the prototipes matrix
Array of pointer to the covariance matrices
Pointer to the distance matrix
c_dim - c
n_dim - n
}
procedure Destroy(var C : TPrototypeMatrix; var CovMatrixArr : TCovarianceMatrixArray; var Dist : MatrixPtr; c_dim,n_dim : integer);
begin
{Deallocate memory for prototypes matrix}
C:=nil;
{Deallocate memory for array of pointers to MatrixPtr}
CovMatrixArr:=nil;
{Dellocate memory for distanse matrix}
if not DeleteMatrix(Dist)then Throw('[Destroy] Невозможно удалить матрицу расстояний');
end;
{///////////////////////////////////////////////////////////////////////////////}
{
Description:
This procedure computes norm of difference of partition matrices on stages n and n-1
Params:
Pointer to the current partition matrix
Pointer to the old partition matrix
}
function GetDiffNorm(U,U_Old : MatrixPtr) : double;
var
DiffMatrix : MatrixPtr;
max,tmp : double;
i,j : integer;
begin
DiffMatrix:=SubMatrixOnMatrix(U,U_Old);
max:=0;
for j:=1 to GetMatrixCol(DiffMatrix) do
begin
tmp:=0;
for i:=1 to GetMatrixRow(DiffMatrix) do tmp:=tmp+abs(GetMatrixElement(DiffMatrix,i,j));
if (tmp > max) then max:=tmp;
end;
DeleteMatrix(DiffMatrix);
Result:=max;
end;

procedure PrintFun();
var i,j:integer;
sum:double;
begin
sum:=0;
for i:=1 to c_dim do
for j:=1 to d do sum:=sum+GetPMElem(M,i,j,w)*GetMatrixElement(Dist,i,j);
writeln(Log,'Function value = ',sum:5:5);
end;
{///////////////////////////////////////////////////////////////////////////////}
begin
{Initialization}
Init(C,CovMatrixArr,Dist,c_dim,n_dim);
{Main calculation loop}
Stage:=0;
Repeat
inc(Stage);
writeln(Log,' Stage ',Stage);
{Compute prototypes}
ComputePrototypes(M,U,C,d,w);
{Calculate covariance matrix}
CalculateCovarianceMatrix(M,U,C,d,w,n_dim,CovMatrixArr);
{Compute distance matrix}
ComputeDistanceMatrix(M,C,n_dim,d,Dist);
{Save current partition matrix}
Old_U := CloneMatrix(U);
{Update partition matrix}
UpdatePartitionMatrix(U,c_dim,d,w);
{Calculate norm of difference of matrix on stages n and n-1}
DiffNorm := GetDiffNorm(U,Old_U);
writeln(Log,'DiffNorm = ',DiffNorm:2:3);
{Delete old partition matrix (previous stage matrix)}
DeleteMatrix(Old_U);
{PrintFun();}
Until ( DiffNorm < e );
{Free memory}
Destroy(C,CovMatrixArr,Dist,c_dim,n_dim);
end;{Solve}

function GetProbability() : double;
var
denom : double;
begin
Randomize;
denom:=RandomRange(1,100);
Result:=1.0/denom;
end;
{---------------------------------------------------------------------------------------------------------------}
procedure FillMatrixProbability(U : MatrixPtr);
var
{ i,j : integer;}
i,j,k : integer;
v,vv : array of double;
fr : double;

begin
{ for i:=1 to GetMatrixRow(U) do
for j:=1 to GetMatrixCol(U) do SetMatrixElement(U,i,j,GetProbability); }
for j:=1 to GetMatrixCol(U) do
begin
{Create array of random number}
SetLength(v,c);
v[c-1]:=1;
for i:=0 to c-2 do
begin
v[i]:=GetProbability;
end;
{Sort array}
for i:=0 to c-1 do
begin
for k:=0 to c-1 do
if (v[i]<v[k]) then
begin
v[i]:=v[i]-v[k];
v[k]:=v[i]+v[k];
v[i]:=v[k]-v[i];
end;
end;
{Create array of intervals}
SetLength(vv,c);
{Fill it}
fr:=0;
for i:=0 to c-1 do
begin
vv[i]:=v[i]-fr;
fr:=v[i];
end;
for i:=1 to GetMatrixRow(U) do
begin
SetMatrixElement(U,i,j,vv[i-1]);
end;
end;

end;

procedure OutputResults(OutputFile : string;U,M : MatrixPtr;DD : TDataDescr);
var
i,j,maxcol,maxrow,max_cl,n_dim : integer;
max : double;
fd : text;
begin
{Open input file}
Assign(fd,OutputFile);
{$I-}
Rewrite(fd);
{$I+}
if (IOResult <> 0) then Throw('Невозможно открыть выходной файл ' + OutputFile);
n_dim:=GetMatrixCol(M);
writeln(fd,'@RELATION "'{,DD[0],'"'});
writeln(fd);
for i:=1 to n_dim do writeln(fd,'@ATTRIBUTE "'{,DD[i],'" REAL'});
writeln(fd,'@ATTRIBUTE "Номер кластера" INTEGER');
writeln(fd,'@ATTRIBUTE "Степень принадлежности" REAL');
writeln(fd);
writeln(fd,'@DATA');
maxcol:=GetMatrixCol(U);
maxrow:=GetMatrixRow(U);
for j:=1 to maxcol do
begin
max:=0;
max_cl:=0;
for i:=1 to maxrow do
if (GetMatrixElement(U,i,j) > max) then
begin
max:=GetMatrixElement(U,i,j);
max_cl:=i;
end;
for i:=1 to n_dim do write(fd,GetMatrixElement(M,j,i):5:5,',');
writeln(fd,max_cl,',',max:2:3);
end;
close(fd);
end;

procedure OutputResultsInst(var inst1: TDMInstances;U,M : MatrixPtr);
var
att, att1: TDMAttribute;
i,j,maxcol,maxrow,max_cl,n_dim : integer;
max : double;
//s: string;
//F: text;

begin
att:=TDMAttribute.Create('Cluster number');
att1:=TDMAttribute.Create('Grade of membership');
inst1.insertAttributeAt(att,0);
inst1.insertAttributeAt(att1,1);
maxcol:=GetMatrixCol(U);
maxrow:=GetMatrixRow(U);
for j:=1 to maxcol do
begin
max:=0;
max_cl:=0;
for i:=1 to maxrow do
if (GetMatrixElement(U,i,j) > max) then
begin
max:=GetMatrixElement(U,i,j);
max_cl:=i;
end;
inst1.instance(j-1).setValue(0,max_cl);
inst1.instance(j-1).setValue(1,max);
end;
// S := inst1.ToString();
//AssignFile(F, 'irisgood');
//Rewrite(F);
//Write(F, S);
//CloseFile(F);
end;


end.
Соседние файлы в папке GK