Добавил:
Опубликованный материал нарушает ваши авторские права? Сообщите нам.
Вуз: Предмет: Файл:
dissa2 old.docx
Скачиваний:
27
Добавлен:
12.06.2018
Размер:
790.09 Кб
Скачать

Продолжение Приложения А

std::map<int,float>::const_iterator itv = vCls[ (*fst).second ].begin();

itv = vCls[ (*fst).second ].find( (*itr).first );

if(itv != vCls[ (*fst).second ].end() )

comp++;

}

if( vCls[ (*fst).second ].size()*th2 < comp){

vMapCls[cnt].push_back(vCls[ (*scnd).second ]);

vIdStr[cnt].push_back(vId[(*scnd).second ]);

}

}

}

cnt++;

}

}

vCls.clear();

}

int main(int argc, char * argv[])

{

if (argc < 7 || argc > 15) {error ("number of argv isn't correct: -fl input_file -w2v classes_w2v -n N -th1 n -th2 n -th3 n -th4 n -out output_file!\n"

"format input_file - IDdocs(numbers)\\ttext\n"

"format classes_w2v - word\\tclass\\tweight\n"

"n - the numbers of docs in one epoch; default eof\n"

"th1 - the min numbers of w2v classes in claster; default = 4;\n"

"th2 - the proximity of clasters 0-1; default = 0.7;\n"

"th3 - the min numbers of docs in clasters; default = 3;\n"

"th4 - the similarity of clasters according to the docs; default = 0.6;\n"

);

exit(0);}

string line;

string strText;

char * namefile;

char * w2vfile;

ofstream out;

int epo=-1;

int th1=0;

float th2=0;

int th3=0;

float th4=0;

Продолжение Приложения А

string argum = argv [1];

if ("-fl" == argum)

namefile = argv [2];

else{

error ("No input file! First argument: -fl input");

exit(0);

}

string arguw = argv [3];

if ("-w2v" == arguw)

w2vfile = argv [4];

else{

error ("No w2v file! Second argument: -w2v inw2v");

exit(0);

}

string outFile;

bool flgoutfile = false;

for (int i=5; i< argc-1; i++){

string snum = argv [i];

if ("-out" == snum){

outFile = (argv[i+1]);

flgoutfile = true;

i++;

}

else if ("-n" == snum){

epo = atoi (argv[i+1]);

i++;

}

else if ("-th1" == snum){

th1 = atoi (argv[i+1]);

i++;

}

else if ("-th2" == snum){

th2 = atof (argv[i+1]);

i++;

}

else if ("-th3" == snum){

th3 = atoi (argv[i+1]);

i++;

}

else if ("-th4" == snum){

th4 = atof (argv[i+1]);

i++;

Продолжение Приложения А

}

}

//default

if (th1==0)

th1=4;

if (th2==0)

th2=0.7;

if (th3==0)

th3=3;

if (th4==0)

th4=0.6;

if (flgoutfile == false){

error ("no output file!");

exit(0);

}

else

out.open(outFile.c_str());

map <string, string> w2v;

ifstream myfile(namefile, ifstream::in );

if (!myfile) {

error ("No intext file!",namefile);

exit (1);

}

ifstream wfile(w2vfile, ifstream::in);

if (!wfile) {

error ("No inw2v file!",w2vfile);

exit (1);

}

if (wfile.is_open()){

while (getline(wfile, line)) {

string strCls;

string strWrd;

int nPosTab;

int nPosEnd = line.length();

int nPos=0;

while (nPos < line.length()){

nPosTab = line.find('\t', nPos);

Продолжение Приложения А

if (nPosEnd==-1)

nPosEnd=line.length();

if (-1 != nPosTab ) {

strWrd = line.substr(nPos,nPosTab-nPos);

strCls = line.substr(nPosTab-nPos+1,nPosEnd-nPos);

nPos=nPosEnd+1;

}

else if (nPos != nPosEnd) {

error("format is not correct!");

exit(1);

}

}

w2v.insert ( std::pair<string,string>(strWrd,strCls) );

}

}

std::vector < std::vector < int > > vOutIdStr;

std::vector < std::vector <std::map<int,float> > > vOutMapCls;

std::vector < std::vector < int > > vIdStr;

std::vector < int > vId;

std::vector < std::vector <std::map<int,float> > > vMapCls;

std::vector < std::map <int,float> > vCls;

int idstr;

int cntepo=0;

if (myfile.is_open()){

while (getline(myfile, line)) {

cntepo++;

std::map <int,float> cls;

int cntmap=0;

string strWrd;

string strID;

int nPosTab;

int nPosSpace;

int nPosEnd = line.length();

int nPos=0;

while (nPos < line.length()){

nPosTab = line.find('\t', nPos);

nPosSpace = line.find(' ', nPos);

if (nPosEnd==-1)

nPosEnd=line.length();

else if (-1 != nPosTab && nPosTab < nPosEnd) {

strID = line.substr(nPos,nPosTab-nPos);

Соседние файлы в предмете [НЕСОРТИРОВАННОЕ]