Добавил:
Опубликованный материал нарушает ваши авторские права? Сообщите нам.
Вуз: Предмет: Файл:

lab6 / Aho_Corasick

.cpp
Скачиваний:
5
Добавлен:
05.02.2020
Размер:
3.45 Кб
Скачать
#include "Aho_Corasick.h"

Trie::Trie(set<wstring> patterns): root(new Node(make_pair(-1, nullptr))) {
	for(auto &pattern: patterns) {
		Node* state = root;

		for(auto &letter: pattern) {
			auto outEdge = state->outEdges.find(letter);

			if (outEdge == state->outEdges.end()) {
				Node* node = new Node(make_pair(letter, state));
				state->outEdges.emplace(letter, node);
			}

			state = state->outEdges[letter];
		}

		state->pattern = pattern;
	}

	makeLinks(root);

	queue<Node*> myQueue;
	myQueue.push(root);

	while(!myQueue.empty()) {
		Node* parent = myQueue.front();
		myQueue.pop();

		for(auto &outEdge: parent->outEdges) {
			makeLinks(outEdge.second);		
			myQueue.push(outEdge.second);
		}
	}
}

void Trie::makeLinks(Node* &node) {
	if (node == root || node->inEdge.second == root) {
		node->failure = root;
		return;
	}

	Node* failure = node->inEdge.second->failure, *output = nullptr;
	char edgeToFind = node->inEdge.first;

	while(true) {
		auto outEdge = failure->outEdges.find(edgeToFind);
		
		if (outEdge != failure->outEdges.end()) {
			failure = outEdge->second;

			if (!failure->pattern.empty()) {
				output = failure;
			} else if (failure->output) {
				output = failure->output;
			}

			break;
		} else if (failure == root) {
			break;
		} else
			failure = failure->failure;
	}

	node->failure = failure;
	node->output = output;
}

map<wstring, set<lli>> AhoCorasick(wstring &text, Trie &trie) {
	map<wstring, set<lli>> solutions;

	Node* state = trie.root;

	lli i = 0, size = text.size();

	while (i < size) {
		auto outEdge = state->outEdges.find(text[i]);

		if (outEdge != state->outEdges.end()) {
			state = outEdge->second;

			Node* output = state->output;

			while(output) {
				solutions[output->pattern].emplace(i - output->pattern.size() + 2);
				output = output->output;
			}

			if (!state->pattern.empty()) {
				solutions[state->pattern].emplace(i - state->pattern.size() + 2);
				
				if (state->outEdges.empty()) {
					state = state->failure;
				}
			}

			i++;
		} else if (state == trie.root){
			i++;	
		} else {
			state = state->failure;
		}
	}

	return solutions;
}

set<wstring> split(wstring &myTemplate, char delim){
	wstring buffer{L""};
	set<wstring> patterns;
	int i = 0;

	for(auto letter:myTemplate) {
		if(letter != delim) {
			buffer += letter;
		} else if(letter == delim && buffer != L"") {
			patterns.emplace(buffer);
			buffer = L"";
		}
		i++;
	}

	if(buffer != L"") {
		patterns.emplace(buffer);
	}

	return patterns;
}

set<lli> AhoCorasickWithJoker(wstring &text, wstring &myTemplate, wchar_t joker) {
	set<wstring> patterns = split(myTemplate, joker);
	
	Trie trie(patterns);

	map<wstring, set<lli>> myTemplateSolutions = AhoCorasick(myTemplate, trie), 
						  textSolutions = AhoCorasick(text, trie);

	lli textSize = text.size(), countOfPatterns = 0;
	
	vector<lli> entries(textSize, 0);

	for(auto &pattern: patterns) {
		set<lli> templateSolution = myTemplateSolutions[pattern],
				 textSolution = textSolutions[pattern];
		
		countOfPatterns += templateSolution.size();

		for(auto &textPosition: textSolution) {
			for(auto &templatePosition: templateSolution) {
				lli index = textPosition - templatePosition;

				if (index > -1) {
					entries[index]++;
				}
			}
		}
	}

	set<lli> templateEntries;

	for(int p = 0; p < textSize; p++) {
		if (entries[p] == countOfPatterns) {
			templateEntries.emplace(p + 1);
		}
	}

	return templateEntries;
}
Соседние файлы в папке lab6