您好，登錄後才能下訂單哦！
AC自動機（Aho-Corasick算法）是一種用於在文本中查找多個模式字符串的高效算法。它基於Trie樹（字典樹）和有限狀態自動機實現。以下是一個簡單的C語言實現：
#include<stdio.h>
#include <stdlib.h>
#include<string.h>
/* Number of distinct symbols: one goto edge per possible byte value. */
#define ALPHABET_SIZE 256

/*
 * One state of the Aho-Corasick automaton: a trie node plus the links
 * that buildFailureLinks() computes.
 */
typedef struct TrieNode {
    struct TrieNode *children[ALPHABET_SIZE]; /* goto edges, indexed by byte value */
    int isEndOfPattern;      /* non-zero when a pattern ends exactly at this node */
    int patternIndex;        /* index of that pattern, or -1 when none ends here */
    struct TrieNode *fail;   /* node for the longest proper suffix of this node's
                                string that is also a path in the trie; NULL for root */
    struct TrieNode *output; /* nearest node on the fail chain that ends a pattern,
                                or NULL; lets the search skip non-matching suffixes */
    size_t patternLength;    /* byte length of the pattern ending here, 0 if none */
} TrieNode;

/* FIFO of nodes used by the breadth-first pass; grows on demand. */
typedef struct {
    TrieNode **items;
    size_t head;     /* next slot to read */
    size_t tail;     /* next slot to write */
    size_t capacity; /* allocated slots in items */
} NodeQueue;

/* Reports an allocation failure and terminates; callers have no error channel. */
static void dieOutOfMemory(const char *where) {
    fprintf(stderr, "%s: out of memory\n", where);
    exit(EXIT_FAILURE);
}

/* Appends node to the queue, doubling the backing array when it is full. */
static void nodeQueuePush(NodeQueue *queue, TrieNode *node) {
    if (queue->tail == queue->capacity) {
        size_t newCapacity = queue->capacity ? queue->capacity * 2 : 64;
        /* Keep the old pointer until realloc succeeds so it is never lost. */
        TrieNode **grown = realloc(queue->items, newCapacity * sizeof *grown);
        if (grown == NULL) {
            free(queue->items);
            dieOutOfMemory("buildFailureLinks");
        }
        queue->items = grown;
        queue->capacity = newCapacity;
    }
    queue->items[queue->tail++] = node;
}

/*
 * Allocates an empty node: no children, no links, not the end of any pattern.
 *
 * Returns a pointer the caller owns. Never returns NULL; the process exits
 * with a diagnostic on stderr if memory cannot be obtained.
 */
TrieNode *createTrieNode(void) {
    TrieNode *node = malloc(sizeof *node);
    if (node == NULL) {
        dieOutOfMemory("createTrieNode");
    }
    for (int i = 0; i < ALPHABET_SIZE; i++) {
        node->children[i] = NULL;
    }
    node->isEndOfPattern = 0;
    node->patternIndex = -1;
    node->fail = NULL;
    node->output = NULL;
    node->patternLength = 0;
    return node;
}

/*
 * Adds pattern to the trie rooted at root and tags its final node with
 * patternIndex.
 *
 * A NULL root, a NULL pattern, or an empty pattern is ignored: an empty
 * pattern would mark the root itself as a match.
 * Call buildFailureLinks() again after inserting, before searching.
 */
void insertPattern(TrieNode *root, const char *pattern, int patternIndex) {
    if (root == NULL || pattern == NULL || pattern[0] == '\0') {
        return;
    }
    TrieNode *node = root;
    size_t length = 0;
    for (; pattern[length] != '\0'; length++) {
        /* Plain char may be signed; bytes >= 0x80 must not become negative indices. */
        unsigned char symbol = (unsigned char)pattern[length];
        if (node->children[symbol] == NULL) {
            node->children[symbol] = createTrieNode();
        }
        node = node->children[symbol];
    }
    node->isEndOfPattern = 1;
    node->patternIndex = patternIndex;
    node->patternLength = length;
}

/*
 * Computes the fail and output links of every node, breadth-first from root.
 *
 * Breadth-first order guarantees a node's fail target (which is strictly
 * shallower) is finished before the node itself is processed.
 * isEndOfPattern and patternIndex are left untouched, so each node keeps
 * describing only the pattern that ends exactly there; matches reachable
 * through suffixes are exposed via the output link instead.
 * May be called more than once. Exits the process if the work queue cannot
 * be allocated.
 */
void buildFailureLinks(TrieNode *root) {
    if (root == NULL) {
        return;
    }
    NodeQueue queue = {NULL, 0, 0, 0};
    root->fail = NULL;
    root->output = NULL;

    /* Depth-1 nodes have no proper non-empty suffix, so they fall back to root. */
    for (int i = 0; i < ALPHABET_SIZE; i++) {
        TrieNode *child = root->children[i];
        if (child != NULL) {
            child->fail = root;
            child->output = NULL;
            nodeQueuePush(&queue, child);
        }
    }

    while (queue.head < queue.tail) {
        TrieNode *current = queue.items[queue.head++];
        for (int i = 0; i < ALPHABET_SIZE; i++) {
            TrieNode *child = current->children[i];
            if (child == NULL) {
                continue;
            }
            /* Walk the parent's fail chain until some suffix can be extended by i. */
            TrieNode *fallback = current->fail;
            while (fallback != NULL && fallback->children[i] == NULL) {
                fallback = fallback->fail;
            }
            child->fail = (fallback != NULL) ? fallback->children[i] : root;
            child->output = child->fail->isEndOfPattern ? child->fail
                                                        : child->fail->output;
            nodeQueuePush(&queue, child);
        }
    }
    free(queue.items);
}

/*
 * Scans text and prints one line to stdout for every occurrence of every
 * inserted pattern, in the form "Pattern <patternIndex> found at index
 * <start>", where <start> is the 0-based byte offset of the occurrence.
 *
 * buildFailureLinks(root) must have been called after the last insertion.
 * A NULL text or root prints nothing.
 */
void searchPatterns(const char *text, TrieNode *root) {
    if (text == NULL || root == NULL) {
        return;
    }
    TrieNode *node = root;
    for (size_t i = 0; text[i] != '\0'; i++) {
        unsigned char symbol = (unsigned char)text[i];

        /* Drop to shorter suffixes until one has an edge for this byte. */
        while (node != NULL && node->children[symbol] == NULL) {
            node = node->fail;
        }
        /* Running off the root's NULL fail link means no suffix matches: restart. */
        node = (node != NULL) ? node->children[symbol] : root;

        /* Report the pattern ending here (if any), then every shorter one. */
        const TrieNode *hit = node->isEndOfPattern ? node : node->output;
        for (; hit != NULL; hit = hit->output) {
            printf("Pattern %d found at index %zu\n",
                   hit->patternIndex, i + 1 - hit->patternLength);
        }
    }
}
/*
 * Frees every node reachable from root through children links.
 *
 * Iterative on purpose, so very long patterns cannot exhaust the call stack.
 * Because the trie is being destroyed, each node's fail pointer is reused as
 * the "next" link of a pending-work list; no extra memory is needed.
 */
static void freeTrie(TrieNode *root) {
    TrieNode *pending = root;
    if (root != NULL) {
        root->fail = NULL;
    }
    while (pending != NULL) {
        TrieNode *node = pending;
        pending = node->fail;
        for (int i = 0; i < ALPHABET_SIZE; i++) {
            TrieNode *child = node->children[i];
            if (child != NULL) {
                child->fail = pending;
                pending = child;
            }
        }
        free(node);
    }
}

/*
 * Demo driver: inserts four patterns into a fresh trie, builds the failure
 * links, scans the text "ushers", then releases the trie.
 */
int main(void) {
    const char *patterns[] = {"he", "she", "his", "hers"};
    int numPatterns = (int)(sizeof(patterns) / sizeof(patterns[0]));

    TrieNode *root = createTrieNode();
    for (int i = 0; i < numPatterns; i++) {
        insertPattern(root, patterns[i], i);
    }
    buildFailureLinks(root);

    const char *text = "ushers";
    searchPatterns(text, root);

    freeTrie(root);
    return 0;
}
這個程序首先創建一個Trie樹，然後將要搜索的模式字符串插入到Trie樹中。接下來，它構建失敗鏈接，這是AC自動機的關鍵部分。最後，它在給定的文本中搜索模式字符串。當找到匹配時，它會輸出模式字符串的索引以及該匹配在文本中的起始位置。
免責聲明：本站發佈的內容（圖片、視頻和文字）以原創、轉載和分享為主，文章觀點不代表本網站立場，如果涉及侵權請聯繫站長郵箱：is@yisu.com進行舉報，並提供相關證據，一經查實，將立刻刪除涉嫌侵權內容。