📜  搜索给定字符串中的模式

📅  最后修改于: 2022-05-13 01:56:09.718000             🧑  作者: Mango

搜索给定字符串中的模式

给定两个字符串:文本 text 和模式 pattern,长度分别为 N 和 M(N > M),任务是打印 pattern 在 text 中出现的所有位置。

例子:

输入:text = "Welcome to Geeks for Geeks",pattern = "Geeks"
输出:模式出现在下标 11 和 21 处

方法:这个问题的方法基于以下思想:预先记录文本中每个字符出现的所有下标;从模式首字符的每个出现位置出发,依次检查模式的下一个字符是否恰好出现在文本的下一个下标处,能够一直匹配到模式末尾的位置即为模式的一次出现。

上面的想法可以使用队列来实现。按照下面提到的步骤来实现这个想法。

  • 首先,使用 256 大小的 unordered_set 数组。遍历文本并将每个索引插入到相应字符的集合中。
C++
// Record every position of the text in the set that belongs to the
// character found at that position.
for (int i = 0; i < static_cast<int>(text.length()); i++) {
    // Index through unsigned char: plain char may be signed, and a
    // negative value would fall outside the 256-entry table.
    structured_text[static_cast<unsigned char>(text[i])].insert(i);
}


C++
// Seed the queue with every text position holding the first pattern
// character. The index goes through unsigned char so that a signed
// char value cannot address the table with a negative subscript.
for (int ind : structured_text[static_cast<unsigned char>(pattern[0])])
    q_indices.push(ind);


C++
// Extend every surviving candidate by one pattern character per pass.
for (int i = 1; i < static_cast<int>(pattern.length()); i++) {
    // Text positions that hold the current pattern character.
    const auto& positions
        = structured_text[static_cast<unsigned char>(pattern[i])];

    // Only the candidates present at the start of this pass are
    // examined; the ones pushed below belong to the next pass.
    int q_size = static_cast<int>(q_indices.size());
    while (q_size--) {
        int ind = q_indices.front();
        q_indices.pop();

        // Keep the candidate only if the current pattern character
        // sits directly after the previously matched position.
        if (positions.find(ind + 1) != positions.end())
            q_indices.push(ind + 1);
    }
}


C++
// C++ code for the above approach:
  
#include <iostream>
#include <queue>
#include <string>
#include <unordered_set>
using namespace std;
  
// Index table: structured_text[c] holds every position of the most
// recently indexed text at which the byte value c occurs. There is one
// hash set per possible byte value (0..255).
std::unordered_set<int> structured_text[256];

// Builds the index table for `text`.
//
// Positions left over from a previous call are discarded first, so the
// table always describes exactly the text passed in here.
void StringSearch(std::string text)
{
    // Drop stale positions from an earlier text.
    for (auto& positions : structured_text)
        positions.clear();

    const int text_len = static_cast<int>(text.length());
    for (int i = 0; i < text_len; i++) {
        // Index through unsigned char: plain char may be signed, and a
        // negative value would fall outside the 256-entry table.
        structured_text[static_cast<unsigned char>(text[i])].insert(i);
    }
}
  
// Function to search the pattern
void pattern_search(string text, string pattern)
{
    StringSearch(text);
  
    // Queue contain the indices
    queue q_indices;
  
    for (int ind : structured_text[pattern[0]])
        q_indices.push(ind);
  
    // Pattern length
    int pat_len = pattern.length();
    for (int i = 1; i < pat_len; i++) {
        char c = pattern[i];
        int q_size = q_indices.size();
  
        // The queue contains the
        // number of occurrences of
        // the previous character.
        // Traverse the queue for
        // q_size times.
        // Check the next character of
        // the pattern found or not.
        while (q_size--) {
            int ind = q_indices.front();
            q_indices.pop();
  
            if (structured_text.find(ind + 1)
                != structured_text.end())
                q_indices.push(ind + 1);
        }
    }
    cout << "Pattern found at indexes:";
    while (!q_indices.empty()) {
  
        // last_ind is the last index
        // of the pattern in the text
        int last_ind = q_indices.front();
        q_indices.pop();
        cout << " " << last_ind - (pat_len - 1);
    }
    cout << endl;
}
  
// Driver code
int main()
{
    // Passing the Text
    string text = "Welcome to Geeks for Geeks";
    string pattern = "Geeks";
  
    // Function call
    pattern_search(text, pattern);
    return 0;
}


  • 在数组中搜索模式的第一个字符,并将哈希集中包含的每个索引推送到队列中。

C++

// Seed the queue with every text position holding the first pattern
// character. The index goes through unsigned char so that a signed
// char value cannot address the table with a negative subscript.
for (int ind : structured_text[static_cast<unsigned char>(pattern[0])])
    q_indices.push(ind);
  • 然后遍历模式i = 1 到 M-1
    • 对队列中的每个下标 ind(即 pattern[i-1] 在文本中的匹配位置),如果 ind + 1 存在于 structured_text[pattern[i]] 中,则将 ind + 1 推入队列以进行下一次迭代。
    • 否则,继续检查其他位置。

C++

// Extend every surviving candidate by one pattern character per pass.
for (int i = 1; i < static_cast<int>(pattern.length()); i++) {
    // Text positions that hold the current pattern character.
    const auto& positions
        = structured_text[static_cast<unsigned char>(pattern[i])];

    // Only the candidates present at the start of this pass are
    // examined; the ones pushed below belong to the next pass.
    int q_size = static_cast<int>(q_indices.size());
    while (q_size--) {
        int ind = q_indices.front();
        q_indices.pop();

        // Keep the candidate only if the current pattern character
        // sits directly after the previously matched position.
        if (positions.find(ind + 1) != positions.end())
            q_indices.push(ind + 1);
    }
}
  • 如果找到整个模式,则返回这些索引。

以下是上述方法的实现:

C++

// C++ code for the above approach:
  
#include <iostream>
#include <queue>
#include <string>
#include <unordered_set>
using namespace std;
  
// Index table: structured_text[c] holds every position of the most
// recently indexed text at which the byte value c occurs. There is one
// hash set per possible byte value (0..255).
std::unordered_set<int> structured_text[256];

// Builds the index table for `text`.
//
// Positions left over from a previous call are discarded first, so the
// table always describes exactly the text passed in here.
void StringSearch(std::string text)
{
    // Drop stale positions from an earlier text.
    for (auto& positions : structured_text)
        positions.clear();

    const int text_len = static_cast<int>(text.length());
    for (int i = 0; i < text_len; i++) {
        // Index through unsigned char: plain char may be signed, and a
        // negative value would fall outside the 256-entry table.
        structured_text[static_cast<unsigned char>(text[i])].insert(i);
    }
}
  
// Function to search the pattern
void pattern_search(string text, string pattern)
{
    StringSearch(text);
  
    // Queue contain the indices
    queue q_indices;
  
    for (int ind : structured_text[pattern[0]])
        q_indices.push(ind);
  
    // Pattern length
    int pat_len = pattern.length();
    for (int i = 1; i < pat_len; i++) {
        char c = pattern[i];
        int q_size = q_indices.size();
  
        // The queue contains the
        // number of occurrences of
        // the previous character.
        // Traverse the queue for
        // q_size times.
        // Check the next character of
        // the pattern found or not.
        while (q_size--) {
            int ind = q_indices.front();
            q_indices.pop();
  
            if (structured_text.find(ind + 1)
                != structured_text.end())
                q_indices.push(ind + 1);
        }
    }
    cout << "Pattern found at indexes:";
    while (!q_indices.empty()) {
  
        // last_ind is the last index
        // of the pattern in the text
        int last_ind = q_indices.front();
        q_indices.pop();
        cout << " " << last_ind - (pat_len - 1);
    }
    cout << endl;
}
  
// Driver code
int main()
{
    // Passing the Text
    string text = "Welcome to Geeks for Geeks";
    string pattern = "Geeks";
  
    // Function call
    pattern_search(text, pattern);
    return 0;
}
输出
Pattern found at indexes: 21 11

时间复杂度: 平均 O(N + M * K),其中 K 是任何字符的最大出现次数(建立索引需要 O(N);对模式的每个字符最多检查 K 个候选位置,而 unordered_set 的查找平均为 O(1))
辅助空间: O(N + d),其中 d = 256 是 unordered_set 数组的大小,文本的 N 个下标全部存放在这些集合中