统计在字符串中恰好出现 K 次的长度为 M 的子字符串

📌 相关文章

📜 统计在字符串中恰好出现 K 次的长度为 M 的子字符串

📅 最后修改于: 2021-09-07 02:34:27 🧑 作者: Mango

给定一个长度为 N 的字符串 S 和两个整数 M 和 K，任务是统计在字符串 S 中恰好出现 K 次的、长度为 M 的不同子字符串的数量。

例子：

Input: S = “abacaba”, M = 3, K = 2
Output: 1
Explanation: All distinct substrings of length 3 are “aba”, “bac”, “aca”, “cab”.
Out of all these substrings, only “aba” occurs twice in the string S.
Therefore, the count is 1.

Input: S = “geeksforgeeks”, M = 2, K = 1
Output: 4
Explanation:
All distinct substrings of length 2 are “ge”, “ee”, “ek”, “ks”, “sf”, “fo”, “or”, “rg”.
Out of all these substrings, “sf”, “fo”, “or” and “rg” each occur exactly once in the string S.
Therefore, the count is 4.

编程需要懂一点英语

朴素方法：最简单的方法是生成所有长度为M的子串，并将每个子串在字符串S中的频率存储在一个 Map 中。现在，遍历 Map 并且如果频率等于K ，则将count增加1 。完成以上步骤后，打印count作为结果。
时间复杂度： O((N – M)*N*M)
辅助空间： O(N – M)

高效方法：上述方法可以通过使用 KMP 算法来计算每个子串在字符串中的出现频率进行优化。请按照以下步骤解决问题：

初始化一个变量，比如count为0 ，以存储所需子字符串的数量。
从字符串 S 中生成所有长度为 M 的子串，并将它们插入到一个数组中，比如 arr[]。
遍历数组arr[]并针对数组中的每个字符串，使用KMP算法计算其在字符串S 中的频率。
如果字符串的频率等于 K（在下面的代码中记为 P），则将 count 增加 1。
完成上述步骤后，打印count的值作为结果子串的计数。

下面是上述方法的实现：

C++

// C++ program for the above approach
#include <iostream>
#include <set>
#include <string>
#include <vector>
using namespace std;
 
// Function to compute the LPS array of a pattern. lps[i] is the length
// of the longest proper prefix of pat[0..i] that is also a suffix of
// pat[0..i].
//
// pat - pattern to preprocess; characters pat[0..M-1] are read
// M   - number of leading characters of pat to process
// lps - output buffer with room for at least M ints
//
// Nothing is written when M <= 0, so an empty buffer is never touched.
void computeLPSArray(const string& pat, int M,
                     int lps[])
{
    // An empty pattern has no lps entries; writing lps[0] would be
    // out of bounds.
    if (M <= 0)
        return;

    // Length of the prefix/suffix overlap carried over from the
    // previous position
    int len = 0;
    lps[0] = 0;

    // Iterate over [1, M - 1] to find lps[i]
    for (int i = 1; i < M;) {

        if (pat[i] == pat[len]) {
            // The overlap grows by one character
            lps[i++] = ++len;
        }
        else if (len != 0) {
            // Fall back to the next shorter overlap and retry the
            // same i (i is deliberately not advanced here)
            len = lps[len - 1];
        }
        else {
            // No prefix of pat is a suffix ending at position i
            lps[i++] = 0;
        }
    }
}
 
// Function to find the frequency of
// pat in the string txt
int KMPSearch(string pat, string txt)
{
    // Stores length of both strings
    int M = pat.length();
    int N = txt.length();
 
    // Initialize lps[] to store the
    // longest prefix suffix values
    // for the string pattern
    int lps[M];
 
    // Store the index for pat[]
    int j = 0;
 
    // Preprocess the pattern
    // (calculate lps[] array)
    computeLPSArray(pat, M, lps);
 
    // Store the index for txt[]
    int i = 0;
    int res = 0;
    int next_i = 0;
 
    while (i < N) {
        if (pat[j] == txt[i]) {
            j++;
            i++;
        }
        if (j == M) {
 
            // If pattern is found the
            // first time, iterate again
            // to check for more patterns
            j = lps[j - 1];
            res++;
 
            // Start i to check for more
            // than once occurrence
            // of pattern, reset i to
            // previous start + 1
            if (lps[j] != 0)
                i = ++next_i;
            j = 0;
        }
 
        // Mismatch after j matches
        else if (i < N
                 && pat[j] != txt[i]) {
 
            // Do not match lps[0..lps[j-1]]
            // characters, they will
            // match anyway
            if (j != 0)
                j = lps[j - 1];
            else
                i = i + 1;
        }
    }
 
    // Return the required frequency
    return res;
}
 
// Function to find count of substrings
// of length M occurring exactly P times
// in the string, S
void findCount(string& S, int M, int P)
{
 
    // Store all substrings of length M
    set vec;
 
    // Store the size of the string, S
    int n = S.length();
 
    // Pick starting point
    for (int i = 0; i < n; i++) {
 
        // Pick ending point
        for (int len = 1;
             len <= n - i; len++) {
 
            // If the substring is of
            // length M, insert it in vec
            string s = S.substr(i, len);
            if (s.length() == M) {
                vec.insert(s);
            }
        }
    }
 
    // Initialise count as 0 to store
    // the required count of substrings
    int count = 0;
 
    // Iterate through the set of
    // substrings
    for (auto it : vec) {
 
        // Store its frequency
        int ans = KMPSearch(it, S);
 
        // If frequency is equal to P
        if (ans == P) {
 
            // Increment count by 1
            count++;
        }
    }
 
    // Print the answer
    cout << count;
}
 
// Driver Code
int main()
{
    string S = "abacaba";
    int M = 3, P = 2;
 
    // Function Call
    findCount(S, M, P);
 
    return 0;
}

Java

// Java Program to implement
// the above approach
 
import java.io.*;
import java.util.*;
 
class GFG {

    /**
     * Computes the LPS array of a pattern: lps[i] is the length of the
     * longest proper prefix of pat[0..i] that is also a suffix of
     * pat[0..i].
     *
     * @param pat pattern to preprocess; characters 0..M-1 are read
     * @param M   number of leading characters of pat to process
     * @param lps output array with room for at least M entries;
     *            left untouched when M <= 0
     */
    static void computeLPSArray(String pat, int M,
                                int lps[])
    {
        // An empty pattern has no lps entries to fill in
        if (M <= 0) {
            return;
        }

        // Length of the prefix/suffix overlap carried over from the
        // previous position
        int len = 0;
        int i = 1;
        lps[0] = 0;

        // Iterate over [1, M - 1] to find lps[i]
        while (i < M) {

            if (pat.charAt(i) == pat.charAt(len)) {
                // The overlap grows by one character
                len++;
                lps[i] = len;
                i++;
            }
            else if (len != 0) {
                // Fall back to the next shorter overlap; i is
                // deliberately not advanced here
                len = lps[len - 1];
            }
            else {
                // No prefix of pat is a suffix ending at position i
                lps[i] = 0;
                i++;
            }
        }
    }

    /**
     * Counts the occurrences of pat in txt using the KMP algorithm.
     * Overlapping occurrences are counted separately (for example
     * "aba" occurs twice in "ababa").
     *
     * @param pat pattern to look for
     * @param txt text to scan
     * @return number of occurrences; 0 when pat is empty or longer
     *         than txt
     */
    static int KMPSearch(String pat, String txt)
    {
        // Stores length of both strings
        int M = pat.length();
        int N = txt.length();

        // An empty or over-long pattern cannot match
        if (M == 0 || M > N) {
            return 0;
        }

        // Longest-prefix-suffix values for the pattern
        int lps[] = new int[M];
        computeLPSArray(pat, M, lps);

        int res = 0;
        int i = 0; // index into txt
        int j = 0; // index into pat, i.e. characters matched so far

        while (i < N) {
            if (pat.charAt(j) == txt.charAt(i)) {
                i++;
                j++;

                if (j == M) {
                    // Full match ending at txt[i - 1]. Keep the
                    // longest overlap of the pattern with itself so
                    // an overlapping occurrence is still found; i is
                    // never moved backwards.
                    res++;
                    j = lps[j - 1];
                }
            }
            else if (j != 0) {
                // Mismatch after j matches: the first lps[j - 1]
                // characters are known to match already
                j = lps[j - 1];
            }
            else {
                // Mismatch on the first pattern character
                i++;
            }
        }

        // Return the required frequency
        return res;
    }

    /**
     * Prints the number of distinct substrings of length M that occur
     * exactly P times in S. Prints 0 when M is not in the range
     * [1, S.length()].
     *
     * @param S string to examine
     * @param M required substring length
     * @param P required number of occurrences
     */
    static void findCount(String S, int M, int P)
    {
        // Distinct substrings of length M
        TreeSet<String> vec = new TreeSet<>();

        // Store the size of the string, S
        int n = S.length();

        // Exactly one window of length M starts at each index
        // 0 .. n - M; the loop body never runs when M > n.
        if (M > 0) {
            for (int i = 0; i + M <= n; i++) {
                vec.add(S.substring(i, i + M));
            }
        }

        // Number of distinct substrings whose frequency equals P
        int count = 0;

        for (String it : vec) {
            if (KMPSearch(it, S) == P) {
                count++;
            }
        }

        // Print the answer
        System.out.println(count);
    }

    // Driver Code
    public static void main(String[] args)
    {

        String S = "abacaba";
        int M = 3, P = 2;

        // Function Call
        findCount(S, M, P);
    }
}
 
// This code is contributed by kingash.

Python3

# Python 3 program for the above approach
 
# Function to compute the LPS array
def computeLPSArray(pat, M, lps):
    """Fill lps[0..M-1] with the longest-prefix-suffix lengths of pat.

    lps[k] receives the length of the longest proper prefix of
    pat[0..k] that is also a suffix of pat[0..k]. The list is modified
    in place and nothing is returned. lps[0] is written
    unconditionally, so lps must hold at least one element.
    """
    lps[0] = 0

    # Length of the overlap found for the previous position
    border = 0
    pos = 1

    # Walk positions 1 .. M - 1
    while pos < M:
        if pat[pos] != pat[border]:
            if border == 0:
                # No prefix of pat is a suffix ending here
                lps[pos] = 0
                pos += 1
            else:
                # Retry the same position with the next shorter overlap
                border = lps[border - 1]
            continue

        # Characters agree: the overlap grows by one
        border += 1
        lps[pos] = border
        pos += 1
 
# Function to find the frequency of
# pat in the string txt
def KMPSearch(pat, txt):
    """Count the occurrences of pat in txt using the KMP algorithm.

    Overlapping occurrences are counted separately, e.g. "aba" occurs
    twice in "ababa". Returns 0 when pat is empty or longer than txt.
    """
    # Stores length of both strings
    M = len(pat)
    N = len(txt)

    # An empty or over-long pattern cannot match; returning early also
    # keeps lps non-empty for computeLPSArray.
    if M == 0 or M > N:
        return 0

    # Longest-prefix-suffix values for the pattern
    lps = [0] * M
    computeLPSArray(pat, M, lps)

    res = 0
    i = 0  # index into txt
    j = 0  # index into pat, i.e. characters matched so far

    while i < N:
        if pat[j] == txt[i]:
            i += 1
            j += 1

            if j == M:
                # Full match ending at txt[i - 1]. Keep the longest
                # overlap of the pattern with itself so an overlapping
                # occurrence is still found; i never moves backwards.
                res += 1
                j = lps[j - 1]

        elif j != 0:
            # Mismatch after j matches: the first lps[j - 1]
            # characters are known to match already
            j = lps[j - 1]
        else:
            # Mismatch on the first pattern character
            i += 1

    # Return the required frequency
    return res
 
# Function to find count of substrings
# of length M occurring exactly P times
# in the string, S
def findCount(S, M, P):
    """Print how many distinct length-M substrings of S occur exactly P times.

    Prints 0 when M is not in the range [1, len(S)].
    """
    # Store the size of the string, S
    n = len(S)

    # Distinct substrings of length M
    vec = set()

    # Exactly one window of length M starts at each index 0 .. n - M
    if 0 < M <= n:
        for i in range(n - M + 1):
            vec.add(S[i:i + M])

    # Number of distinct substrings whose frequency equals P
    count = 0

    for it in vec:
        if KMPSearch(it, S) == P:
            count += 1

    # Print the answer
    print(count)
 
# Driver Code
if __name__ == '__main__':
    # Sample input: text, substring length, required frequency
    S, M, P = "abacaba", 3, 2

    # Function Call
    findCount(S, M, P)

    # This code is contributed by ipg2016107.

C#

// C# program for the above approach
using System;
using System.Collections.Generic;
class GFG
{

  // Computes the LPS array of a pattern: lps[i] is the length of the
  // longest proper prefix of pat[0..i] that is also a suffix of
  // pat[0..i].
  //
  // pat - pattern to preprocess; characters 0..M-1 are read
  // M   - number of leading characters of pat to process
  // lps - output array with room for at least M entries; left
  //       untouched when M <= 0
  static void computeLPSArray(string pat, int M, int[] lps)
  {

    // An empty pattern has no lps entries to fill in
    if (M <= 0)
      return;

    // Length of the prefix/suffix overlap carried over from the
    // previous position
    int len = 0;
    int i = 1;
    lps[0] = 0;

    // Iterate over [1, M - 1] to find lps[i]
    while (i < M)
    {

      if (pat[i] == pat[len])
      {
        // The overlap grows by one character
        len++;
        lps[i] = len;
        i++;
      }
      else if (len != 0) {
        // Fall back to the next shorter overlap; i is deliberately
        // not advanced here
        len = lps[len - 1];
      }
      else {
        // No prefix of pat is a suffix ending at position i
        lps[i] = 0;
        i++;
      }
    }
  }

  // Counts the occurrences of pat in txt using the KMP algorithm.
  // Overlapping occurrences are counted separately (for example
  // "aba" occurs twice in "ababa").
  //
  // Returns 0 when pat is empty or longer than txt.
  static int KMPSearch(string pat, string txt)
  {

    // Stores length of both strings
    int M = pat.Length;
    int N = txt.Length;

    // An empty or over-long pattern cannot match
    if (M == 0 || M > N)
      return 0;

    // Longest-prefix-suffix values for the pattern
    int[] lps = new int[M];
    computeLPSArray(pat, M, lps);

    int res = 0;
    int i = 0; // index into txt
    int j = 0; // index into pat, i.e. characters matched so far

    while (i < N) {
      if (pat[j] == txt[i]) {
        i++;
        j++;

        if (j == M) {
          // Full match ending at txt[i - 1]. Keep the longest
          // overlap of the pattern with itself so an overlapping
          // occurrence is still found; i is never moved backwards.
          res++;
          j = lps[j - 1];
        }
      }
      else if (j != 0) {
        // Mismatch after j matches: the first lps[j - 1]
        // characters are known to match already
        j = lps[j - 1];
      }
      else {
        // Mismatch on the first pattern character
        i++;
      }
    }

    // Return the required frequency
    return res;
  }

  // Prints the number of distinct substrings of length M that occur
  // exactly P times in S. Prints 0 when M is not in the range
  // [1, S.Length].
  static void findCount(string S, int M, int P)
  {

    // Distinct substrings of length M
    HashSet<string> vec = new HashSet<string>();

    // Store the size of the string, S
    int n = S.Length;

    // Exactly one window of length M starts at each index
    // 0 .. n - M; the loop body never runs when M > n.
    if (M > 0) {
      for (int i = 0; i + M <= n; i++) {
        vec.Add(S.Substring(i, M));
      }
    }

    // Number of distinct substrings whose frequency equals P
    int count = 0;

    foreach(string it in vec) {
      if (KMPSearch(it, S) == P) {
        count++;
      }
    }

    // Print the answer
    Console.WriteLine(count);
  }

  // Driver code
  static void Main() {
    string S = "abacaba";
    int M = 3, P = 2;

    // Function Call
    findCount(S, M, P);
  }
}
 
// This code is contributed by divyeshrabadiya07.

Javascript

输出：1

时间复杂度： O(N*M + (N² – M²))
辅助空间： O(N – M)

如果您想与行业专家一起参加直播课程，请参阅Geeks Classes Live