📌  相关文章
📜  使用多项式滚动哈希函数统计数组中存在的不同字符串

📅  最后修改于: 2021-09-03 14:49:52             🧑  作者: Mango

给定一个字符串数组arr[] ,任务是使用多项式滚动哈希函数找到数组中存在的不同字符串的计数。

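例子:

输入: arr[] = { "abcde", "abcce", "abcdf", "abcde" }
输出: 3
解释: 数组中不同的字符串为 "abcde"、"abcce" 和 "abcdf",共 3 个。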

方法:可以使用Hashing解决问题。这个想法是使用滚动散列函数来计算数组中所有字符串的散列值并将其存储在另一个数组中,比如Hash[] 。最后,打印Hash[]数组中不同元素的计数。请按照以下步骤解决问题:

  • 初始化一个数组,比如Hash[] ,以使用滚动哈希函数存储数组中存在的所有字符串的哈希值。
  • 初始化一个变量,比如cntElem ,以存储数组中存在的不同字符串的计数。
  • 遍历数组arr[] 。对于遇到的每个字符串,计算该字符串的哈希值,并将其存储在hash[]数组中。
  • 对数组hash[] 进行排序。
  • 遍历数组hash[] 。对于每个数组元素,检查hash[i]与hash[i – 1]是否相等。如果不相等,则将cntElem增加1 。
  • 最后,打印cntElem的值。

下面是上述方法的实现:

CPP
// C++ program to implement
// the above approach
 
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
 
// Computes the polynomial rolling hash of a string:
//
//     hash = sum((str[i] - 'a' + 1) * 31^i)  mod  (1e9 + 9)
//
// Each character is mapped so that 'a' -> 1, 'b' -> 2, ..., 'z' -> 26; the
// empty string hashes to 0. Characters that sort below 'a' contribute
// non-positive terms, so for such input the result may be negative.
//
// The string is taken by const reference so that hashing does not copy it.
long long compute_hash(const std::string& str)
{
    // Integral constants: avoids the implicit double -> int conversion that
    // writing the modulus as `1e9 + 9` would require.
    constexpr long long kBase = 31;
    constexpr long long kMod = 1000000009LL;

    long long hash_val = 0;

    // Holds kBase^i mod kMod for the character currently being processed.
    long long mul = 1;

    // Traverse the string
    for (const char ch : str) {

        // Both operands of the product are below ~1e9 in magnitude times a
        // small factor, so the intermediate value fits in long long.
        hash_val = (hash_val + (ch - 'a' + 1) * mul) % kMod;

        // Advance to the next power of the base.
        mul = (mul * kBase) % kMod;
    }

    return hash_val;
}
 
// Function to find the count of distinct
// strings present in the given array
int distinct_str(vector& arr, int n)
{
    // Store the hash values of
    // the strings
    vector hash(n);
 
    // Traverse the array
    for (int i = 0; i < n; i++) {
 
        // Stores hash value of arr[i]
        hash[i] = compute_hash(arr[i]);
    }
 
    // Sort hash[] array
    sort(hash.begin(), hash.end());
 
    // Stores count of distinct
    // strings in the array
    int cntElem = 1;
 
    // Traverse hash[] array
    for (int i = 1; i < n; i++) {
        if (hash[i] != hash[i - 1]) {
 
            // Update cntElem
            cntElem++;
        }
    }
 
    return cntElem;
}
 
// Driver Code
int main()
{
    vector arr{ "abcde", "abcce",
                        "abcdf", "abcde" };
 
    int N = arr.size();
 
    cout << distinct_str(arr, N) << endl;
 
    return 0;
}


Java
// Java program to implement
// the above approach
import java.util.Arrays;  
 
public class GFG {

    /**
     * Computes the polynomial rolling hash of a string:
     * sum((str[i] - 'a' + 1) * 31^i) mod (1e9 + 9).
     *
     * All intermediate arithmetic is done in {@code long}: the products
     * {@code value * mul} and {@code mul * p} exceed the {@code int} range as
     * soon as {@code mul} grows large, which would silently wrap around.
     *
     * Characters that sort below 'a' contribute non-positive terms, so the
     * result may be negative for such input.
     *
     * @param str the string to hash
     * @return the hash value; its magnitude is always below the modulus, so
     *         it fits in an int
     */
    static int compute_hash(String str)
    {
        final long p = 31;
        final long MOD = 1000000009L;
        long hash_val = 0;

        // Holds p^i mod MOD for the character currently being processed.
        long mul = 1;

        // Traverse the string
        for (int i = 0; i < str.length(); i++) {

            long value = str.charAt(i) - 'a' + 1;

            // Update hash_val
            hash_val = (hash_val + value * mul) % MOD;

            // Update mul
            mul = (mul * p) % MOD;
        }

        // |hash_val| < MOD < 2^31, so the narrowing cast is lossless.
        return (int) hash_val;
    }

    /**
     * Counts the distinct strings among the first n elements of arr.
     *
     * Strings are compared through their hash values only, so two different
     * strings that share a hash value are counted once.
     *
     * @param arr the strings to examine
     * @param n   how many leading elements of arr to consider
     * @return the number of distinct hash values, or 0 when n <= 0
     */
    static int distinct_str(String arr[], int n)
    {
        // An empty range contains no strings at all.
        if (n <= 0) {
            return 0;
        }

        // Store the hash values of the strings
        int hash[] = new int[n];

        for (int i = 0; i < n; i++) {
            hash[i] = compute_hash(arr[i]);
        }

        // After sorting, equal hash values are adjacent.
        Arrays.sort(hash);

        // The first element always starts a new group; every change between
        // neighbours starts another one.
        int cntElem = 1;

        for (int i = 1; i < n; i++) {
            if (hash[i] != hash[i - 1]) {
                cntElem++;
            }
        }

        return cntElem;
    }

    // Driver Code
    public static void main (String[] args)
    {
        String arr[] = { "abcde", "abcce",
                            "abcdf", "abcde" };

        int N = arr.length;

        System.out.println(distinct_str(arr, N));
    }
}
 
// This code is contributed by AnkThon


Python3
# Python3 program to implement
# the above approach
 
# Polynomial rolling hash of a string
def compute_hash(str):
    """Return sum((ord(c) - ord('a') + 1) * 31**i) mod (10**9 + 9).

    The index i is the position of character c in the string, so 'a'
    maps to 1, 'b' to 2 and so on. The empty string hashes to 0.
    """
    base, modulus = 31, 10**9 + 9

    digest = 0

    # base**i reduced modulo `modulus` for the current position
    weight = 1

    for symbol in str:
        rank = ord(symbol) - ord('a') + 1
        digest = (digest + rank * weight) % modulus
        weight = (weight * base) % modulus

    return digest
 
# Function to find the count of distinct
# strings present in the given array
def distinct_str(arr, n):
    """Count the distinct strings among the first n elements of arr.

    Strings are compared through their compute_hash() values only, so two
    different strings that share a hash value are counted once.

    Returns 0 when n <= 0.
    """
    # An empty range contains no strings at all.
    if n <= 0:
        return 0

    # Sorted hash values: equal hashes end up next to each other.
    hashes = sorted(compute_hash(arr[i]) for i in range(n))

    # The first element always starts a group; every change between
    # neighbours starts another one.
    changes = sum(1 for prev, cur in zip(hashes, hashes[1:]) if prev != cur)

    return 1 + changes
 
# Driver Code: report how many distinct strings the sample list holds
if __name__ == '__main__':
    words = ["abcde", "abcce", "abcdf", "abcde"]
    print(distinct_str(words, len(words)))
 
# This code is contributed by mohit kumar 29


C#
// C# program to implement
// the above approach
using System;
 
class GFG
{

    // Computes the polynomial rolling hash of a string:
    //     sum((str[i] - 'a' + 1) * 31^i) mod (1e9 + 9)
    //
    // All intermediate arithmetic is done in long: the products
    // (ch - 'a' + 1) * mul and mul * p exceed the int range as soon as mul
    // grows large, which would silently wrap around.
    //
    // Characters that sort below 'a' contribute non-positive terms, so the
    // result may be negative for such input.
    static int compute_hash(string str)
    {
        const long p = 31;
        const long MOD = 1000000009L;
        long hash_val = 0;

        // Holds p^i mod MOD for the character currently being processed.
        long mul = 1;

        // Traverse the string
        for (int i = 0; i < str.Length; i++)
        {
            char ch = str[i];

            // Update hash_val (int * long is evaluated as long)
            hash_val = (hash_val + (ch - 'a' + 1) * mul) % MOD;

            // Update mul
            mul = (mul * p) % MOD;
        }

        // |hash_val| < MOD < 2^31, so the narrowing cast is lossless.
        return (int)hash_val;
    }

    // Counts the distinct strings among the first n elements of arr.
    //
    // Strings are compared through their hash values only, so two different
    // strings that share a hash value are counted once.
    //
    // Returns 0 when n <= 0.
    static int distinct_str(string []arr, int n)
    {
        // An empty range contains no strings at all.
        if (n <= 0)
        {
            return 0;
        }

        // Store the hash values of the strings
        int []hash = new int [n];

        for (int i = 0; i < n; i++)
        {
            hash[i] = compute_hash(arr[i]);
        }

        // After sorting, equal hash values are adjacent.
        Array.Sort(hash);

        // The first element always starts a new group; every change between
        // neighbours starts another one.
        int cntElem = 1;

        for (int i = 1; i < n; i++)
        {
            if (hash[i] != hash[i - 1])
            {
                cntElem++;
            }
        }
        return cntElem;
    }

    // Driver Code
    public static void Main (String[] args)
    {
        string []arr = { "abcde", "abcce",
                            "abcdf", "abcde" };
        int N = arr.Length;
        Console.WriteLine(distinct_str(arr, N));
    }
}
 
// This code is contributed by AnkThon


输出:
3

时间复杂度: O(N * M + N log N),其中N是字符串的个数,M是字符串的最大长度(计算所有哈希值需要O(N * M),对哈希值排序需要O(N log N))
辅助空间: O(N)