📜  字符串数组中最常用的单词

📅  最后修改于: 2021-04-23 05:57:24             🧑  作者: Mango

给定一组单词,找到其中出现次数最多的单词

例子:

Input : arr[] = {"geeks", "for", "geeks", "a", 
                "portal", "to", "learn", "can",
                "be", "computer", "science", 
                 "zoom", "yup", "fire", "in", 
                 "be", "data", "geeks"}
Output : geeks
"geeks" is the most frequent word as it 
occurs 3 times

一个简单的解决方案是运行两个循环并计算每个单词的出现次数。该解决方案的时间复杂度为O(n * n * MAX_WORD_LEN)。

一个高效的解决方案是使用Trie数据结构。思路很简单:首先把所有单词插入Trie,并在每个单词结尾的节点上记录该单词出现的次数;然后对Trie进行一次前序遍历,比较各节点上的计数,找出出现次数最多的单词。

// CPP code to find most frequent word in
// an array of strings
#include <iostream>
#include <string>
#include <unordered_map>
using namespace std;
  
/*
 * Trie node whose children are keyed by single characters.
 *
 * key - the complete word ending at this node; filled in by insert() and
 *       left empty on nodes where no word ends
 * cnt - number of times that word has been inserted (0 if no word ends here)
 * map - child nodes, indexed by the next character of the word
 */
struct Trie {
    std::string key;
    int cnt = 0;
    std::unordered_map<char, Trie*> map;
};
  
/*
 * Heap-allocates a fresh Trie node, resets its word counter to zero and
 * returns the raw pointer. Nothing in this function releases the node.
 */
Trie* getNewTrieNode()
{
    Trie* const fresh = new Trie();
    (*fresh).cnt = 0;
    return fresh;
}
  
/* function to insert a string */
void insert(Trie*& root, string& str)
{
    // start from root node
    Trie* temp = root;
  
    for (int i = 0; i < str.length(); i++) {
  
        char x = str[i];
  
        /*a new node if path doesn't exists*/
        if (temp->map.find(x) == temp->map.end())
            temp->map[x] = getNewTrieNode();
  
        // go to next node
        temp = temp->map[x];
    }
  
    // store key and its count in leaf nodes
    temp->key = str;
    temp->cnt += 1;
}
  
/* function for preorder traversal */
bool preorder(Trie* temp, int& maxcnt, string& key)
{
    if (temp == NULL)
        return false;
  
    for (auto it : temp->map) {
  
        /*leaf node will have non-zero count*/
        if (maxcnt < it.second->cnt) {
            key = it.second->key;
            maxcnt = it.second->cnt;
        }
  
        // recurse for current node children
        preorder(it.second, maxcnt, key);
    }
}
  
void mostFrequentWord(string arr[], int n)
{
    // Insert all words in a Trie
    Trie* root = getNewTrieNode();
    for (int i = 0; i < n; i++)
        insert(root, arr[i]);
  
    // Do preorder traversal to find the
    // most frequent word
    string key;
    int cnt = 0;
    preorder(root, cnt, key);
  
    cout << "The word that occurs most is : "
         << key << endl;
    cout << "No of times: " << cnt << endl;
}
  
// Driver code
int main()
{
    // given set of keys
    string arr[] = { "geeks", "for", "geeks", "a",
                     "portal", "to", "learn", "can", "be",
                     "computer", "science", "zoom", "yup",
                     "fire", "in", "be", "data", "geeks" };
    int n = sizeof(arr) / sizeof(arr[0]);
  
    mostFrequentWord(arr, n);
  
    return 0;
}

输出:

The word that occurs most is : geeks
No of times: 3

时间复杂度:O(n * MAX_WORD_LEN)

另一个高效的解决方案是使用哈希。详情请参考“寻找选举的获胜者”一文(其中每张选票以候选人姓名表示)。

更简单的解决方案是使用HashMap。

方法:
使用HashMap记录每个单词及其出现频率,然后遍历该HashMap,找出频率最高的单词。
下面是上述方法的实现。

Java
// Java implementation
import java.util.*;
  
class GKG {

    /**
     * Returns the word that occurs most often in {@code arr}.
     *
     * @param arr words to examine
     * @return the most frequent word, or the empty string when {@code arr}
     *         is empty; when several words share the highest count, the
     *         one returned depends on HashMap iteration order
     */
    static String findWord(String[] arr)
    {
        // Word -> number of occurrences
        HashMap<String, Integer> hs = new HashMap<String, Integer>();

        // Count every word with a single lookup per element
        for (String word : arr) {
            Integer seen = hs.get(word);
            hs.put(word, seen == null ? 1 : seen + 1);
        }

        String key = "";
        int value = 0;

        // Keep the entry with the strictly highest count
        for (Map.Entry<String, Integer> me : hs.entrySet()) {
            if (me.getValue() > value) {
                value = me.getValue();
                key = me.getKey();
            }
        }

        return key;
    }

    // Driver code
    public static void main(String[] args)
    {
        String[] arr = { "geeks", "for", "geeks", "a",
                         "portal", "to", "learn", "can", "be",
                         "computer", "science", "zoom", "yup",
                         "fire", "in", "be", "data", "geeks" };
        String sol = findWord(arr);

        // Print word having highest frequency
        System.out.println(sol);
    }
}
  
// This code is contributed by Divyank Sheth


C#
// C# implementation 
using System;
using System.Collections.Generic;
  
class GFG
{

    // Returns the word that occurs most often in arr, or the empty string
    // when arr is empty. When several words share the highest count, the
    // one returned depends on the Dictionary's enumeration order.
    static String findWord(String[] arr)
    {

        // Word -> number of occurrences
        Dictionary<String, int> hs =
            new Dictionary<String, int>();

        // Iterate through array of words
        for (int i = 0; i < arr.Length; i++)
        {
            // TryGetValue leaves 'seen' at 0 for a word not yet stored,
            // so new and known words share the same update.
            int seen;
            hs.TryGetValue(arr[i], out seen);
            hs[arr[i]] = seen + 1;
        }

        String key = "";
        int value = 0;

        // Keep the entry with the strictly highest count
        foreach (KeyValuePair<String, int> me in hs)
        {
            if (me.Value > value)
            {
                value = me.Value;
                key = me.Key;
            }
        }

        // Return word having highest frequency
        return key;
    }

    // Driver code
    public static void Main(String[] args)
    {
        String[] arr = { "geeks", "for", "geeks", "a",
                        "portal", "to", "learn", "can", "be",
                        "computer", "science", "zoom", "yup",
                        "fire", "in", "be", "data", "geeks" };
        String sol = findWord(arr);

        // Print word having highest frequency
        Console.WriteLine(sol);
    }
}
  
// This code is contributed by Rajput-Ji


输出:

geeks