📌  相关文章
📜  根据好单词的出现频率对字符串数组进行排序

📅  最后修改于: 2021-04-17 10:45:54             🧑  作者: Mango

给定一组不同客户的产品评论( R )和包含用_分隔的好词的字符串S ,任务是按照评论的好坏程度从高到低的顺序对其进行排序。
评论的好坏程度(goodness 值)定义为该评论中出现的好词的数量,同一个好词每出现一次就计数一次。

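例子:

输入:S = "geeks_for_geeks_is_great",R = {"geeks_are_geeks", "geeks_dont_lose", "geeks_for_geeks_is_love"}
输出:
geeks for geeks is love
geeks are geeks
geeks dont lose
说明:好词为 geeks、for、is、great;三条评论分别包含 2、1、4 个好词,因此按好词数量从高到低输出。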

朴素方法:将所有好词插入 unordered_set 中,然后遍历评论数组 R 中每条评论的每个单词,通过检查该单词是否在好词集合中来统计每条评论的好词数量。然后使用稳定的排序算法,按每条评论中的好词数量对数组 R 进行排序。显然,此方法的时间复杂度大于 O(N * N log N)。

高效的方法:用所有好词构建一棵 Trie(字典树),然后利用该 Trie 判断评论中的每个单词是否为好词。

  1. 插入所有好的单词。
  2. 对于每条评论,通过检查其中的每个单词是否存在于 Trie 中来统计好词的数量。

下面是上述方法的实现:

// C++ implementation of the approach
#include <algorithm>
#include <iostream>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
using namespace std;
#define F first
#define S second
#define MAX 26
  
// Comparator for sorting (good-word count, original index) pairs.
//
// a, b  - pairs whose first member is the number of good words in a review
//         and whose second member is that review's index in the input array.
// Returns true when `a` must be placed before `b`: a larger count sorts
// first, and equal counts fall back to the smaller original index so that
// tied reviews keep their input order.
bool cmp(const std::pair<int, int>& a, const std::pair<int, int>& b)
{
    // Same number of good words: the earlier review comes first
    if (a.first == b.first)
        return a.second < b.second;

    // Otherwise the review with more good words comes first
    return a.first > b.first;
}
  
// Structure of s Trie node
struct node {
    bool exist;
    node* arr[MAX];
    node(bool bul = false)
    {
        exist = bul;
        for (int i = 0; i < MAX; i++)
            arr[i] = NULL;
    }
};
  
// Function to add a string to the trie
void add(string s, node* trie)
{
    // Add a node to the trie
    int n = s.size();
    for (int i = 0; i < n; i++) {
  
        // If trie doesn't already contain
        // the current node then create one
        if (trie->arr[s[i] - 'a'] == NULL)
            trie->arr[s[i] - 'a'] = new node();
  
        trie = trie->arr[s[i] - 'a'];
    }
    trie->exist = true;
    return;
}
  
// Function that returns true if
// the trie contains the string s
bool search(string s, node* trie)
{
    // Search for a node in the trie
    for (int i = 0; i < s.size(); i++) {
        if (trie->arr[s[i] - 'a'] == NULL)
            return false;
  
        trie = trie->arr[s[i] - 'a'];
    }
    return trie->exist;
}
  
// Rewrites the given string in place so that every underscore
// becomes a single blank; all other characters are left untouched.
void convert(std::string& str)
{
    // Jump from one '_' to the next instead of inspecting every index
    std::string::size_type at = str.find('_');
    while (at != std::string::npos) {
        str[at] = ' ';
        at = str.find('_', at + 1);
    }
}
  
// Function to sort the array based on good words
void sortArr(string good, vector& review)
{
  
    // Extract all the good words which
    // are '_' separated
    convert(good);
    node* trie = new node();
    string word;
    stringstream ss;
    ss << good;
  
    // Building the entire trie by stringstreaming
    // the 'good words' string
    while (ss >> word)
        add(word, trie);
    int k, n = review.size();
  
    // To store the number of good words
    // and the string index pairs
    vector > rating(n);
    for (int i = 0; i < n; i++) {
        convert(review[i]);
        ss.clear();
        ss << review[i];
        k = 0;
        while (ss >> word) {
  
            // If this word is present in the trie
            // then increment its count
            if (search(word, trie))
                k++;
        }
  
        // Store the number of good words in the
        // current string paired with its
        // index in the original array
        rating[i].F = k;
        rating[i].S = i;
    }
  
    // Using comparator function to
    // sort the array as required
    sort(rating.begin(), rating.end(), cmp);
  
    // Print the sorted array
    for (int i = 0; i < n; i++)
        cout << review[rating[i].S] << "\n";
}
  
// Driver code
int main()
{
  
    // String containing good words
    string S = "geeks_for_geeks_is_great";
  
    // Vector of strings to be sorted
    vector R = { "geeks_are_geeks", "geeks_dont_lose",
                         "geeks_for_geeks_is_love" };
  
    // Sort the array based on the given conditions
    sortArr(S, R);
}
输出:
geeks for geeks is love
geeks are geeks
geeks dont lose