1. 程式人生 > >leetcode 692. Top K Frequent Words

leetcode 692. Top K Frequent Words

692. Top K Frequent Words

Given a non-empty list of words, return the k most frequent elements.

Your answer should be sorted by frequency from highest to lowest. If two words have the same frequency, then the word with the lower alphabetical order comes first.

Example 1:

Input: ["i", "love", "leetcode", "i", "love", "coding"], k = 2
Output: ["i", "love"]
Explanation: "i" and "love" are the two most frequent words. Note that "i" comes before "love" due to a lower alphabetical order.

Example 2:

Input: ["the", "day", "is", "sunny", "the", "the", "the", "sunny", "is", "is"], k = 4
Output: ["the", "is", "sunny", "day"]
Explanation: "the", "is", "sunny" and "day" are the four most frequent words,
    with the number of occurrence being 4, 3, 2 and 1 respectively.

Note:

  1. You may assume k is always valid, 1 ≤ k ≤ number of unique elements.
  2. Input words contain only lowercase letters.

Follow up:

  1. Try to solve it in O(n log k) time and O(n) extra space.

//首先想到用set紅黑樹排序來做
//這個方法的複雜度是nlogn
class word{
public:
    string content;
    int frequent;
    
    word(string a, int b):content(a), frequent(b) {}
    bool operator<(const word b) const
    {
        if (this->frequent == b.frequent) {
            int k = 0;
            while (k < this->content.size() && k < b.content.size()){
                if (this->content[k] == b.content[k])
                {
                    k++;
                    continue;
                }
                return this->content[k] < b.content[k];
            }
        }
        return this->frequent > b.frequent;
    }
};

class Solution {
public:
    vector<string> topKFrequent(vector<string>& wordss, int k)
    {   //第一階段
        unordered_map<string, int> mp; //hash O(n)
        for (auto word : wordss)
        {
            mp[word]++;
        }
        //第二階段
        multiset<word> st;  //必須是multiset,不然會出錯。某些情況下,相同的frequence是不能新增進set的
        for (auto it : mp)  //O(nlogn)
        {
            st.insert(word(it.first, it.second));
        }
        for (auto it : st)
            cout<<it.content<<"-"<<it.frequent<<"   ";
        //第三階段
        vector<string> ret;
        for (auto it : st)
        {
            ret.push_back( it.content );
            if (--k == 0) break;
        }
        return ret;
    }
};

//一看到複雜度nlogk,就應該知道用堆排序,維護一個k大小的堆。
//應該想到堆排序。
//make_heap:O(k)
//pop_heap:O(logk)
//push_heap:O(logk)
// Pairs a word with the number of times it occurs.
class word {
public:
    std::string content;  // text of the word
    int frequent;         // occurrence count of the word

    // Builds an entry from a word and its occurrence count.
    word(std::string text, int count)
        : content(text),
          frequent(count)
    {
    }
};

// Ranking predicate: returns true when a must be ranked before b.
// Higher frequency ranks first; on equal frequency the lexicographically
// smaller word ranks first (a strict prefix ranks before the longer word).
//
// The predicate is a strict weak ordering: compare(x, x) is false, which the
// standard heap algorithms require of their comparator. Arguments are taken
// by const reference so no strings are copied per comparison.
bool compare(const word& a, const word& b)
{
    if (a.frequent != b.frequent)
        return a.frequent > b.frequent;
    return a.content < b.content;
}

class Solution {
public:
    vector<string> topKFrequent(vector<string>& wordss, int k)
    {   //第一階段
        unordered_map<string, int> mp; //hash O(n)
        for (auto word : wordss)
        {
            mp[word]++;
        }
        //第二階段
        vector<word> vt;
        for (auto it : mp)
        {
            if (vt.size() < k)
            {
                vt.push_back(word(it.first, it.second));
                if (vt.size() == k)
                    make_heap(vt.begin(), vt.end(), compare);
            }
            else
            {
                if ( compare(word(it.first, it.second), vt[0]) ) //當前遍歷的值比heap裡面最小的大,就應該替換出這個最小的
                {
                    pop_heap(vt.begin(), vt.end(), compare);
                    vt.pop_back();
                    vt.push_back(word(it.first, it.second));
                    push_heap(vt.begin(), vt.end(), compare);   //重新構成堆
                }
            }
        }
        //第三階段 彈出堆頂,從後往前放置到ret中
        vector<string> ret(k, "");
        for (int i = k - 1; i >= 0; i --)
        {
            pop_heap(vt.begin(), vt.end(), compare);
            ret[i] = vt.back().content;
            vt.pop_back();
        }
        return ret;
    }
};