-
Notifications
You must be signed in to change notification settings - Fork 13
/
Heap_FindKMostFrequentWordsInFile.java
55 lines (46 loc) · 1.81 KB
/
Heap_FindKMostFrequentWordsInFile.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import java.util.*;
// https://leetcode.com/problems/top-k-frequent-words/ (best explanation)
// https://stackoverflow.com/questions/185697/the-most-efficient-way-to-find-top-k-frequent-words-in-a-big-word-sequence
/**
 * Finds the {@code k} most frequent words in a sequence of words using a
 * bounded min-heap.
 *
 * <p>Words are ranked by descending occurrence count; words with the same
 * count are ranked in ascending natural ({@link String#compareTo}) order.
 */
public class Heap_FindKMostFrequentWordsInFile {

    /**
     * Returns the {@code k} most frequent words of {@code vec}, most frequent first.
     *
     * @param vec the words to analyse
     * @param k   the maximum number of words to return
     * @return a new mutable list holding at most {@code k} words, ordered by
     *         descending count and then ascending natural order; it is shorter
     *         than {@code k} when {@code vec} has fewer distinct words, and
     *         empty when {@code k <= 0}
     */
    public static List<String> frequentWords(List<String> vec, int k) {
        if (k <= 0) {
            // Nothing can be selected; skip counting and heap work entirely.
            return new ArrayList<>();
        }

        // Store the frequency of each word.
        Map<String, Integer> frequencyMap = getFrequencyOfWords(vec);
        // Keep only the k strongest words in a min-heap.
        Queue<String> minHeap = getKMostFrequentlyWords(frequencyMap, k);

        // The heap yields the weakest survivor first, so collect in that order
        // and reverse once instead of shifting the list on every insertion.
        List<String> result = new ArrayList<>(minHeap.size());
        while (!minHeap.isEmpty()) {
            result.add(minHeap.poll());
        }
        Collections.reverse(result);
        return result;
    }

    /**
     * Counts how many times each word occurs in {@code list}.
     *
     * @param list the words to count
     * @return a map from each distinct word to its number of occurrences
     */
    private static Map<String, Integer> getFrequencyOfWords(List<String> list) {
        Map<String, Integer> map = new HashMap<>();
        for (String word : list) {
            map.merge(word, 1, Integer::sum);
        }
        return map;
    }

    /**
     * Selects the {@code k} highest-ranked words of {@code map} into a min-heap.
     *
     * <p>The heap's head is always the weakest candidate seen so far: the word
     * with the lowest count and, among equal counts, the one that sorts last.
     * Whenever the heap grows beyond {@code k} entries that head is evicted.
     *
     * @param map word to occurrence count
     * @param k   the maximum number of words to keep
     * @return a heap holding at most {@code k} words, weakest first
     */
    private static Queue<String> getKMostFrequentlyWords(Map<String, Integer> map, int k) {
        Comparator<String> weakestFirst = (w1, w2) -> {
            // Integer.compare avoids the subtract-to-compare idiom.
            int byCount = Integer.compare(map.get(w1), map.get(w2));
            // Reversed natural order on ties so the later word is evicted first.
            return byCount != 0 ? byCount : w2.compareTo(w1);
        };

        Queue<String> minHeap = new PriorityQueue<>(weakestFirst);
        for (String word : map.keySet()) {
            minHeap.add(word);
            if (minHeap.size() > k) {
                minHeap.poll();
            }
        }
        return minHeap;
    }

    /**
     * Reads a word count {@code n}, then {@code n} whitespace-separated words,
     * then {@code k} from standard input, and prints the most frequent words
     * one per line.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try (Scanner sc = new Scanner(System.in)) {
            int n = sc.nextInt();
            List<String> list = new ArrayList<>();
            for (int i = 0; i < n; i++) {
                list.add(sc.next());
            }
            int k = sc.nextInt();

            // Iterate over the answer itself: it holds fewer than k entries
            // when the input has fewer than k distinct words.
            for (String word : frequentWords(list, k)) {
                System.out.println(word);
            }
        }
    }
}