-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathlossyCounter.hpp
121 lines (104 loc) · 2.83 KB
/
lossyCounter.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#ifndef LOSSY_COUNTER_HPP
#define LOSSY_COUNTER_HPP
#include <sstream>
#include <vector>
#include <queue>
#include <deque>
#include <unordered_map>
using namespace std;
template<typename Key>
class LossyCounter {
public:
typedef unordered_map<Key, size_t> Map;
typedef typename Map::iterator Iterator;
typedef pair<Key, size_t> Pair;
LossyCounter(const size_t capacity_) : capacity(capacity_) {
number = 0;
delta = 0;
}
void count(Key key) {
number ++;
if (counter.find(key) != counter.end()) {
counter[key] ++;
} else {
counter[key] = delta + 1;
}
if (number/capacity != delta) {
delta = number/capacity;
for (auto i = counter.begin(); i != counter.end();) {
if (i->second < delta) {
i = counter.erase(i);
} else {
i ++;
}
}
}
}
Iterator begin() {
return counter.begin();
}
Iterator end() {
return counter.end();
}
size_t size() {
return counter.size();
}
void takeFrequent(deque<Pair > &result, size_t size) {
struct Compare {
bool operator()(Pair &a, Pair &b) {
return a.second > b.second;
}
};
priority_queue<Pair, vector<Pair>, Compare> que;
for (auto i = counter.begin(); i != counter.end();) {
que.push(*i);
if (que.size() == size+1) {
que.pop();
}
i = counter.erase(i);
}
while (!que.empty()) {
result.push_front(que.top());
que.pop();
}
}
private:
Map counter;
size_t number, delta;
const size_t capacity;
};
inline vector<string> parseLine(const string &line,
const string &beginning, const string &ending) {
vector<string> result;
istringstream iss(line);
string word;
if (beginning != "") {
result.push_back(beginning);
}
while (getline(iss, word, ' ')) {
result.push_back(word);
}
if (ending != "") {
result.push_back(ending);
}
return result;
}
inline vector<string> extractNgram(const string &line, const size_t order,
const string &beginning, const string &ending) {
vector<string> result;
deque<string> context;
for (string word : parseLine(line, beginning, ending)) {
string ngram = word;
for (auto i = context.rbegin(); i != context.rend(); i++) {
result.push_back(ngram);
ngram = *i + " " + ngram;
}
result.push_back(ngram);
if (context.size() == order-1) {
context.pop_front();
}
context.push_back(word);
}
return result;
}
#endif