-
Notifications
You must be signed in to change notification settings - Fork 1
/
mha.js
416 lines (386 loc) · 13.9 KB
/
mha.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
const Telegraf = require('telegraf');
const config = require('./config/config.json');
const mha_config = require('./config/mha.json');
const MongoClient = require('mongodb').MongoClient;
const uuidv4 = require('uuid').v4;
const fs = require('fs');
const https = require('https');
const http = require('http');
const path = require('path');
const basename = path.basename;
const CsvReadableStream = require('csv-reader');
const createCsvWriter = require('csv-writer').createObjectCsvWriter;
const url = require('url');
const URL = url.URL;
const fetch = require('node-fetch');
// NOTICE: THIS FILE IS OLD AND NOT USED ANYMORE. USE THE MemeHub-Awards REPOSITORY INSTEAD!
// Command-line dispatch: `node mha.js <command>` runs one of the tasks below.
// The handlers are arrow functions so the task functions are only looked up
// when a command is actually executed. An unknown or missing command does
// nothing.
const COMMANDS = {
    users: () => export_users(),
    nominees: () => export_top(500, "like", new Date("2020-12-17"), new Date("2021-12-16")),
    weeb: () => export_top(20, "weeb", new Date("2020-12-17"), new Date("2021-12-16")),
    media: () => export_media(),
    broadcast: () => broadcast(),
    mentions: () => mentions(),
    evaluate: () => evaluate(),
};

if (Object.prototype.hasOwnProperty.call(COMMANDS, process.argv[2])) {
    // Every task is async. Without a rejection handler a failure would only
    // surface as an unhandled promise rejection.
    COMMANDS[process.argv[2]]().catch(err => {
        console.error(`Command "${process.argv[2]}" failed:`);
        console.error(err);
        process.exitCode = 1;
    });
}
/**
 * Determines the ten most voted memes for every category listed in
 * `mha_config.nominees.categories` and writes the result as JSON to every
 * path in `mha_config.nominees.nominees_paths`.
 *
 * The written object maps `#<category>` to an array of meme ids, ordered by
 * vote count (descending). The "Weeb" category is ranked by weeb votes, every
 * other category by like votes.
 * @returns {Promise<void>} Resolves once all files have been written.
 */
async function export_nominees() {
    console.log('Exporting nominees...');
    console.log('Connecting to mongodb...');
    const client = new MongoClient(config.mongodb.connection_string, { useNewUrlParser: true, useUnifiedTopology: true });
    await client.connect();
    const nominees = {};
    try {
        const db = client.db(config.mongodb.database);
        console.log('Connectet!');
        const collection = db.collection(config.mongodb.collection_names.memes);
        // `const` keeps the loop variable local instead of leaking a global.
        for (const category of mha_config.nominees.categories) {
            console.log(`Getting memes of category ${category}...`);
            const vote_type = category === 'Weeb' ? 'weeb' : 'like';
            const match = {
                categories: category,
                // NOTE(review): these bounds are strings while export_top
                // compares post_date against Date objects - confirm which
                // representation the collection actually stores.
                post_date: {
                    $gt: "2020-12-17",
                    $lt: "2021-12-16"
                },
                // Only memes that have the vote array can be counted by $size.
                [`votes.${vote_type}`]: { $exists: true }
            };
            const memes = await collection.aggregate([
                { $match: match },
                {
                    $project: {
                        id: '$_id',
                        user_id: '$poster_id',
                        votes: { $size: `$votes.${vote_type}` },
                        _id: false
                    }
                },
                { $sort: { votes: -1 } },
                { $limit: 10 },
                {
                    $project: {
                        id: 1,
                        user_id: 1
                    }
                }
            ]).toArray();
            nominees[`#${category}`] = memes.map(meme => meme.id);
            console.log(`Got ${nominees[`#${category}`].length} Nominees for ${category}!`);
        }
    } finally {
        // An open client keeps the process alive, so always release it.
        await client.close();
    }
    const json = JSON.stringify(nominees, null, ' ');
    for (const file of mha_config.nominees.nominees_paths) {
        await fs.promises.writeFile(file, json);
    }
    console.log('Done!');
}
/**
 * Queries the best memes posted during a given timeframe and downloads them
 * into the `./nominees` directory.
 * The names of the files will include the amount of votes, the username of
 * the poster, the categories and the meme id.
 * @param {*} amount The amount of memes to download.
 * @param {*} votes_field The vote field used to determine best memes.
 * @param {*} date_start The start (inclusive) of the timeframe in which the memes have been posted.
 * @param {*} date_end The end (exclusive) of the timeframe in which the memes have been posted.
 * @returns {Promise<void>} Resolves once every meme has been downloaded.
 * @throws {Error} If the metadata of a meme cannot be fetched or a download fails.
 */
async function export_top(amount, votes_field, date_start, date_end) {
    console.log(`Exporting nominees (${amount} best, using the vote file ${votes_field})...`);
    console.log('Connecting to mongodb...');
    const client = new MongoClient(config.mongodb.connection_string, { useNewUrlParser: true, useUnifiedTopology: true });
    await client.connect();
    let memes;
    try {
        const db = client.db(config.mongodb.database);
        console.log('Connectet!');
        const collection = db.collection(config.mongodb.collection_names.memes);
        const match = {
            post_date: {
                $gte: date_start,
                $lt: date_end
            },
            // Only memes that have the vote array can be counted by $size.
            [`votes.${votes_field}`]: { $exists: true }
        };
        memes = await collection.aggregate([
            { $match: match },
            {
                $project: {
                    id: '$_id',
                    user_id: '$poster_id',
                    votes: { $size: `$votes.${votes_field}` },
                    categories: true,
                    _id: false
                }
            },
            { $sort: { votes: -1 } },
            { $limit: amount },
            {
                $lookup: {
                    from: config.mongodb.collection_names.users,
                    localField: "user_id",
                    foreignField: "_id",
                    as: "users"
                }
            },
            {
                // Flatten the poster's user document into the meme. The meme
                // comes last, so its fields win on a name clash.
                $replaceRoot: {
                    newRoot: { $mergeObjects: [{ $arrayElemAt: ["$users", 0] }, "$$ROOT"] }
                }
            }
        ]).toArray();
    } finally {
        // The database is not needed for the downloads and an open client
        // would keep the process alive after the export has finished.
        await client.close();
    }
    if (memes.length !== amount) {
        console.log(`WARNING: Not enough memes found: ${memes.length} / ${amount}`);
    }
    // The download target has to exist, otherwise opening the files fails.
    await fs.promises.mkdir('./nominees', { recursive: true });
    for (const meme of memes) {
        console.log(meme);
        // Get metadata for file
        const meta_result = await fetch(`https://media.memehub.leifb.dev/${meme.id}/meta`);
        if (!meta_result.ok) {
            throw new Error(`Cannot get metadata of meme ${meme.id}: ${meta_result.status} ${meta_result.statusText}`);
        }
        const meta = await meta_result.json();
        // Download file itself
        const categories = meme.categories || [];
        const file_name = `${meme.votes}_${meme.username}_${categories.join("-")}_[${meme.id}]`;
        await download(`https://media.memehub.leifb.dev/${meme.id}/file`, `./nominees/${file_name}.${meta.ext}`);
    }
    console.log('Done!');
}
/**
 * Exports all users to `users.json` in the current working directory.
 * Every user is stored under a freshly generated UUID (v4) key. The shape of
 * a user entry is whatever `get_users` returns.
 * @returns {Promise<void>} Resolves once the file has been written.
 */
async function export_users() {
    console.log('Exporting users...');
    console.log('Connecting to mongodb...');
    const client = new MongoClient(config.mongodb.connection_string, { useNewUrlParser: true, useUnifiedTopology: true });
    await client.connect();
    let users;
    try {
        const db = client.db(config.mongodb.database);
        console.log('Connectet!');
        users = await get_users(db);
    } finally {
        // An open client keeps the process alive, so always release it.
        await client.close();
    }
    const final_object = {};
    for (const user of users) {
        final_object[uuidv4()] = user;
    }
    console.log('Writing to file...');
    const json = JSON.stringify(final_object, null, ' ');
    await fs.promises.writeFile("users.json", json);
    console.log('Done!');
}
/**
 * Downloads the media of all nominees listed in the file at
 * `mha_config.media.nominees_path` into `mha_config.media.media_path`.
 * Afterwards a JSON object mapping every successfully downloaded id to the
 * value returned by `download_image` is written to every path in
 * `mha_config.media.media_files`.
 *
 * A failing download is logged and skipped; it does not abort the export.
 * @returns {Promise<void>} Resolves once all mapping files have been written.
 */
async function export_media() {
    const bot = new Telegraf(config.bot_token);
    const nominees = require(mha_config.media.nominees_path);
    // NOTE(review): this counts the top-level keys (categories) of the
    // nominees file, not the individual ids.
    console.log(`Downloading media for ${Object.keys(nominees).length} nominees...`);
    await fs.promises.mkdir(mha_config.media.media_path, { recursive: true });
    const media = {};
    for (const category in nominees) {
        for (const id of nominees[category]) {
            try {
                media[id] = await download_image(bot, id);
            }
            catch (err) {
                console.error(`failed downloading file for id "${id}"`);
                console.error(err);
            }
        }
    }
    const json = JSON.stringify(media, null, ' ');
    // `const` keeps the loop variable local instead of leaking a global.
    for (const file of mha_config.media.media_files) {
        await fs.promises.writeFile(file, json);
    }
    console.log("Done!");
}
/**
 * Sends the broadcast message to every user in `./config/users.json`.
 * The message is `mha_config.broadcast.message` followed by
 * `mha_config.broadcast.url_base` and the key under which the user is stored
 * in the users file.
 *
 * A failing message is logged and skipped; the remaining users are still
 * contacted.
 * @returns {Promise<void>} Resolves once every user has been processed.
 */
async function broadcast() {
    const users = require("./config/users.json");
    const bot = new Telegraf(config.bot_token);
    // Destructuring into constants keeps `token` local instead of leaking a global.
    for (const [token, user] of Object.entries(users)) {
        try {
            const id = user.id;
            await bot.telegram.sendMessage(id, `${mha_config.broadcast.message}${mha_config.broadcast.url_base}${token}`, { parse_mode: 'markdown' });
        }
        catch (err) {
            console.log('Cannot broadcast message.');
            console.log(err);
        }
    }
}
/**
 * Runs every aggregation of the `./js/mentions` module against the memes
 * collection, one after another and in a fixed order.
 * @returns {Promise<void>} Resolves once the last aggregation has resolved.
 */
async function mentions() {
    console.log('Aggregating mentions...');
    console.log('Connecting to mongodb...');
    const mongo = new MongoClient(config.mongodb.connection_string, { useNewUrlParser: true, useUnifiedTopology: true });
    await mongo.connect();
    const database = mongo.db(config.mongodb.database);
    const aggregations = require('./js/mentions');
    const meme_collection = database.collection(config.mongodb.collection_names.memes);
    // The order matches the order in which the results are reported.
    const steps = [
        'most_voting',
        'best_average',
        'most_likes',
        'most_memes',
        'most_weeb_votes',
        'most_condemn_votes',
        'most_oc',
        'lowest_average_likes',
        'new_and_most_memes',
        'new_and_most_likes',
        'new_and_best_avg',
        'most_memes_in_a_day',
        'self_like',
        'best_meme'
    ];
    for (const step of steps) {
        await aggregations[step](meme_collection);
    }
}
/**
 * Reads `report.csv` (columns: id, category, votes), enriches every row with
 * the poster's username and the like / weeb counts from the database and
 * writes the result to `report_enriched.csv`.
 *
 * Rows whose id cannot be found in the memes collection are kept, but their
 * enrichment columns are left empty.
 * @returns {Promise<void>} Resolves once `report_enriched.csv` has been written.
 * @throws Rejects if `report.csv` cannot be read or parsed, or if the database
 * or the output file fails.
 */
async function evaluate() {
    const client = new MongoClient(config.mongodb.connection_string, { useNewUrlParser: true, useUnifiedTopology: true });
    await client.connect();
    try {
        const db = client.db(config.mongodb.database);
        const memes = db.collection(config.mongodb.collection_names.memes);
        const csvWriter = createCsvWriter({
            path: 'report_enriched.csv',
            alwaysQuote: false,
            fieldDelimiter: ",",
            header: [
                { id: 'id', title: 'id' },
                { id: 'category', title: 'Kategorie' },
                { id: 'votes', title: 'Votes' },
                { id: 'user', title: 'User' },
                { id: 'likes', title: 'likes' },
                { id: 'weebs', title: 'weebs' }
            ]
        });

        // Wrap the stream in a promise so that this function only resolves
        // after the whole report has been processed, and so that read and
        // parse errors reject instead of crashing the process.
        const csv = await new Promise((resolve, reject) => {
            const rows = [];
            const inputStream = fs.createReadStream('report.csv', 'utf8');
            // pipe() does not forward errors, so both streams need a handler.
            inputStream.on('error', reject);
            inputStream
                .pipe(CsvReadableStream({ parseNumbers: true, parseBooleans: true, trim: true }))
                .on('data', row => {
                    // Skip the header row.
                    if (row[0] === 'id') return;
                    rows.push({
                        id: row[0],
                        category: row[1],
                        votes: row[2],
                    });
                })
                .on('error', reject)
                .on('end', () => resolve(rows));
        });

        for (const meme of csv) {
            const stats = await getMemeStats(memes, meme.id);
            if (!stats) {
                console.error(`No meme found for id "${meme.id}", leaving its stats empty.`);
                continue;
            }
            // The user is missing when the poster is not in the users collection.
            meme.user = stats.user ? `@${stats.user.username}` : '';
            meme.likes = stats.likes;
            meme.weebs = stats.weebs;
        }
        await csvWriter.writeRecords(csv);
        console.log('done');
    } finally {
        // An open client keeps the process alive, so always release it.
        await client.close();
    }
}
/**
 * Loads every document of the configured users collection.
 * @param {*} db The database handle used to access the users collection.
 * @returns {Promise<Array>} One `{ id, name }` object per user, where `id` is
 * the document's `_id` and `name` its `first_name`.
 */
async function get_users(db) {
    const project_stage = {
        $project: {
            id: '$_id',
            name: '$first_name',
            _id: false
        }
    };
    const pipeline = [{ $match: {} }, project_stage];
    const user_collection = db.collection(config.mongodb.collection_names.users);
    console.log('Getting all users...');
    const all_users = await user_collection.aggregate(pipeline).toArray();
    console.log(`Got ${all_users.length} users!`);
    return all_users;
}
/**
 * Downloads a telegram file into `mha_config.media.media_path`.
 * The local file is named `<id>.<file type>`.
 * @param {*} bot The bot used to look up the file on telegram.
 * @param {*} id The telegram file id of the file to download.
 * @returns {Promise<string>} `mha_config.media.media_prefix` followed by the
 * local file name. Resolves only after the file has been written completely.
 * @throws {Error} If the file type is not jpg, png, mp4 or gif, if telegram
 * does not answer with status 200 or if reading / writing the data fails.
 */
async function download_image(bot, id) {
    const file_data = await bot.telegram.getFile(id);
    const file_type = get_file_type(file_data, id);
    if (!['jpg', 'png', 'mp4', 'gif'].includes(file_type)) {
        throw new Error(`unknown file type! ${file_data.file_path}`);
    }
    const local_file_path = `${mha_config.media.media_path}${id}.${file_type}`;
    const result = await doRequest(`https://api.telegram.org/file/bot${config.bot_token}/${file_data.file_path}`);
    if (result.statusCode !== 200) {
        // Drain the body so the connection is released, and do not store an
        // error page as if it were the media file.
        result.resume();
        throw new Error(`Download of file "${id}" failed, response status: ${result.statusCode} ${result.statusMessage}`);
    }
    // Wait for the data to be flushed. Returning right after pipe() would
    // report success for files that are still incomplete or fail later.
    await new Promise((resolve, reject) => {
        const local_file = fs.createWriteStream(local_file_path);
        local_file.on('finish', resolve);
        local_file.on('error', err => {
            result.destroy();
            reject(err);
        });
        result.on('error', err => {
            local_file.destroy();
            reject(err);
        });
        result.pipe(local_file);
    });
    return `${mha_config.media.media_prefix}${id}.${file_type}`;
}
/**
 * Extracts the file extension from the `file_path` of a telegram file.
 * @param {*} file_data The file information; only `file_path` is read.
 * @param {*} id The id of the meme, only used for the log message.
 * @returns {string} Everything after the last dot of the path, or "jpg" if
 * the path does not contain a dot.
 */
function get_file_type(file_data, id) {
    const file_path = file_data.file_path;
    const last_dot = file_path.lastIndexOf('.');
    if (last_dot !== -1) {
        return file_path.slice(last_dot + 1);
    }
    console.error(`No file extension found for meme "${id}"! using jpg.`);
    return "jpg";
}
/**
 * Sends an HTTPS GET request.
 * @param {*} url The url to request.
 * @returns {Promise<*>} Resolves with the response object as soon as the
 * response has started; the body has not been consumed yet. Rejects if the
 * request emits an error.
 */
async function doRequest(url) {
    return new Promise((on_response, on_failure) => {
        https.get(url)
            .on('response', on_response)
            .on('error', on_failure);
    });
}
/**
 * Loads the poster and the vote counts of a single meme.
 * @param {*} memes The memes collection.
 * @param {*} id The `_id` of the meme.
 * @returns {Promise<*>} An object `{ user, likes, weebs }`, or `null` if no
 * meme with that id exists. `user` is the poster's document from the users
 * collection (missing if the poster is unknown); `likes` and `weebs` are the
 * lengths of `votes.like` and `votes.weeb`, with 0 for absent arrays.
 */
async function getMemeStats(memes, id) {
    const result = memes.aggregate([
        {
            $match: {
                _id: id,
            }
        },
        {
            $lookup: {
                // Use the configured collection name like the other queries
                // in this file instead of a hard-coded one.
                from: config.mongodb.collection_names.users,
                localField: 'poster_id',
                foreignField: '_id',
                as: 'user'
            }
        },
        {
            $replaceRoot: {
                newRoot: {
                    user: {
                        $arrayElemAt: ["$user", 0]
                    },
                    likes: {
                        $size: {
                            // $size fails on missing fields, so count them as empty.
                            $ifNull: ["$votes.like", []]
                        }
                    },
                    weebs: {
                        $size: {
                            $ifNull: ["$votes.weeb", []]
                        }
                    }
                }
            }
        }
    ]);
    return result.next();
}
// Idle timeout of a download request in milliseconds.
const TIMEOUT = 10000;

/**
 * Downloads a file via http or https and stores it on disk.
 * Redirects are followed; only a response with status 200 is stored.
 * An already existing destination file is never overwritten.
 * @param {*} url The url to download.
 * @param {*} dest The path of the file to write. Defaults to the last
 * segment of the url's path, relative to the current working directory.
 * @returns {Promise<void>} Resolves once the file has been written completely.
 * @throws {Error} Rejects if the request fails or times out, if the final
 * response status is not 200 or if the file cannot be written (code `EEXIST`
 * if it already exists). Throws synchronously if `url` is not a valid url.
 */
function download(url, dest) {
    const uri = new URL(url);
    if (!dest) {
        dest = basename(uri.pathname);
    }
    const pkg = uri.protocol === 'https:' ? https : http;
    return new Promise((resolve, reject) => {
        const request = pkg.get(uri.href);
        request.on('response', (res) => {
            if (res.statusCode === 200) {
                // 'wx' makes the open fail instead of overwriting a file.
                const file = fs.createWriteStream(dest, { flags: 'wx' });
                let settled = false;
                const fail = (err, remove_partial_file) => {
                    if (settled) return;
                    settled = true;
                    res.destroy();
                    file.destroy();
                    if (remove_partial_file) {
                        fs.unlink(dest, () => reject(err));
                    } else {
                        reject(err);
                    }
                };
                // 'finish' fires after all data has been flushed; the end of
                // the response alone does not guarantee a complete file.
                file.on('finish', () => {
                    if (settled) return;
                    settled = true;
                    resolve();
                });
                // A pre-existing file is not ours, so it must not be removed.
                file.on('error', (err) => fail(err, err.code !== 'EEXIST'));
                res.on('error', (err) => fail(err, true));
                res.pipe(file);
            } else if ([301, 302, 303, 307, 308].includes(res.statusCode) && res.headers.location) {
                // Recursively follow redirects, only a 200 will resolve.
                // The location may be relative to the requested url.
                res.resume();
                download(new URL(res.headers.location, uri).href, dest).then(resolve, reject);
            } else {
                res.resume();
                reject(new Error(`Download request failed, response status: ${res.statusCode} ${res.statusMessage}`));
            }
        });
        // Without this handler a connection failure would crash the process
        // instead of rejecting the promise.
        request.on('error', reject);
        request.setTimeout(TIMEOUT, function () {
            const err = new Error(`Request timeout after ${TIMEOUT / 1000.0}s`);
            request.destroy(err);
            reject(err);
        });
    });
}