-
-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathindex.js
51 lines (42 loc) · 1.17 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import cheerio from 'cheerio';
const matchers = [
'.instapaper_title',
'article h1',
'.entry-content h1',
'.markdown-body h1',
'.entry-title',
'.post-title',
'.pageTitle',
'.post_title',
'.headline h1',
'.headline',
'.story h1',
'.entry-header h1',
'.news_title',
'#page-post h1',
'.postheader h1',
'.postheader h2',
'.type-post h1',
];
const clean = string => string.replace(/\r?\n/g, '').replace(/\s+/g, ' ').trim();
const findSelectorMatch = $ => {
for (const matcher of matchers) {
const element = $(matcher).first().text().trim();
if (element && element.length > 0) {
return element;
}
}
};
export default function articleTitle(html) {
const $ = cheerio.load(html);
let documentTitle = $('title').text().replace(/\r?\n/g, '');
documentTitle = (/^[^|\-/•—]+/.exec(documentTitle) || [])[0] || documentTitle;
documentTitle = (((documentTitle || '').match(/:(?<documentTitle>.*)/) || []).groups || '').documentTitle || documentTitle;
documentTitle = (documentTitle || '').trim();
let title = documentTitle;
const heading = findSelectorMatch($);
if (heading && heading.length > 5 && heading.length < 100) {
title = heading;
}
return clean(title);
}