Skip to content

Commit

Permalink
fix linter issues
Browse files Browse the repository at this point in the history
  • Loading branch information
inhumantsar committed Jun 11, 2024
1 parent 740ddd3 commit 729dca3
Show file tree
Hide file tree
Showing 7 changed files with 64 additions and 16 deletions.
2 changes: 1 addition & 1 deletion .eslintrc.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

module.exports = {
"parserOptions": {
"ecmaVersion": 6,
"ecmaVersion": 2017,
},
"env": {
"es6": true,
Expand Down
56 changes: 51 additions & 5 deletions Readability.js
Original file line number Diff line number Diff line change
Expand Up @@ -495,7 +495,7 @@ Readability.prototype = {
// could assume it's the full title.
var headings = this._concatNodeLists(
doc.getElementsByTagName("h1"),
doc.getElementsByTagName("h2")
doc.getElementsByTagName("h2"),
);
var trimmedTitle = curTitle.trim();
var match = this._someNode(headings, function(heading) {
Expand Down Expand Up @@ -1401,7 +1401,7 @@ Readability.prototype = {
if (!parsed["@type"] && Array.isArray(parsed["@graph"])) {
parsed = parsed["@graph"].find(function(it) {
return (it["@type"] || "").match(
this.REGEXPS.jsonLdArticleTypes
this.REGEXPS.jsonLdArticleTypes,
);
});
}
Expand Down Expand Up @@ -1470,6 +1470,36 @@ Readability.prototype = {
return metadata ? metadata : {};
},

/**
* Swaps the "Surname, GivenName" formatted bylines to "GivenName Surname".
*
* @param {string|string[]} name
* @returns Name or names in "GivenName Surname" format
*/
_normalizeByline: function(name) {
var result = name;

if (Array.isArray(name)) {
return name.map((n) => this._normalizeByline(n));
}

// handle Surname, GivenName formatting
if (name.includes(",")) {
const parts = name.split(",").map(part => part.trim());
if (parts.length == 2) {
result = `${parts[1]} ${parts[0]}`;
}
if (parts.length > 2) {
result = `${parts[1]} ${parts[0]} ${parts.slice(2).join(" ")}`;
}
}

// remove things like "By:"
result = result.replace(/\w+:/, "");

return this._unescapeHtmlEntities(result);
},

/**
* Attempts to get excerpt and byline metadata for the article.
*
Expand Down Expand Up @@ -1499,6 +1529,7 @@ Readability.prototype = {
}
var matches = null;
var name = null;
var result = null;

if (elementProperty) {
matches = elementProperty.match(propertyPattern);
Expand All @@ -1507,7 +1538,7 @@ Readability.prototype = {
// so we can match below.
name = matches[0].toLowerCase().replace(/\s/g, "");
// multiple authors
values[name] = content.trim();
result = content.trim();
}
}
if (!matches && elementName && namePattern.test(elementName)) {
Expand All @@ -1516,8 +1547,23 @@ Readability.prototype = {
// Convert to lowercase, remove any whitespace, and convert dots
// to colons so we can match below.
name = name.toLowerCase().replace(/\s/g, "").replace(/\./g, ":");
values[name] = content.trim();
result = content.trim();
}
}

// handle properties which might have multiple distinct values, eg: citation_author
if (result) {
if (values[name]) {
if (Array.isArray(values[name]) && typeof result == "string") {
values[name].push(result);
}
if (typeof values[name] == "string") {
values[name] = [values[name], result];
}
} else {
values[name] = result;
}
this.log(`found metadata: ${name}=${values[name]}`);
}
});

Expand Down Expand Up @@ -1569,7 +1615,7 @@ Readability.prototype = {
// in many sites the meta value is escaped with HTML entities,
// so here we need to unescape it
metadata.title = this._unescapeHtmlEntities(metadata.title);
metadata.byline = this._unescapeHtmlEntities(metadata.byline);
metadata.byline = this._normalizeByline(metadata.byline);
metadata.excerpt = this._unescapeHtmlEntities(metadata.excerpt);
metadata.siteName = this._unescapeHtmlEntities(metadata.siteName);
metadata.publishedTime = this._unescapeHtmlEntities(metadata.publishedTime);
Expand Down
10 changes: 6 additions & 4 deletions test/generate-testcase.js
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,9 @@ function getWithRedirects(url, cb) {
console.log("HEADERS:", JSON.stringify(response.headers));
}

if(response.statusCode > 300 && response.statusCode <= 303) {
if (debug) console.log("following redirect", response.headers.location);
if (response.statusCode > 300 && response.statusCode <= 303) {
if (debug)
console.log("following redirect", response.headers.location);
await getWithRedirects(response.headers.location, cb);
}

Expand All @@ -73,7 +74,8 @@ function getWithRedirects(url, cb) {
response.on("data", (chunk) => rv += chunk);

response.on("end", () => {
if (debug) console.log("End received");
if (debug)
console.log("End received");
cb(rv);
});
});
Expand Down Expand Up @@ -192,4 +194,4 @@ if (process.argv[2] === "all") {
});
} else {
generateTestcase(process.argv[2]);
}
}
5 changes: 4 additions & 1 deletion test/test-pages/nature/expected-metadata.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
{
"title": "Worldwide divergence of values",
"byline": "Medvedev, Danila",
"byline": [
"Joshua Conrad Jackson",
"Danila Medvedev"
],
"dir": null,
"lang": "en",
"excerpt": "Social scientists have long debated the nature of cultural change in a modernizing and globalizing world. Some scholars predicted that national cultures would converge by adopting social values typical of Western democracies. Others predicted that cultural differences in values would persist or even increase over time. We test these competing predictions by analyzing survey data from 1981 to 2022 (n = 406,185) from 76 national cultures. We find evidence of global value divergence. Values emphasizing tolerance and self-expression have diverged most sharply, especially between high-income Western countries and the rest of the world. We also find that countries with similar per-capita GDP levels have held similar values over the last 40 years. Over time, however, geographic proximity has emerged as an increasingly strong correlate of value similarity, indicating that values have diverged globally but converged regionally. The authors test whether social values have become converged or diverged across national cultures over the last 40 years using a 76-country analysis of the World Values Survey. They show that values have diverged, especially between high-income Western countries and the rest of the world.",
Expand Down
3 changes: 0 additions & 3 deletions test/test-pages/nature/expected.html
Original file line number Diff line number Diff line change
Expand Up @@ -651,9 +651,6 @@ <h2 id="additional-information"> Additional information </h2>
<b>Publisher’s note</b> Springer Nature remains neutral with regard to jurisdictional claims in published maps and institutional affiliations.
</p>
</div>
<div id="Sec25-section" data-title="Supplementary information">
<h2 id="Sec25"> Supplementary information </h2>
</div>
<div id="rightslink-section" data-title="Rights and permissions">
<h2 id="rightslink"> Rights and permissions </h2>
<div id="rightslink-content">
Expand Down
2 changes: 1 addition & 1 deletion test/test-pages/ourworldindata/expected-metadata.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"title": "Why do we need to know about progress if we are concerned about the world's largest problems?",
"byline": "By: Max Roser",
"byline": "Max Roser",
"dir": null,
"excerpt": "Why have we made it our mission to publish “research and data to make progress against the world’s largest problems”?",
"siteName": "Our World in Data",
Expand Down
2 changes: 1 addition & 1 deletion test/test-readability.js
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ describe("Readability API", function() {
it("should use custom video regex sent as option", function() {
var dom = new JSDOM(
"<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc mollis leo lacus, vitae semper nisl ullamcorper ut.</p>" +
"<iframe src=\"https://mycustomdomain.com/some-embeds\"></iframe>"
"<iframe src=\"https://mycustomdomain.com/some-embeds\"></iframe>",
);
var expected_xhtml = "<div id=\"readability-page-1\" class=\"page\">" +
"<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc mollis leo lacus, vitae semper nisl ullamcorper ut.</p>" +
Expand Down

0 comments on commit 729dca3

Please sign in to comment.