Skip to content

Commit

Permalink
Optimize link injection by inserting elements before arXiv API request.
Browse files Browse the repository at this point in the history
  • Loading branch information
j3soon committed Oct 13, 2023
1 parent 559faf8 commit 66d4326
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 75 deletions.
89 changes: 50 additions & 39 deletions chrome/content.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
// This content script modifies the title of the abstract / PDF page once it has finished loading.

// Store new title for onMessage.
var newTitle = undefined;
// Regular expressions for parsing arXiv IDs from URLs.
// Ref: https://info.arxiv.org/help/arxiv_identifier_for_services.html#urls-for-standard-arxiv-functions
const ID_REGEXP_REPLACE = [
Expand All @@ -10,10 +8,16 @@ const ID_REGEXP_REPLACE = [
[/^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/ftp\/(?:arxiv\/|([^\/]*\/))papers\/.*?([^\/]*?)\.pdf(\?.*?)?(\#.*?)?$/, "$1$2", "PDF"],
[/^.*:\/\/ar5iv\.labs\.arxiv\.org\/html\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "$1", "HTML5"],
];
// Store new title for onMessage to deal with Chrome PDF viewer bug.
var newTitle = undefined;
// Define onMessage countdown for Chrome PDF viewer bug.
var messageCallbackCountdown = 3;
// All console logs should start with this prefix.
const LOG_PREFIX = "[arXiv-utils]";
// Element IDs for injected links
const DIRECT_DOWNLOAD_LI_ID = "arxiv-utils-direct-download-li";
const DIRECT_DOWNLOAD_A_ID = "arxiv-utils-direct-download-a";
const EXTRA_SERVICES_DIV_ID = "arxiv-utils-extra-services-div";

// Return the id parsed from the url.
function getId(url) {
Expand Down Expand Up @@ -59,7 +63,8 @@ async function getArticleInfoAsync(id, pageType) {
const match = el.getAttribute("href").match(versionRegexp);
if (match && match[1])
version = match[1];
} return {
}
return {
escapedTitle,
newTitle,
firstAuthor,
Expand All @@ -69,31 +74,12 @@ async function getArticleInfoAsync(id, pageType) {
version,
}
}
// Add a custom links in abstract page.
async function addCustomLinksAsync(id, articleInfo) {
// Add direct download link.
const result = await chrome.storage.sync.get({
'filename_format': '${title}, ${firstAuthor} et al., ${publishedYear}, v${version}.pdf'
});
const fileName = result.filename_format
.replace('${title}', articleInfo.escapedTitle)
.replace('${firstAuthor}', articleInfo.firstAuthor)
.replace('${authors}', articleInfo.authors)
.replace('${publishedYear}', articleInfo.publishedYear)
.replace('${updatedYear}', articleInfo.updatedYear)
.replace('${version}', articleInfo.version)
.replace('${paperid}', id)
// Ref: https://en.wikipedia.org/wiki/Filename#Reserved_characters_and_words
// Ref: https://stackoverflow.com/a/42210346
.replace(/[/\\?*:|"<>]/g, '_'); // Replace invalid characters.
;
const directURL = `https://arxiv.org/pdf/${id}.pdf`;
const directDownloadLiId = "arxiv-utils-direct-download-li";
const directDownloadAId = "arxiv-utils-direct-download-a";
document.getElementById(directDownloadLiId)?.remove();
// Add custom links in abstract page.
function addCustomLinksAsync(id) {
document.getElementById(DIRECT_DOWNLOAD_LI_ID)?.remove();
const directDownloadHTML = ` \
<li id="${directDownloadLiId}"> \
<a id="${directDownloadAId}" href="#">Direct Download</a> \
<li id="${DIRECT_DOWNLOAD_LI_ID}"> \
<a id="${DIRECT_DOWNLOAD_A_ID}">Direct Download</a> \
</li>`;
const downloadUL = document.querySelector(".full-text > ul");
if (!downloadUL) {
Expand All @@ -102,25 +88,16 @@ async function addCustomLinksAsync(id, articleInfo) {
}
downloadUL.innerHTML += directDownloadHTML;
console.log(LOG_PREFIX, "Added direct download link.")
document.getElementById(directDownloadAId).addEventListener('click', function(e) {
chrome.runtime.sendMessage({
url: directURL,
filename: fileName,
});
e.preventDefault();
console.log(LOG_PREFIX, `Sending download message to download: ${fileName} from ${directURL}.`)
});
// Add extra services links.
const elExtraRefCite = document.querySelector(".extra-ref-cite");
if (!elExtraRefCite) {
console.error(LOG_PREFIX, "Error: Cannot find the References & Citations section at the right side of the abstract page.");
return;
}
const extraServicesId = "arxiv-utils-extra-services-div";
document.getElementById(extraServicesId)?.remove();
document.getElementById(EXTRA_SERVICES_DIV_ID)?.remove();
const extraServicesDiv = document.createElement("div");
extraServicesDiv.classList.add('extra-ref-cite');
extraServicesDiv.id = extraServicesId;
extraServicesDiv.id = EXTRA_SERVICES_DIV_ID;
extraServicesDiv.innerHTML = ` \
<h3>Extra Services</h3> \
<ul> \
Expand All @@ -129,6 +106,38 @@ async function addCustomLinksAsync(id, articleInfo) {
<li><a href="https://export.arxiv.org/api/query/id_list/${id}">RSS feed</a></li> \
</ul>`;
elExtraRefCite.after(extraServicesDiv);
console.log(LOG_PREFIX, "Added extra services links.")
}

// Enable the "Direct Download" link that was injected into the abstract page.
//
// Reads the user's `filename_format` option from chrome.storage.sync, expands
// its placeholders with the article information, and attaches a click handler
// to the element with id DIRECT_DOWNLOAD_A_ID. The handler sends a
// `{url, filename}` message through chrome.runtime.sendMessage (presumably
// handled by the extension's background script, verify there).
//
// Parameters:
//   id          - arXiv paper ID; used for `${paperid}` and the PDF URL.
//   articleInfo - object providing escapedTitle, firstAuthor, authors,
//                 publishedYear, updatedYear and version.
//
// Does nothing (besides logging an error) when the link element is absent.
async function enableDirectDownload(id, articleInfo) {
  const result = await chrome.storage.sync.get({
    'filename_format': '${title}, ${firstAuthor} et al., ${publishedYear}, v${version}.pdf'
  });
  // Placeholders are expanded in this order, first occurrence only.
  const placeholders = [
    ['${title}', articleInfo.escapedTitle],
    ['${firstAuthor}', articleInfo.firstAuthor],
    ['${authors}', articleInfo.authors],
    ['${publishedYear}', articleInfo.publishedYear],
    ['${updatedYear}', articleInfo.updatedYear],
    ['${version}', articleInfo.version],
    ['${paperid}', id],
  ];
  let fileName = result.filename_format;
  for (const [placeholder, value] of placeholders) {
    // Use a replacer function: with a plain replacement string, sequences such
    // as `$$` or `$&` (common in LaTeX titles) would be interpreted as special
    // replacement patterns and corrupt the file name.
    fileName = fileName.replace(placeholder, () => value);
  }
  // Ref: https://en.wikipedia.org/wiki/Filename#Reserved_characters_and_words
  // Ref: https://stackoverflow.com/a/42210346
  fileName = fileName.replace(/[/\\?*:|"<>]/g, '_'); // Replace invalid characters.
  const directURL = `https://arxiv.org/pdf/${id}.pdf`;
  const downloadA = document.getElementById(DIRECT_DOWNLOAD_A_ID);
  if (!downloadA) {
    // The link may not have been injected (e.g. the download list was not found).
    console.error(LOG_PREFIX, "Error: Cannot find the direct download link, direct download is not enabled.");
    return;
  }
  downloadA.addEventListener('click', function(e) {
    chrome.runtime.sendMessage({
      url: directURL,
      filename: fileName,
    });
    // Keep the browser from following the placeholder "#" href.
    e.preventDefault();
    console.log(LOG_PREFIX, `Sending download message to download: ${fileName} from ${directURL}.`);
  });
  // Only give the anchor an href once the click handler is in place.
  downloadA.href = "#";
  console.log(LOG_PREFIX, "Enabled direct download.");
}

// The PDF viewer in Chrome has a bug that will overwrite the title of the page after loading the PDF.
Expand Down Expand Up @@ -165,11 +174,13 @@ async function mainAsync() {
console.error(LOG_PREFIX, "Error: Failed to get paper ID, aborted.");
return;
}
if (pageType === "Abstract")
addCustomLinksAsync(id);
const articleInfo = await getArticleInfoAsync(id, pageType);
document.title = articleInfo.newTitle;
console.log(LOG_PREFIX, `Set document title to: ${articleInfo.newTitle}.`);
if (pageType === "Abstract")
addCustomLinksAsync(id, articleInfo);
await enableDirectDownload(id, articleInfo);
// Store new title for onMessage.
newTitle = articleInfo.newTitle
}
Expand Down
82 changes: 46 additions & 36 deletions firefox/content.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ const ID_REGEXP_REPLACE = [
];
// All console logs should start with this prefix.
const LOG_PREFIX = "[arXiv-utils]";
// Element IDs for injected links
const DIRECT_DOWNLOAD_LI_ID = "arxiv-utils-direct-download-li";
const DIRECT_DOWNLOAD_A_ID = "arxiv-utils-direct-download-a";
const EXTRA_SERVICES_DIV_ID = "arxiv-utils-extra-services-div";

// Return the id parsed from the url.
function getId(url) {
Expand Down Expand Up @@ -70,31 +74,12 @@ async function getArticleInfoAsync(id, pageType) {
version,
}
}
// Add a custom links in abstract page.
async function addCustomLinksAsync(id, articleInfo) {
// Add direct download link.
const result = await browser.storage.sync.get({
'filename_format': '${title}, ${firstAuthor} et al., ${publishedYear}, v${version}.pdf'
});
const fileName = result.filename_format
.replace('${title}', articleInfo.escapedTitle)
.replace('${firstAuthor}', articleInfo.firstAuthor)
.replace('${authors}', articleInfo.authors)
.replace('${publishedYear}', articleInfo.publishedYear)
.replace('${updatedYear}', articleInfo.updatedYear)
.replace('${version}', articleInfo.version)
.replace('${paperid}', id)
// Ref: https://en.wikipedia.org/wiki/Filename#Reserved_characters_and_words
// Ref: https://stackoverflow.com/a/42210346
.replace(/[/\\?*:|"<>]/g, '_'); // Replace invalid characters.
;
const directURL = `https://arxiv.org/pdf/${id}.pdf`;
const directDownloadLiId = "arxiv-utils-direct-download-li";
const directDownloadAId = "arxiv-utils-direct-download-a";
document.getElementById(directDownloadLiId)?.remove();
// Add custom links in abstract page.
function addCustomLinksAsync(id) {
document.getElementById(DIRECT_DOWNLOAD_LI_ID)?.remove();
const directDownloadHTML = ` \
<li id="${directDownloadLiId}"> \
<a id="${directDownloadAId}" href="#">Direct Download</a> \
<li id="${DIRECT_DOWNLOAD_LI_ID}"> \
<a id="${DIRECT_DOWNLOAD_A_ID}">Direct Download</a> \
</li>`;
const downloadUL = document.querySelector(".full-text > ul");
if (!downloadUL) {
Expand All @@ -103,25 +88,16 @@ async function addCustomLinksAsync(id, articleInfo) {
}
downloadUL.innerHTML += directDownloadHTML;
console.log(LOG_PREFIX, "Added direct download link.")
document.getElementById(directDownloadAId).addEventListener('click', function(e) {
browser.runtime.sendMessage({
url: directURL,
filename: fileName,
});
e.preventDefault();
console.log(LOG_PREFIX, `Sending download message to download: ${fileName} from ${directURL}.`)
});
// Add extra services links.
const elExtraRefCite = document.querySelector(".extra-ref-cite");
if (!elExtraRefCite) {
console.error(LOG_PREFIX, "Error: Cannot find the References & Citations section at the right side of the abstract page.");
return;
}
const extraServicesId = "arxiv-utils-extra-services-div";
document.getElementById(extraServicesId)?.remove();
document.getElementById(EXTRA_SERVICES_DIV_ID)?.remove();
const extraServicesDiv = document.createElement("div");
extraServicesDiv.classList.add('extra-ref-cite');
extraServicesDiv.id = extraServicesId;
extraServicesDiv.id = EXTRA_SERVICES_DIV_ID;
extraServicesDiv.innerHTML = ` \
<h3>Extra Services</h3> \
<ul> \
Expand All @@ -130,6 +106,38 @@ async function addCustomLinksAsync(id, articleInfo) {
<li><a href="https://export.arxiv.org/api/query/id_list/${id}">RSS feed</a></li> \
</ul>`;
elExtraRefCite.after(extraServicesDiv);
console.log(LOG_PREFIX, "Added extra services links.")
}

// Enable the "Direct Download" link that was injected into the abstract page.
//
// Reads the user's `filename_format` option from browser.storage.sync, expands
// its placeholders with the article information, and attaches a click handler
// to the element with id DIRECT_DOWNLOAD_A_ID. The handler sends a
// `{url, filename}` message through browser.runtime.sendMessage (presumably
// handled by the extension's background script, verify there).
//
// Parameters:
//   id          - arXiv paper ID; used for `${paperid}` and the PDF URL.
//   articleInfo - object providing escapedTitle, firstAuthor, authors,
//                 publishedYear, updatedYear and version.
//
// Does nothing (besides logging an error) when the link element is absent.
async function enableDirectDownload(id, articleInfo) {
  const result = await browser.storage.sync.get({
    'filename_format': '${title}, ${firstAuthor} et al., ${publishedYear}, v${version}.pdf'
  });
  // Placeholders are expanded in this order, first occurrence only.
  const placeholders = [
    ['${title}', articleInfo.escapedTitle],
    ['${firstAuthor}', articleInfo.firstAuthor],
    ['${authors}', articleInfo.authors],
    ['${publishedYear}', articleInfo.publishedYear],
    ['${updatedYear}', articleInfo.updatedYear],
    ['${version}', articleInfo.version],
    ['${paperid}', id],
  ];
  let fileName = result.filename_format;
  for (const [placeholder, value] of placeholders) {
    // Use a replacer function: with a plain replacement string, sequences such
    // as `$$` or `$&` (common in LaTeX titles) would be interpreted as special
    // replacement patterns and corrupt the file name.
    fileName = fileName.replace(placeholder, () => value);
  }
  // Ref: https://en.wikipedia.org/wiki/Filename#Reserved_characters_and_words
  // Ref: https://stackoverflow.com/a/42210346
  fileName = fileName.replace(/[/\\?*:|"<>]/g, '_'); // Replace invalid characters.
  const directURL = `https://arxiv.org/pdf/${id}.pdf`;
  const downloadA = document.getElementById(DIRECT_DOWNLOAD_A_ID);
  if (!downloadA) {
    // The link may not have been injected (e.g. the download list was not found).
    console.error(LOG_PREFIX, "Error: Cannot find the direct download link, direct download is not enabled.");
    return;
  }
  downloadA.addEventListener('click', function(e) {
    browser.runtime.sendMessage({
      url: directURL,
      filename: fileName,
    });
    // Keep the browser from following the placeholder "#" href.
    e.preventDefault();
    console.log(LOG_PREFIX, `Sending download message to download: ${fileName} from ${directURL}.`);
  });
  // Only give the anchor an href once the click handler is in place.
  downloadA.href = "#";
  console.log(LOG_PREFIX, "Enabled direct download.");
}

async function mainAsync() {
Expand All @@ -141,11 +149,13 @@ async function mainAsync() {
console.error(LOG_PREFIX, "Error: Failed to get paper ID, aborted.");
return;
}
if (pageType === "Abstract")
addCustomLinksAsync(id);
const articleInfo = await getArticleInfoAsync(id, pageType);
document.title = articleInfo.newTitle;
console.log(LOG_PREFIX, `Set document title to: ${articleInfo.newTitle}.`);
if (pageType === "Abstract")
addCustomLinksAsync(id, articleInfo);
await enableDirectDownload(id, articleInfo);
}

// Execute main logic.
Expand Down

0 comments on commit 66d4326

Please sign in to comment.