Skip to content

Commit

Permalink
feat: Support `www.arxiv.org` subdomain
Browse files Browse the repository at this point in the history
  • Loading branch information
j3soon committed Jun 1, 2024
1 parent 42a3522 commit d8ac62a
Show file tree
Hide file tree
Showing 9 changed files with 43 additions and 24 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ We do not gather your personal data. If in doubt, please refer to the source cod
- `*://arxiv.org/*`: Inject content scripts to existing tabs.
- `*://export.arxiv.org/*`: Inject content scripts to existing tabs.
- `*://browse.arxiv.org/*`: Inject content scripts to existing tabs.
- `*://www.arxiv.org/*`: Inject content scripts to existing tabs.
- `*://ar5iv.labs.arxiv.org/*`: Inject content scripts to existing tabs.

### Firefox Permissions
Expand All @@ -131,7 +132,8 @@ We do not gather your personal data. If in doubt, please refer to the source cod
- `*://arxiv.org/*pdf*`: Redirect PDF pages to custom PDF container.
- `*://export.arxiv.org/*pdf*`: Redirect PDF pages to custom PDF container.
- `*://browse.arxiv.org/*pdf*`: Redirect PDF pages to custom PDF container.
- `"content_security_policy": "script-src 'self'; object-src 'self' https://arxiv.org https://export.arxiv.org https://browse.arxiv.org;"`: For embedding PDF in container.
- `*://www.arxiv.org/*pdf*`: Redirect PDF pages to custom PDF container.
- `"content_security_policy": "script-src 'self'; object-src 'self' https://arxiv.org https://export.arxiv.org https://browse.arxiv.org https://www.arxiv.org;"`: For embedding PDF in container.
- `"web_accessible_resources": [ "pdfviewer.html" ]`: Redirecting from an HTTPS page to the extension's custom page requires that page to be declared as web-accessible.

## Developer Notes
Expand Down
8 changes: 4 additions & 4 deletions chrome/background.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
// Regular expressions for parsing target navigation URL from URLs.
// Ref: https://info.arxiv.org/help/arxiv_identifier_for_services.html#urls-for-standard-arxiv-functions
const TARGET_URL_REGEXP_REPLACE = [
[/^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/abs\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "https://arxiv.org/pdf/$1.pdf"],
[/^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/pdf\/(\S*?)(?:\.pdf)?\/*(\?.*?)?(\#.*?)?$/, "https://arxiv.org/abs/$1"],
[/^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/ftp\/(?:arxiv\/|([^\/]*\/))papers\/.*?([^\/]*?)\.pdf(\?.*?)?(\#.*?)?$/, "https://arxiv.org/abs/$1$2"],
[/^.*:\/\/(?:browse\.)?arxiv\.org\/html\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "https://arxiv.org/abs/$1"],
[/^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/abs\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "https://arxiv.org/pdf/$1.pdf"],
[/^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/pdf\/(\S*?)(?:\.pdf)?\/*(\?.*?)?(\#.*?)?$/, "https://arxiv.org/abs/$1"],
[/^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/ftp\/(?:arxiv\/|([^\/]*\/))papers\/.*?([^\/]*?)\.pdf(\?.*?)?(\#.*?)?$/, "https://arxiv.org/abs/$1$2"],
[/^.*:\/\/(?:browse\.|www\.)?arxiv\.org\/html\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "https://arxiv.org/abs/$1"],
[/^.*:\/\/ar5iv\.labs\.arxiv\.org\/html\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "https://arxiv.org/abs/$1"],
[/^.*:\/\/openreview\.net\/forum\?id=(\S*?)(&.*?)?(\#.*?)?$/, "https://openreview.net/pdf?id=$1"],
[/^.*:\/\/openreview\.net\/pdf\?id=(\S*?)(&.*?)?(\#.*?)?$/, "https://openreview.net/forum?id=$1"],
Expand Down
8 changes: 4 additions & 4 deletions chrome/content.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
// Regular expressions for parsing arXiv IDs from URLs.
// Ref: https://info.arxiv.org/help/arxiv_identifier_for_services.html#urls-for-standard-arxiv-functions
const ID_REGEXP_REPLACE = [
[/^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/abs\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "$1", "Abstract"],
[/^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/pdf\/(\S*?)(?:\.pdf)?\/*(\?.*?)?(\#.*?)?$/, "$1", "PDF"],
[/^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/ftp\/(?:arxiv\/|([^\/]*\/))papers\/.*?([^\/]*?)\.pdf(\?.*?)?(\#.*?)?$/, "$1$2", "PDF"],
[/^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/abs\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "$1", "Abstract"],
[/^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/pdf\/(\S*?)(?:\.pdf)?\/*(\?.*?)?(\#.*?)?$/, "$1", "PDF"],
[/^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/ftp\/(?:arxiv\/|([^\/]*\/))papers\/.*?([^\/]*?)\.pdf(\?.*?)?(\#.*?)?$/, "$1$2", "PDF"],
[/^.*:\/\/ar5iv\.labs\.arxiv\.org\/html\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "$1", "HTML5"],
];
// Store new title for onMessage to deal with Chrome PDF viewer bug.
Expand Down Expand Up @@ -57,7 +57,7 @@ async function getArticleInfoAsync(id, pageType) {
const authors = [...entry.getElementsByTagName("name")].map((el) => el.textContent).join(", ");
const publishedYear = entry.getElementsByTagName("published")[0].textContent.split('-')[0];
const updatedYear = entry.getElementsByTagName("updated")[0].textContent.split('-')[0];
const versionRegexp = /^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/abs\/.*v([0-9]*)$/;
const versionRegexp = /^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/abs\/.*v([0-9]*)$/;
var version = '';
for (const el of entry.getElementsByTagName("link")) {
const match = el.getAttribute("href").match(versionRegexp);
Expand Down
3 changes: 3 additions & 0 deletions chrome/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
"*://export.arxiv.org/abs/*",
"*://browse.arxiv.org/*pdf*",
"*://browse.arxiv.org/abs/*",
"*://www.arxiv.org/*pdf*",
"*://www.arxiv.org/abs/*",
"*://ar5iv.labs.arxiv.org/html/*"
],
"js": [ "content.js" ],
Expand Down Expand Up @@ -44,6 +46,7 @@
"*://arxiv.org/*",
"*://export.arxiv.org/*",
"*://browse.arxiv.org/*",
"*://www.arxiv.org/*",
"*://ar5iv.labs.arxiv.org/*"
],
"icons": {
Expand Down
12 changes: 6 additions & 6 deletions firefox/background.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@
const pdfViewerRelatedURL = "pdfviewer.html?target=";
// The match pattern for the URLs to redirect
const redirectPatterns = [
"*://arxiv.org/*.pdf*", "*://export.arxiv.org/*.pdf*", "*://browse.arxiv.org/*.pdf*",
"*://arxiv.org/*pdf*/*", "*://export.arxiv.org/*pdf*/*", "*://browse.arxiv.org/*pdf*/*",
"*://arxiv.org/*.pdf*", "*://export.arxiv.org/*.pdf*", "*://browse.arxiv.org/*.pdf*", "*://www.arxiv.org/*.pdf*",
"*://arxiv.org/*pdf*/*", "*://export.arxiv.org/*pdf*/*", "*://browse.arxiv.org/*pdf*/*", "*://www.arxiv.org/*pdf*/*",
];
// Regular expressions for parsing target navigation URL from URLs.
// Ref: https://info.arxiv.org/help/arxiv_identifier_for_services.html#urls-for-standard-arxiv-functions
const TARGET_URL_REGEXP_REPLACE = [
[/^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/abs\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "https://arxiv.org/pdf/$1.pdf"],
[/^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/pdf\/(\S*?)(?:\.pdf)?\/*(\?.*?)?(\#.*?)?$/, "https://arxiv.org/abs/$1"],
[/^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/ftp\/(?:arxiv\/|([^\/]*\/))papers\/.*?([^\/]*?)\.pdf(\?.*?)?(\#.*?)?$/, "https://arxiv.org/abs/$1$2"],
[/^.*:\/\/(?:browse\.)?arxiv\.org\/html\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "https://arxiv.org/abs/$1"],
[/^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/abs\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "https://arxiv.org/pdf/$1.pdf"],
[/^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/pdf\/(\S*?)(?:\.pdf)?\/*(\?.*?)?(\#.*?)?$/, "https://arxiv.org/abs/$1"],
[/^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/ftp\/(?:arxiv\/|([^\/]*\/))papers\/.*?([^\/]*?)\.pdf(\?.*?)?(\#.*?)?$/, "https://arxiv.org/abs/$1$2"],
[/^.*:\/\/(?:browse\.|www\.)?arxiv\.org\/html\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "https://arxiv.org/abs/$1"],
[/^.*:\/\/ar5iv\.labs\.arxiv\.org\/html\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "https://arxiv.org/abs/$1"],
[/^.*:\/\/openreview\.net\/forum\?id=(\S*?)(&.*?)?(\#.*?)?$/, "https://openreview.net/pdf?id=$1"],
[/^.*:\/\/openreview\.net\/pdf\?id=(\S*?)(&.*?)?(\#.*?)?$/, "https://openreview.net/forum?id=$1"],
Expand Down
10 changes: 5 additions & 5 deletions firefox/content.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
// Regular expressions for parsing arXiv IDs from URLs.
// Ref: https://info.arxiv.org/help/arxiv_identifier_for_services.html#urls-for-standard-arxiv-functions
const ID_REGEXP_REPLACE = [
[/^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/abs\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "$1", "Abstract"],
[/^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/pdf\/(\S*?)(?:\.pdf)?\/*(\?.*?)?(\#.*?)?$/, "$1", "PDF"],
[/^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/ftp\/(?:arxiv\/|([^\/]*\/))papers\/.*?([^\/]*?)\.pdf(\?.*?)?(\#.*?)?$/, "$1$2", "PDF"],
[/^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/abs\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "$1", "Abstract"],
[/^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/pdf\/(\S*?)(?:\.pdf)?\/*(\?.*?)?(\#.*?)?$/, "$1", "PDF"],
[/^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/ftp\/(?:arxiv\/|([^\/]*\/))papers\/.*?([^\/]*?)\.pdf(\?.*?)?(\#.*?)?$/, "$1$2", "PDF"],
[/^.*:\/\/ar5iv\.labs\.arxiv\.org\/html\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "$1", "HTML5"],
// For external PDF viewer
[/^.*:\/\/mozilla\.github\.io\/pdf\.js\/web\/viewer\.html\?file=https:\/\/(?:export\.|browse\.)?arxiv\.org\/pdf\/(\S*?)(?:\.pdf)?\/*$/, "$1"],
[/^.*:\/\/mozilla\.github\.io\/pdf\.js\/web\/viewer\.html\?file=https:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/pdf\/(\S*?)(?:\.pdf)?\/*$/, "$1"],
];
// All console logs should start with this prefix.
const LOG_PREFIX = "[arXiv-utils]";
Expand Down Expand Up @@ -57,7 +57,7 @@ async function getArticleInfoAsync(id, pageType) {
const authors = [...entry.getElementsByTagName("name")].map((el) => el.textContent).join(", ");
const publishedYear = entry.getElementsByTagName("published")[0].textContent.split('-')[0];
const updatedYear = entry.getElementsByTagName("updated")[0].textContent.split('-')[0];
const versionRegexp = /^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/abs\/.*v([0-9]*)$/;
const versionRegexp = /^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/abs\/.*v([0-9]*)$/;
var version = '';
for (const el of entry.getElementsByTagName("link")) {
const match = el.getAttribute("href").match(versionRegexp);
Expand Down
6 changes: 4 additions & 2 deletions firefox/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"*://arxiv.org/abs/*",
"*://export.arxiv.org/abs/*",
"*://browse.arxiv.org/abs/*",
"*://www.arxiv.org/abs/*",
"*://ar5iv.labs.arxiv.org/html/*",
"*://mozilla.github.io/pdf.js/web/viewer.html*"
],
Expand Down Expand Up @@ -41,10 +42,11 @@
"downloads",
"*://arxiv.org/*pdf*",
"*://export.arxiv.org/*pdf*",
"*://browse.arxiv.org/*pdf*"
"*://browse.arxiv.org/*pdf*",
"*://www.arxiv.org/*pdf*"
],
"content_security_policy":
"script-src 'self'; object-src 'self' https://arxiv.org https://export.arxiv.org https://browse.arxiv.org;",
"script-src 'self'; object-src 'self' https://arxiv.org https://export.arxiv.org https://browse.arxiv.org https://www.arxiv.org;",
"web_accessible_resources": [
"pdfviewer.html"
],
Expand Down
4 changes: 2 additions & 2 deletions firefox/pdfviewer.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
// Regular expressions for parsing arXiv IDs from URLs.
// Ref: https://info.arxiv.org/help/arxiv_identifier_for_services.html#urls-for-standard-arxiv-functions
const ID_REGEXP_REPLACE = [
[/^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/pdf\/(\S*?)(?:\.pdf)?\/*(\?.*?)?(\#.*?)?$/, "$1"],
[/^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/ftp\/(?:arxiv\/|([^\/]*\/))papers\/.*?([^\/]*?)\.pdf(\?.*?)?(\#.*?)?$/, "$1$2"],
[/^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/pdf\/(\S*?)(?:\.pdf)?\/*(\?.*?)?(\#.*?)?$/, "$1"],
[/^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/ftp\/(?:arxiv\/|([^\/]*\/))papers\/.*?([^\/]*?)\.pdf(\?.*?)?(\#.*?)?$/, "$1$2"],
];
// All console logs should start with this prefix.
const LOG_PREFIX = "[arXiv-utils]";
Expand Down
12 changes: 12 additions & 0 deletions tests/testcases.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,18 @@ navigation:
pdf_title: Designing Network Design Spaces | PDF
pdf2abs: False
description: abs with browse.arxiv.org subdomain
- url: https://arxiv.org/abs/2003.13678
title: Designing Network Design Spaces | Abstract
pdf_url: https://www.arxiv.org/pdf/2003.13678
pdf_title: Designing Network Design Spaces | PDF
abs2pdf: False
description: pdf with www.arxiv.org subdomain
- url: https://www.arxiv.org/abs/2003.13678
title: Designing Network Design Spaces | Abstract
pdf_url: https://arxiv.org/pdf/2003.13678
pdf_title: Designing Network Design Spaces | PDF
pdf2abs: False
description: abs with www.arxiv.org subdomain
- url: https://arxiv.org/abs/2003.13678
title: Designing Network Design Spaces | Abstract
pdf_url: https://arxiv.org/pdf/2003.13678
Expand Down

0 comments on commit d8ac62a

Please sign in to comment.