diff --git a/README.md b/README.md index 7213651..7e38a70 100644 --- a/README.md +++ b/README.md @@ -116,6 +116,7 @@ We do not gather your personal data. If in doubt, please refer to the source cod - `*://arxiv.org/*`: Inject content scripts to existing tabs. - `*://export.arxiv.org/*`: Inject content scripts to existing tabs. - `*://browse.arxiv.org/*`: Inject content scripts to existing tabs. +- `*://www.arxiv.org/*`: Inject content scripts to existing tabs. - `*://ar5iv.labs.arxiv.org/*`: Inject content scripts to existing tabs. ### Firefox Permissions @@ -131,7 +132,8 @@ We do not gather your personal data. If in doubt, please refer to the source cod - `*://arxiv.org/*pdf*`: Redirect PDF pages to custom PDF container. - `*://export.arxiv.org/*pdf*`: Redirect PDF pages to custom PDF container. - `*://browse.arxiv.org/*pdf*`: Redirect PDF pages to custom PDF container. -- `"content_security_policy": "script-src 'self'; object-src 'self' https://arxiv.org https://export.arxiv.org https://browse.arxiv.org;"`: For embedding PDF in container. +- `*://www.arxiv.org/*pdf*`: Redirect PDF pages to custom PDF container. +- `"content_security_policy": "script-src 'self'; object-src 'self' https://arxiv.org https://export.arxiv.org https://browse.arxiv.org https://www.arxiv.org;"`: For embedding PDF in container. - `"web_accessible_resources": [ "pdfviewer.html" ]`: To redirect from HTTPS to extension custom page requires them to be visible. ## Developer Notes diff --git a/chrome/background.js b/chrome/background.js index 8c10394..513dc76 100644 --- a/chrome/background.js +++ b/chrome/background.js @@ -4,10 +4,10 @@ // Regular expressions for parsing target navigation URL from URLs. // Ref: https://info.arxiv.org/help/arxiv_identifier_for_services.html#urls-for-standard-arxiv-functions const TARGET_URL_REGEXP_REPLACE = [ - [/^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/abs\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "https://arxiv.org/pdf/$1.pdf"], - [/^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/pdf\/(\S*?)(?:\.pdf)?\/*(\?.*?)?(\#.*?)?$/, "https://arxiv.org/abs/$1"], - [/^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/ftp\/(?:arxiv\/|([^\/]*\/))papers\/.*?([^\/]*?)\.pdf(\?.*?)?(\#.*?)?$/, "https://arxiv.org/abs/$1$2"], - [/^.*:\/\/(?:browse\.)?arxiv\.org\/html\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "https://arxiv.org/abs/$1"], + [/^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/abs\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "https://arxiv.org/pdf/$1.pdf"], + [/^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/pdf\/(\S*?)(?:\.pdf)?\/*(\?.*?)?(\#.*?)?$/, "https://arxiv.org/abs/$1"], + [/^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/ftp\/(?:arxiv\/|([^\/]*\/))papers\/.*?([^\/]*?)\.pdf(\?.*?)?(\#.*?)?$/, "https://arxiv.org/abs/$1$2"], + [/^.*:\/\/(?:browse\.|www\.)?arxiv\.org\/html\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "https://arxiv.org/abs/$1"], [/^.*:\/\/ar5iv\.labs\.arxiv\.org\/html\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "https://arxiv.org/abs/$1"], [/^.*:\/\/openreview\.net\/forum\?id=(\S*?)(&.*?)?(\#.*?)?$/, "https://openreview.net/pdf?id=$1"], [/^.*:\/\/openreview\.net\/pdf\?id=(\S*?)(&.*?)?(\#.*?)?$/, "https://openreview.net/forum?id=$1"], diff --git a/chrome/content.js b/chrome/content.js index 48a4c01..de030b1 100644 --- a/chrome/content.js +++ b/chrome/content.js @@ -3,9 +3,9 @@ // Regular expressions for parsing arXiv IDs from URLs. // Ref: https://info.arxiv.org/help/arxiv_identifier_for_services.html#urls-for-standard-arxiv-functions const ID_REGEXP_REPLACE = [ - [/^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/abs\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "$1", "Abstract"], - [/^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/pdf\/(\S*?)(?:\.pdf)?\/*(\?.*?)?(\#.*?)?$/, "$1", "PDF"], - [/^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/ftp\/(?:arxiv\/|([^\/]*\/))papers\/.*?([^\/]*?)\.pdf(\?.*?)?(\#.*?)?$/, "$1$2", "PDF"], + [/^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/abs\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "$1", "Abstract"], + [/^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/pdf\/(\S*?)(?:\.pdf)?\/*(\?.*?)?(\#.*?)?$/, "$1", "PDF"], + [/^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/ftp\/(?:arxiv\/|([^\/]*\/))papers\/.*?([^\/]*?)\.pdf(\?.*?)?(\#.*?)?$/, "$1$2", "PDF"], [/^.*:\/\/ar5iv\.labs\.arxiv\.org\/html\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "$1", "HTML5"], ]; // Store new title for onMessage to deal with Chrome PDF viewer bug. @@ -57,7 +57,7 @@ async function getArticleInfoAsync(id, pageType) { const authors = [...entry.getElementsByTagName("name")].map((el) => el.textContent).join(", "); const publishedYear = entry.getElementsByTagName("published")[0].textContent.split('-')[0]; const updatedYear = entry.getElementsByTagName("updated")[0].textContent.split('-')[0]; - const versionRegexp = /^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/abs\/.*v([0-9]*)$/; + const versionRegexp = /^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/abs\/.*v([0-9]*)$/; var version = ''; for (const el of entry.getElementsByTagName("link")) { const match = el.getAttribute("href").match(versionRegexp); diff --git a/chrome/manifest.json b/chrome/manifest.json index 99b9ce4..a4d549b 100644 --- a/chrome/manifest.json +++ b/chrome/manifest.json @@ -13,6 +13,8 @@ "*://export.arxiv.org/abs/*", "*://browse.arxiv.org/*pdf*", "*://browse.arxiv.org/abs/*", + "*://www.arxiv.org/*pdf*", + "*://www.arxiv.org/abs/*", "*://ar5iv.labs.arxiv.org/html/*" ], "js": [ "content.js" ], @@ -44,6 +46,7 @@ "*://arxiv.org/*", "*://export.arxiv.org/*", "*://browse.arxiv.org/*", + "*://www.arxiv.org/*", "*://ar5iv.labs.arxiv.org/*" ], "icons": { diff --git a/firefox/background.js b/firefox/background.js index 14c2053..c1b84d2 100644 --- a/firefox/background.js +++ b/firefox/background.js @@ -5,16 +5,16 @@ const pdfViewerRelatedURL = "pdfviewer.html?target="; // The match pattern for the URLs to redirect const redirectPatterns = [ - "*://arxiv.org/*.pdf*", "*://export.arxiv.org/*.pdf*", "*://browse.arxiv.org/*.pdf*", - "*://arxiv.org/*pdf*/*", "*://export.arxiv.org/*pdf*/*", "*://browse.arxiv.org/*pdf*/*", + "*://arxiv.org/*.pdf*", "*://export.arxiv.org/*.pdf*", "*://browse.arxiv.org/*.pdf*", "*://www.arxiv.org/*.pdf*", + "*://arxiv.org/*pdf*/*", "*://export.arxiv.org/*pdf*/*", "*://browse.arxiv.org/*pdf*/*", "*://www.arxiv.org/*pdf*/*", ]; // Regular expressions for parsing target navigation URL from URLs. // Ref: https://info.arxiv.org/help/arxiv_identifier_for_services.html#urls-for-standard-arxiv-functions const TARGET_URL_REGEXP_REPLACE = [ - [/^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/abs\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "https://arxiv.org/pdf/$1.pdf"], - [/^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/pdf\/(\S*?)(?:\.pdf)?\/*(\?.*?)?(\#.*?)?$/, "https://arxiv.org/abs/$1"], - [/^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/ftp\/(?:arxiv\/|([^\/]*\/))papers\/.*?([^\/]*?)\.pdf(\?.*?)?(\#.*?)?$/, "https://arxiv.org/abs/$1$2"], - [/^.*:\/\/(?:browse\.)?arxiv\.org\/html\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "https://arxiv.org/abs/$1"], + [/^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/abs\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "https://arxiv.org/pdf/$1.pdf"], + [/^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/pdf\/(\S*?)(?:\.pdf)?\/*(\?.*?)?(\#.*?)?$/, "https://arxiv.org/abs/$1"], + [/^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/ftp\/(?:arxiv\/|([^\/]*\/))papers\/.*?([^\/]*?)\.pdf(\?.*?)?(\#.*?)?$/, "https://arxiv.org/abs/$1$2"], + [/^.*:\/\/(?:browse\.|www\.)?arxiv\.org\/html\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "https://arxiv.org/abs/$1"], [/^.*:\/\/ar5iv\.labs\.arxiv\.org\/html\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "https://arxiv.org/abs/$1"], [/^.*:\/\/openreview\.net\/forum\?id=(\S*?)(&.*?)?(\#.*?)?$/, "https://openreview.net/pdf?id=$1"], [/^.*:\/\/openreview\.net\/pdf\?id=(\S*?)(&.*?)?(\#.*?)?$/, "https://openreview.net/forum?id=$1"], diff --git a/firefox/content.js b/firefox/content.js index 3ba08dc..b31dc60 100644 --- a/firefox/content.js +++ b/firefox/content.js @@ -5,12 +5,12 @@ // Regular expressions for parsing arXiv IDs from URLs. // Ref: https://info.arxiv.org/help/arxiv_identifier_for_services.html#urls-for-standard-arxiv-functions const ID_REGEXP_REPLACE = [ - [/^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/abs\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "$1", "Abstract"], - [/^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/pdf\/(\S*?)(?:\.pdf)?\/*(\?.*?)?(\#.*?)?$/, "$1", "PDF"], - [/^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/ftp\/(?:arxiv\/|([^\/]*\/))papers\/.*?([^\/]*?)\.pdf(\?.*?)?(\#.*?)?$/, "$1$2", "PDF"], + [/^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/abs\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "$1", "Abstract"], + [/^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/pdf\/(\S*?)(?:\.pdf)?\/*(\?.*?)?(\#.*?)?$/, "$1", "PDF"], + [/^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/ftp\/(?:arxiv\/|([^\/]*\/))papers\/.*?([^\/]*?)\.pdf(\?.*?)?(\#.*?)?$/, "$1$2", "PDF"], [/^.*:\/\/ar5iv\.labs\.arxiv\.org\/html\/(\S*?)\/*(\?.*?)?(\#.*?)?$/, "$1", "HTML5"], // For external PDF viewer - [/^.*:\/\/mozilla\.github\.io\/pdf\.js\/web\/viewer\.html\?file=https:\/\/(?:export\.|browse\.)?arxiv\.org\/pdf\/(\S*?)(?:\.pdf)?\/*$/, "$1"], + [/^.*:\/\/mozilla\.github\.io\/pdf\.js\/web\/viewer\.html\?file=https:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/pdf\/(\S*?)(?:\.pdf)?\/*$/, "$1"], ]; // All console logs should start with this prefix. const LOG_PREFIX = "[arXiv-utils]"; @@ -57,7 +57,7 @@ async function getArticleInfoAsync(id, pageType) { const authors = [...entry.getElementsByTagName("name")].map((el) => el.textContent).join(", "); const publishedYear = entry.getElementsByTagName("published")[0].textContent.split('-')[0]; const updatedYear = entry.getElementsByTagName("updated")[0].textContent.split('-')[0]; - const versionRegexp = /^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/abs\/.*v([0-9]*)$/; + const versionRegexp = /^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/abs\/.*v([0-9]*)$/; var version = ''; for (const el of entry.getElementsByTagName("link")) { const match = el.getAttribute("href").match(versionRegexp); diff --git a/firefox/manifest.json b/firefox/manifest.json index dc103b4..f9fcdfa 100644 --- a/firefox/manifest.json +++ b/firefox/manifest.json @@ -10,6 +10,7 @@ "*://arxiv.org/abs/*", "*://export.arxiv.org/abs/*", "*://browse.arxiv.org/abs/*", + "*://www.arxiv.org/abs/*", "*://ar5iv.labs.arxiv.org/html/*", "*://mozilla.github.io/pdf.js/web/viewer.html*" ], @@ -41,10 +42,11 @@ "downloads", "*://arxiv.org/*pdf*", "*://export.arxiv.org/*pdf*", - "*://browse.arxiv.org/*pdf*" + "*://browse.arxiv.org/*pdf*", + "*://www.arxiv.org/*pdf*" ], "content_security_policy": - "script-src 'self'; object-src 'self' https://arxiv.org https://export.arxiv.org https://browse.arxiv.org;", + "script-src 'self'; object-src 'self' https://arxiv.org https://export.arxiv.org https://browse.arxiv.org https://www.arxiv.org;", "web_accessible_resources": [ "pdfviewer.html" ], diff --git a/firefox/pdfviewer.js b/firefox/pdfviewer.js index 69c0f95..f066cdb 100644 --- a/firefox/pdfviewer.js +++ b/firefox/pdfviewer.js @@ -3,8 +3,8 @@ // Regular expressions for parsing arXiv IDs from URLs. // Ref: https://info.arxiv.org/help/arxiv_identifier_for_services.html#urls-for-standard-arxiv-functions const ID_REGEXP_REPLACE = [ - [/^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/pdf\/(\S*?)(?:\.pdf)?\/*(\?.*?)?(\#.*?)?$/, "$1"], - [/^.*:\/\/(?:export\.|browse\.)?arxiv\.org\/ftp\/(?:arxiv\/|([^\/]*\/))papers\/.*?([^\/]*?)\.pdf(\?.*?)?(\#.*?)?$/, "$1$2"], + [/^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/pdf\/(\S*?)(?:\.pdf)?\/*(\?.*?)?(\#.*?)?$/, "$1"], + [/^.*:\/\/(?:export\.|browse\.|www\.)?arxiv\.org\/ftp\/(?:arxiv\/|([^\/]*\/))papers\/.*?([^\/]*?)\.pdf(\?.*?)?(\#.*?)?$/, "$1$2"], ]; // All console logs should start with this prefix. const LOG_PREFIX = "[arXiv-utils]"; diff --git a/tests/testcases.yaml b/tests/testcases.yaml index b627c0d..df11c29 100644 --- a/tests/testcases.yaml +++ b/tests/testcases.yaml @@ -59,6 +59,18 @@ navigation: pdf_title: Designing Network Design Spaces | PDF pdf2abs: False description: abs with browse.arxiv.org subdomain +- url: https://arxiv.org/abs/2003.13678 + title: Designing Network Design Spaces | Abstract + pdf_url: https://www.arxiv.org/pdf/2003.13678 + pdf_title: Designing Network Design Spaces | PDF + abs2pdf: False + description: pdf with www.arxiv.org subdomain +- url: https://www.arxiv.org/abs/2003.13678 + title: Designing Network Design Spaces | Abstract + pdf_url: https://arxiv.org/pdf/2003.13678 + pdf_title: Designing Network Design Spaces | PDF + pdf2abs: False + description: abs with www.arxiv.org subdomain - url: https://arxiv.org/abs/2003.13678 title: Designing Network Design Spaces | Abstract pdf_url: https://arxiv.org/pdf/2003.13678