Skip to content

Commit

Permalink
Merge pull request #742 from arXiv/develop
Browse files Browse the repository at this point in the history
add surrogate keys to unavailable pages
  • Loading branch information
kyokukou authored Sep 26, 2024
2 parents fafd036 + 16a5e5c commit 3f3b6e4
Show file tree
Hide file tree
Showing 9 changed files with 883 additions and 772 deletions.
68 changes: 55 additions & 13 deletions browse/controllers/files/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Iterator, Union
from typing import Iterator, Union, Dict, List
from email.utils import format_datetime
from flask import Response, make_response, render_template
from datetime import timezone
Expand All @@ -7,7 +7,7 @@
from arxiv.identifier import Identifier
from arxiv.document.version import VersionEntry
from arxiv.files import FileObj

from arxiv.integration.fastly.headers import add_surrogate_key

BUFFER_SIZE = 1024 * 4

Expand Down Expand Up @@ -91,64 +91,106 @@ def download_file_base(arxiv_id: Identifier, version: Union[VersionEntry|int|str
v_num = version.version if isinstance(version, VersionEntry) else int(version)
return f"arXiv-{arxiv_id.squashed}v{v_num}"

def no_source(arxiv_id: Identifier, had_specific_version: bool = False) -> Response:
    """Build the 404 response for a paper whose source is not available.

    Headers start from ``_unavailable_headers`` tagged with ``no-source``.
    ``Surrogate-Control`` is then overridden: one year (the maximum allowed
    by RFC 2616) when ``had_specific_version`` is true, ``maxage(False)``
    otherwise.  The body is rendered from ``dissemination/withdrawn.html``.
    """
    headers = _unavailable_headers(arxiv_id, ["no-source"])
    headers['Surrogate-Control'] = (
        'max-age=31536000' if had_specific_version else maxage(False)
    )
    page = render_template("dissemination/withdrawn.html", arxiv_id=arxiv_id)
    return make_response(page, 404, headers)

def withdrawn(arxiv_id: Identifier, had_specific_version: bool = False) -> Response:
    """Build the 404 response for a withdrawn paper.

    Headers start from ``_unavailable_headers`` tagged with ``withdrawn``.
    ``Surrogate-Control`` is then overridden: one year (the maximum allowed
    by RFC 2616) when ``had_specific_version`` is true, ``maxage(False)``
    otherwise.  The body is rendered from ``dissemination/withdrawn.html``.
    """
    headers = _unavailable_headers(arxiv_id, ["withdrawn"])
    # Update the existing dict in place; rebinding ``headers`` to a fresh
    # dict here would throw away the surrogate keys built above.
    if had_specific_version:
        headers['Surrogate-Control'] = 'max-age=31536000'
    else:
        headers['Surrogate-Control'] = maxage(False)
    return make_response(
        render_template("dissemination/withdrawn.html", arxiv_id=arxiv_id),
        404,
        headers,
    )

def not_public(arxiv_id: Identifier, had_specific_version: bool = False) -> Response:
    """Build the 403 response for a paper whose source is not public.

    Returned for pages whose source is encrypted or was made not public by
    the author.  Headers start from ``_unavailable_headers`` tagged with
    ``not-public``.  ``Surrogate-Control`` is then overridden: one year (the
    maximum allowed by RFC 2616) when ``had_specific_version`` is true,
    ``maxage(False)`` otherwise.
    """
    headers = _unavailable_headers(arxiv_id, ["not-public"])
    # Update the existing dict in place; rebinding ``headers`` to a fresh
    # dict here would throw away the surrogate keys built above.
    if had_specific_version:
        headers['Surrogate-Control'] = 'max-age=31536000'
    else:
        headers['Surrogate-Control'] = maxage(False)
    return make_response(
        render_template("dissemination/not_public.html", arxiv_id=arxiv_id),
        403,
        headers,
    )


def unavailable(arxiv_id: Identifier) -> Response:
    """Build the 500 response rendered from ``dissemination/unavailable.html``.

    The response carries the headers produced by ``_unavailable_headers``
    with no extra tags, instead of an empty header dict.
    """
    headers = _unavailable_headers(arxiv_id, [])
    return make_response(
        render_template("dissemination/unavailable.html", arxiv_id=arxiv_id),
        500,
        headers,
    )


def not_pdf(arxiv_id: Identifier) -> Response:
    """Build the 404 response rendered from ``dissemination/unavailable.html``.

    The response carries the headers produced by ``_unavailable_headers``
    with the ``pdf`` tag, instead of an empty header dict.
    """
    headers = _unavailable_headers(arxiv_id, ["pdf"])
    return make_response(
        render_template("dissemination/unavailable.html", arxiv_id=arxiv_id),
        404,
        headers,
    )


def no_html(arxiv_id: Identifier) -> Response:
    """Build the 404 response rendered from ``dissemination/no_html.html``.

    The response carries the headers produced by ``_unavailable_headers``
    with the ``html`` tag, instead of an empty header dict.
    """
    headers = _unavailable_headers(arxiv_id, ["html"])
    return make_response(
        render_template("dissemination/no_html.html", arxiv_id=arxiv_id),
        404,
        headers,
    )


def not_found(arxiv_id: Identifier) -> Response:
    """Build the 404 response rendered from ``dissemination/not_found.html``.

    Headers come from ``_unavailable_headers`` with the ``not-found`` tag.
    That helper already sets ``Surrogate-Control`` from
    ``maxage(arxiv_id.has_version)``, so no separate header dict is built
    here first.
    """
    headers = _unavailable_headers(arxiv_id, ["not-found"])
    return make_response(
        render_template("dissemination/not_found.html", arxiv_id=arxiv_id),
        404,
        headers,
    )


def not_found_anc(arxiv_id: Identifier) -> Response:
    """Build the 404 response rendered from ``src/anc_not_found.html``.

    Headers come from ``_unavailable_headers`` with the ``anc`` tag.  That
    helper already sets ``Surrogate-Control`` from
    ``maxage(arxiv_id.has_version)``, so no separate header dict is built
    here first.
    """
    headers = _unavailable_headers(arxiv_id, ["anc"])
    return make_response(
        render_template("src/anc_not_found.html", arxiv_id=arxiv_id),
        404,
        headers,
    )


def bad_id(arxiv_id: Union[Identifier, str], err_msg: str) -> Response:
    """Build the 404 response rendered from ``dissemination/bad_id.html``.

    ``arxiv_id`` may be an ``Identifier`` or a plain string.  In both cases
    ``Surrogate-Control`` is set to one year and the computed headers are
    sent with the response, instead of an empty header dict.

    Args:
        arxiv_id: The identifier as an ``Identifier`` or the raw string.
        err_msg: Message passed through to the template.
    """
    headers: Dict[str, str]
    if isinstance(arxiv_id, str):
        # Only a raw string is available, so no per-paper keys can be
        # derived from it; tag the response with the generic keys only.
        headers = {'Surrogate-Control': 'max-age=31536000'}
        headers = add_surrogate_key(headers, ["paper-unavailable", "bad-id"])
    else:
        headers = _unavailable_headers(arxiv_id, [])
        headers['Surrogate-Control'] = 'max-age=31536000'
    return make_response(
        render_template(
            "dissemination/bad_id.html",
            err_msg=err_msg,
            arxiv_id=arxiv_id,
        ),
        404,
        headers,
    )


def cannot_build_pdf(arxiv_id: Identifier, msg: str, fmt: str) -> Response:
    """Build the 404 response rendered from ``dissemination/cannot_build_pdf.html``.

    The response carries the headers produced by ``_unavailable_headers``
    with the ``pdf`` tag, instead of an empty header dict.

    Args:
        arxiv_id: Identifier of the requested paper.
        msg: Message passed through to the template.
        fmt: Format value passed through to the template.
    """
    headers = _unavailable_headers(arxiv_id, ["pdf"])
    return make_response(
        render_template(
            "dissemination/cannot_build_pdf.html",
            msg=msg,
            fmt=fmt,
            arxiv_id=arxiv_id,
        ),
        404,
        headers,
    )

def _unavailable_headers(arxiv_id: Identifier, other_tags: List[str]) -> Dict[str, str]:
    """Build the response headers shared by the "unavailable" pages.

    The surrogate keys are, in order:

    * ``paper-unavailable`` and ``paper-id-<id>``;
    * every entry of ``other_tags`` as given;
    * ``paper-id-<suffix>`` and ``unavailable-<suffix>``;
    * ``<tag>-<suffix>`` for every entry of ``other_tags``.

    ``<suffix>`` is ``arxiv_id.idv`` when the identifier has a version and
    ``<id>-current`` otherwise.  ``Surrogate-Control`` is set from
    ``maxage(arxiv_id.has_version)`` before the keys are attached with
    ``add_surrogate_key``.
    """
    if arxiv_id.has_version:
        suffix = f"{arxiv_id.idv}"
    else:
        suffix = f"{arxiv_id.id}-current"

    keys = ["paper-unavailable", f"paper-id-{arxiv_id.id}"] + other_tags
    keys.extend([f"paper-id-{suffix}", f"unavailable-{suffix}"])
    keys.extend(f"{tag}-{suffix}" for tag in other_tags)

    headers: Dict[str, str] = {
        'Surrogate-Control': maxage(arxiv_id.has_version),
    }
    return add_surrogate_key(headers, keys)
Empty file removed git
Empty file.
Loading

0 comments on commit 3f3b6e4

Please sign in to comment.