Skip to content

Commit

Permalink
Formatted with black and flake8 added to tests
Browse files Browse the repository at this point in the history
  • Loading branch information
bdc34 committed Oct 11, 2023
1 parent db828f0 commit a0be306
Show file tree
Hide file tree
Showing 83 changed files with 3,029 additions and 2,570 deletions.
14 changes: 8 additions & 6 deletions .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,14 @@ jobs:
python -m pip install --upgrade pip
pip install flake8 pytest poetry
poetry install
#- name: Lint with flake8
# run: |
# # stop the build if there are Python syntax errors or undefined names
# flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
# flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# ignore whitespace and other formatting errors
# flake8 . --count --ignore=E1,E2,E3,E4,E5,W2,W3,W5 --max-line-length=127 --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
# flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with pytest
run: |
poetry run pytest tests
72 changes: 44 additions & 28 deletions browse/commands/invalidate.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Invalidates pages in the CDN."""
import re
from typing import Optional, Iterable, List
from typing import List

import click
from flask import Blueprint
Expand All @@ -15,37 +15,47 @@


@bp.cli.command(short_help="invalidates CDN for PDFs from a mailing")
@click.argument("mailings",
# help="Invalidate all PDFs from these mailings. May have more than one. Format YYMMDD.",
nargs=-1)
@click.argument(
"mailings",
# help="Invalidate all PDFs from these mailings. May have more than one. Format YYMMDD.",
nargs=-1,
)
@click.option("--project", default="arxiv-production")
@click.option("--cdn", default="browse-arxiv-org-load-balancer2",
help="Url-map of the CDN. Find it with `gcloud compute url-maps list`"
)
@click.option("-n", "--dry-run", "dry_run", is_flag=True,
help="Only display what paths would be invalidated.",
default=False)
@click.option("-v", is_flag=True,
help="Verbose.",
default=False)
def invalidate_mailings(project: str, cdn: str, mailings: List[str], dry_run: bool, v: bool) -> None:
@click.option(
"--cdn",
default="browse-arxiv-org-load-balancer2",
help="Url-map of the CDN. Find it with `gcloud compute url-maps list`",
)
@click.option(
"-n",
"--dry-run",
"dry_run",
is_flag=True,
help="Only display what paths would be invalidated.",
default=False,
)
@click.option("-v", is_flag=True, help="Verbose.", default=False)
def invalidate_mailings(
project: str, cdn: str, mailings: List[str], dry_run: bool, v: bool
) -> None:
"""Invalidate CDN for PDFs in a mailing."""
if not mailings:
raise ValueError("mailing must not be empty.")

mailings = [date for date in mailings if date]
if any([not re.match(r'\d{6}', mailing) for mailing in mailings]):
if any([not re.match(r"\d{6}", mailing) for mailing in mailings]):
raise ValueError("mailings values must be like '230130'")

paths: List[str] = []
session: Session = db.session
for mailing in mailings:
if v:
print(f"About to query for {mailing}")
papers = (session.query(NextMail.paper_id, NextMail.version)
.filter(NextMail.mail_id == int(mailing)))
papers = session.query(NextMail.paper_id, NextMail.version).filter(
NextMail.mail_id == int(mailing)
)

nn = 0;
nn = 0
for paper_id, version in papers.all():
paths.append(f"/pdf/{paper_id}.pdf")
paths.append(f"/pdf/{paper_id}v{version}.pdf")
Expand All @@ -55,13 +65,17 @@ def invalidate_mailings(project: str, cdn: str, mailings: List[str], dry_run: bo
print(f"For {mailing} found {nn} papers.")

if v:
print(f"{len(paths)} paths to invalidate. "
"Two for each paper. One with version and one without.")
print(
f"{len(paths)} paths to invalidate. "
"Two for each paper. One with version and one without."
)

_invalidate(project, cdn, paths, dry_run=dry_run, v=v)


def _invalidate(proj: str, cdn: str, paths: List[str], dry_run: bool = False, v: bool = False) -> None:
def _invalidate(
proj: str, cdn: str, paths: List[str], dry_run: bool = False, v: bool = False
) -> None:
"""Invalidates `paths` on `cdn` in `proj`."""
paths.sort()
if v:
Expand All @@ -78,22 +92,24 @@ def _invalidate(proj: str, cdn: str, paths: List[str], dry_run: bool = False, v:
request = compute_v1.InvalidateCacheUrlMapRequest(
project=proj,
url_map=cdn,
cache_invalidation_rule_resource=
compute_v1.CacheInvalidationRule(
cache_invalidation_rule_resource=compute_v1.CacheInvalidationRule(
# host="*",
path=path),
path=path
),
)
_invalidate_req(client, request)
if v:
print(f"Invalidated {path}.")


def _exception_pred(ex: Exception) -> bool:
return bool(ex and
(isinstance(ex, BaseException)
or "rate limit exceeded" in str(ex).lower()))
return bool(
ex and (isinstance(ex, BaseException) or "rate limit exceeded" in str(ex).lower())
)


@retry.Retry(predicate=_exception_pred)
def _invalidate_req(client: compute_v1.UrlMapsClient, request: compute_v1.InvalidateCacheUrlMapRequest) -> None:
def _invalidate_req(
client: compute_v1.UrlMapsClient, request: compute_v1.InvalidateCacheUrlMapRequest
) -> None:
client.invalidate_cache_unary(request=request)
102 changes: 58 additions & 44 deletions browse/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from secrets import token_hex
import warnings

from typing import Optional, Dict, Any, List
from typing import Optional, Dict, Any
import logging

from pydantic import SecretStr, PyObject, BaseSettings
Expand All @@ -16,7 +16,7 @@


DEFAULT_DB = "sqlite:///../tests/data/browse.db"
TESTING_LATEXML_DB = 'sqlite:///../tests/data/latexmldb.db'
TESTING_LATEXML_DB = "sqlite:///../tests/data/latexmldb.db"


class Settings(BaseSettings):
Expand Down Expand Up @@ -45,27 +45,27 @@ class Settings(BaseSettings):
LATEXML_ENABLED: bool = False
"""Sets if LATEXML is enabled or not"""

LATEXML_BASE_URL: str = ''
LATEXML_BASE_URL: str = ""
"""Base GS bucket URL to find the HTML."""

LATEXML_DB_USER: str = ''
LATEXML_DB_USER: str = ""
"""DB username for latexml DB."""

LATEXML_DB_PASS: str = ''
LATEXML_DB_PASS: str = ""
"""DB password for latexml DB."""

LATEXML_DB_NAME: str = ''
LATEXML_DB_NAME: str = ""
"""DB name for latexml DB."""

LATEXML_INSTANCE_CONNECTION_NAME: str = ''
LATEXML_INSTANCE_CONNECTION_NAME: str = ""
"""GCP instance connection name of managed DB.
ex. arxiv-xyz:us-central1:my-special-db
If this is set, a TLS protected GCP connection will be used to connect to
the latexml db. See
https://cloud.google.com/sql/docs/postgres/connect-connectors#python_1"""

LATEXML_IP_TYPE: str = 'PUBLIC_IP'
LATEXML_IP_TYPE: str = "PUBLIC_IP"
"""If the GCP connection is public or private"""

SQLALCHEMY_BINDS: Dict[str, Any] = {}
Expand Down Expand Up @@ -97,20 +97,28 @@ class Settings(BaseSettings):

BROWSE_SITE_LABEL: str = "arXiv.org"

BROWSE_ANALYTICS_ENABLED: bool = bool(int(os.environ.get("BROWSE_ANALYTICS_ENABLED", "0")))
BROWSE_ANALYTICS_ENABLED: bool = bool(
int(os.environ.get("BROWSE_ANALYTICS_ENABLED", "0"))
)
"""Enable/disable web analytics, ie: Pendo, Piwik, geoip."""

BROWSE_USER_BANNER_ENABLED: bool = bool(int(os.environ.get("BROWSE_USER_BANNER_ENABLED", "0")))
BROWSE_USER_BANNER_ENABLED: bool = bool(
int(os.environ.get("BROWSE_USER_BANNER_ENABLED", "0"))
)
"""Enable/disable the user banner, the full width one, above the Cornell logo."""

BROWSE_MINIMAL_BANNER_ENABLED: bool = bool(int(os.environ.get("BROWSE_MINIMAL_BANNER_ENABLED", "0")))
BROWSE_MINIMAL_BANNER_ENABLED: bool = bool(
int(os.environ.get("BROWSE_MINIMAL_BANNER_ENABLED", "0"))
)
"""Enable/disable the banner to the right of the Cornell logo, before the donate button."""

BROWSE_SPECIAL_MESSAGE_ENABLED: bool = bool(int(os.environ.get("BROWSE_SPECIAL_MESSAGE_ENABLED", "0")))
BROWSE_SPECIAL_MESSAGE_ENABLED: bool = bool(
int(os.environ.get("BROWSE_SPECIAL_MESSAGE_ENABLED", "0"))
)
"""Enable/disable the cloud list item, in the arXiv News section, in home/special-message.html"""

############################## Services ##############################
DOCUMENT_LISTING_SERVICE: PyObject = 'browse.services.listing.fs_listing' # type: ignore
# ############################# Services ##############################
DOCUMENT_LISTING_SERVICE: PyObject = "browse.services.listing.fs_listing" # type: ignore
"""What implementation to use for the listing service.
Accepted values are
Expand All @@ -121,14 +129,13 @@ class Settings(BaseSettings):
- `browse.services.listing.fake`: A totally fake set of listings for testing.
"""

DOCUMENT_LISTING_PATH: str = 'tests/data/abs_files/ftp'
DOCUMENT_LISTING_PATH: str = "tests/data/abs_files/ftp"
"""Path to get listing files from.
This can start with gs:// to use Google Storage.
Ex gs://arxiv-production-data/ftp."""


DOCUMENT_ABSTRACT_SERVICE: PyObject = 'browse.services.documents.fs_docs' # type: ignore
DOCUMENT_ABSTRACT_SERVICE: PyObject = "browse.services.documents.fs_docs" # type: ignore
"""Implementation to use for abstracts.
Accepted values are:
Expand All @@ -148,17 +155,16 @@ class Settings(BaseSettings):
This can start with gs:// to use Google Storage.
"""

DOCUMENT_CACHE_PATH: str = "tests/data/cache"
DOCUMENT_CACHE_PATH: str = "tests/data/cache"
"""Path to cache directory"""

PREV_NEXT_SERVICE: PyObject = 'browse.services.prevnext.fsprevnext' # type: ignore
PREV_NEXT_SERVICE: PyObject = "browse.services.prevnext.fsprevnext" # type: ignore
"""Implementation of the prev/next service used for those features on the abs page.
Currently the only value is `browse.services.prevnext.fsprevnext` This uses
DOCUMENT_LATEST_VERSIONS_PATH and DOCUMENT_ORIGNAL_VERSIONS_PATH.
"""


DISSEMINATION_STORAGE_PREFIX: str = "./tests/data/abs_files/"
"""Storage prefix to use. Ex gs://arxiv-production-data
Expand All @@ -169,7 +175,7 @@ class Settings(BaseSettings):
`./testing/data/` for testing data. Must end with a /
"""

######################### End of Services ###########################
# ######################## End of Services ###########################

SHOW_EMAIL_SECRET: SecretStr = SecretStr(token_hex(10))
"""Used in linking to /show-email.
Expand Down Expand Up @@ -204,7 +210,7 @@ class Settings(BaseSettings):
CLASSIC_SESSION_HASH: SecretStr = SecretStr(token_hex(10))
SESSION_DURATION: int = 36000

ARXIV_BUSINESS_TZ: str = 'US/Eastern'
ARXIV_BUSINESS_TZ: str = "US/Eastern"
"""
Timezone of the arxiv business offices.
"""
Expand Down Expand Up @@ -235,7 +241,6 @@ class Settings(BaseSettings):
original file only in debug mode.
"""


SECRET_KEY: str = "qwert2345"

SESSION_COOKIE_NAME: str = "arxiv_browse"
Expand Down Expand Up @@ -388,38 +393,47 @@ class Config:
"""Additional pydantic config of these settings."""

fields = {
'SQLALCHEMY_DATABASE_URI': {
'env': ['BROWSE_SQLALCHEMY_DATABASE_URI', 'CLASSIC_DATABASE_URI']
"SQLALCHEMY_DATABASE_URI": {
"env": ["BROWSE_SQLALCHEMY_DATABASE_URI", "CLASSIC_DATABASE_URI"]
}
}

def check(self) -> None:
"""A check and fix up of a settings object."""
if 'sqlite' in self.SQLALCHEMY_DATABASE_URI:
if "sqlite" in self.SQLALCHEMY_DATABASE_URI:
if not self.TESTING:
log.warning(f"using SQLite DB at {self.SQLALCHEMY_DATABASE_URI}")
self.SQLALCHEMY_MAX_OVERFLOW = None
self.SQLALCHEMY_POOL_SIZE = None

if (os.environ.get("FLASK_ENV", False) == "production"
and "sqlite" in self.SQLALCHEMY_DATABASE_URI):
if (
os.environ.get("FLASK_ENV", False) == "production"
and "sqlite" in self.SQLALCHEMY_DATABASE_URI
):
warnings.warn(
"Using sqlite in BROWSE_SQLALCHEMY_DATABASE_URI in production environment"
)

if self.DOCUMENT_ORIGNAL_VERSIONS_PATH.startswith("gs://") and \
self.DOCUMENT_LATEST_VERSIONS_PATH.startswith("gs://"):
self.FS_TZ = "UTC"
log.warning("Switching FS_TZ to UTC since DOCUMENT_LATEST_VERSIONS_PATH "
"and DOCUMENT_ORIGINAL_VERSIONS_PATH are Google Storage")
if os.environ.get('GOOGLE_APPLICATION_CREDENTIALS', ''):
log.warning("GOOGLE_APPLICATION_CREDENTIALS is set")
else:
log.warning("GOOGLE_APPLICATION_CREDENTIALS is not set")

if ("fs_docs" in str(type(self.DOCUMENT_ABSTRACT_SERVICE)) and
"fs_listing" in str(type(self.DOCUMENT_LISTING_PATH)) and
self.DOCUMENT_LATEST_VERSIONS_PATH != self.DOCUMENT_LISTING_PATH):
log.warning(f"Unexpected: using FS listings and abs sevice but FS don't match. "
"latest abs at {self.DOCUMENT_LATEST_VERSIONS_PATH} "
f"but listings at {self.DOCUMENT_LISTING_PATH}")
if self.DOCUMENT_ORIGNAL_VERSIONS_PATH.startswith(
"gs://"
) and self.DOCUMENT_LATEST_VERSIONS_PATH.startswith("gs://"):
self.FS_TZ = "UTC"
log.warning(
"Switching FS_TZ to UTC since DOCUMENT_LATEST_VERSIONS_PATH "
"and DOCUMENT_ORIGINAL_VERSIONS_PATH are Google Storage"
)
if os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""):
log.warning("GOOGLE_APPLICATION_CREDENTIALS is set")
else:
log.warning("GOOGLE_APPLICATION_CREDENTIALS is not set")

if (
"fs_docs" in str(type(self.DOCUMENT_ABSTRACT_SERVICE))
and "fs_listing" in str(type(self.DOCUMENT_LISTING_PATH))
and self.DOCUMENT_LATEST_VERSIONS_PATH != self.DOCUMENT_LISTING_PATH
):
log.warning(
f"Unexpected: using FS listings and abs sevice but FS don't match. "
"latest abs at {self.DOCUMENT_LATEST_VERSIONS_PATH} "
f"but listings at {self.DOCUMENT_LISTING_PATH}"
)
7 changes: 3 additions & 4 deletions browse/controllers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,12 @@ def check_supplied_identifier(id: Identifier, route: str) -> Optional[Response]:

arxiv_id = id.idv if id.has_version else id.id
redirect_url: str = url_for(route, arxiv_id=arxiv_id)
return {},\
status.MOVED_PERMANENTLY,\
{'Location': redirect_url}

return {}, status.MOVED_PERMANENTLY, {"Location": redirect_url}


_arxiv_biz_tz = None


def biz_tz() -> ZoneInfo:
global _arxiv_biz_tz
if _arxiv_biz_tz is None:
Expand Down
Loading

0 comments on commit a0be306

Please sign in to comment.