Formatted with black and flake8 added to tests

arXiv · Oct 11, 2023 · a0be306 · a0be306
1 parent db828f0
commit a0be306
Show file tree

Hide file tree

Showing 83 changed files with 3,029 additions and 2,570 deletions.
diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
@@ -28,12 +28,14 @@ jobs:
         python -m pip install --upgrade pip
         pip install flake8 pytest poetry
         poetry install
-    #- name: Lint with flake8
-    #  run: |
-    #    # stop the build if there are Python syntax errors or undefined names
-    #    flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-    #    # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-    #    flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+    - name: Lint with flake8
+     run: |
+       # stop the build if there are Python syntax errors or undefined names
+       flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+       # ignore whitespace and other formatting errors
+       # flake8 . --count --ignore=E1,E2,E3,E4,E5,W2,W3,W5 --max-line-length=127 --statistics
+       # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+       # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
     - name: Test with pytest
       run: |
         poetry run pytest tests
diff --git a/browse/commands/invalidate.py b/browse/commands/invalidate.py
@@ -1,6 +1,6 @@
 """Invalidates pages in the CDN."""
 import re
-from typing import Optional, Iterable, List
+from typing import List
 
 import click
 from flask import Blueprint
@@ -15,37 +15,47 @@
 
 
 @bp.cli.command(short_help="invalidates CDN for PDFs from a mailing")
-@click.argument("mailings",
-                # help="Invalidate all PDFs from these mailings. May have more than one. Format YYMMDD.",
-                nargs=-1)
+@click.argument(
+    "mailings",
+    # help="Invalidate all PDFs from these mailings. May have more than one. Format YYMMDD.",
+    nargs=-1,
+)
 @click.option("--project", default="arxiv-production")
-@click.option("--cdn", default="browse-arxiv-org-load-balancer2",
-              help="Url-map of the CDN. Find it with `gcloud compute url-maps list`"
-              )
-@click.option("-n", "--dry-run", "dry_run", is_flag=True,
-              help="Only display what paths would be invalidated.",
-              default=False)
-@click.option("-v", is_flag=True,
-              help="Verbose.",
-              default=False)
-def invalidate_mailings(project: str, cdn: str, mailings: List[str], dry_run: bool, v: bool) -> None:
+@click.option(
+    "--cdn",
+    default="browse-arxiv-org-load-balancer2",
+    help="Url-map of the CDN. Find it with `gcloud compute url-maps list`",
+)
+@click.option(
+    "-n",
+    "--dry-run",
+    "dry_run",
+    is_flag=True,
+    help="Only display what paths would be invalidated.",
+    default=False,
+)
+@click.option("-v", is_flag=True, help="Verbose.", default=False)
+def invalidate_mailings(
+    project: str, cdn: str, mailings: List[str], dry_run: bool, v: bool
+) -> None:
     """Invalidate CDN for PDFs in a mailing."""
     if not mailings:
         raise ValueError("mailing must not be empty.")
 
     mailings = [date for date in mailings if date]
-    if any([not re.match(r'\d{6}', mailing) for mailing in mailings]):
+    if any([not re.match(r"\d{6}", mailing) for mailing in mailings]):
         raise ValueError("mailings values must be like '230130'")
 
     paths: List[str] = []
     session: Session = db.session
     for mailing in mailings:
         if v:
             print(f"About to query for {mailing}")
-        papers = (session.query(NextMail.paper_id, NextMail.version)
-                  .filter(NextMail.mail_id == int(mailing)))
+        papers = session.query(NextMail.paper_id, NextMail.version).filter(
+            NextMail.mail_id == int(mailing)
+        )
 
-        nn = 0;
+        nn = 0
         for paper_id, version in papers.all():
             paths.append(f"/pdf/{paper_id}.pdf")
             paths.append(f"/pdf/{paper_id}v{version}.pdf")
@@ -55,13 +65,17 @@ def invalidate_mailings(project: str, cdn: str, mailings: List[str], dry_run: bo
             print(f"For {mailing} found {nn} papers.")
 
     if v:
-        print(f"{len(paths)} paths to invalidate. "
-              "Two for each paper. One with version and one without.")
+        print(
+            f"{len(paths)} paths to invalidate. "
+            "Two for each paper. One with version and one without."
+        )
 
     _invalidate(project, cdn, paths, dry_run=dry_run, v=v)
 
 
-def _invalidate(proj: str, cdn: str, paths: List[str], dry_run: bool = False, v: bool = False) -> None:
+def _invalidate(
+    proj: str, cdn: str, paths: List[str], dry_run: bool = False, v: bool = False
+) -> None:
     """Invalidates `paths` on `cdn` in `proj`."""
     paths.sort()
     if v:
@@ -78,22 +92,24 @@ def _invalidate(proj: str, cdn: str, paths: List[str], dry_run: bool = False, v:
             request = compute_v1.InvalidateCacheUrlMapRequest(
                 project=proj,
                 url_map=cdn,
-                cache_invalidation_rule_resource=
-                compute_v1.CacheInvalidationRule(
+                cache_invalidation_rule_resource=compute_v1.CacheInvalidationRule(
                     # host="*",
-                    path=path),
+                    path=path
+                ),
             )
             _invalidate_req(client, request)
             if v:
                 print(f"Invalidated {path}.")
 
 
 def _exception_pred(ex: Exception) -> bool:
-    return bool(ex and
-                (isinstance(ex, BaseException)
-                 or "rate limit exceeded" in str(ex).lower()))
+    return bool(
+        ex and (isinstance(ex, BaseException) or "rate limit exceeded" in str(ex).lower())
+    )
 
 
 @retry.Retry(predicate=_exception_pred)
-def _invalidate_req(client: compute_v1.UrlMapsClient, request: compute_v1.InvalidateCacheUrlMapRequest) -> None:
+def _invalidate_req(
+    client: compute_v1.UrlMapsClient, request: compute_v1.InvalidateCacheUrlMapRequest
+) -> None:
     client.invalidate_cache_unary(request=request)
diff --git a/browse/config.py b/browse/config.py
@@ -7,7 +7,7 @@
 from secrets import token_hex
 import warnings
 
-from typing import Optional, Dict, Any, List
+from typing import Optional, Dict, Any
 import logging
 
 from pydantic import SecretStr, PyObject, BaseSettings
@@ -16,7 +16,7 @@
 
 
 DEFAULT_DB = "sqlite:///../tests/data/browse.db"
-TESTING_LATEXML_DB = 'sqlite:///../tests/data/latexmldb.db'
+TESTING_LATEXML_DB = "sqlite:///../tests/data/latexmldb.db"
 
 
 class Settings(BaseSettings):
@@ -45,27 +45,27 @@ class Settings(BaseSettings):
     LATEXML_ENABLED: bool = False
     """Sets if LATEXML is enabled or not"""
 
-    LATEXML_BASE_URL: str = ''
+    LATEXML_BASE_URL: str = ""
     """Base GS bucket URL to find the HTML."""
 
-    LATEXML_DB_USER: str = ''
+    LATEXML_DB_USER: str = ""
     """DB username for latexml DB."""
 
-    LATEXML_DB_PASS: str = ''
+    LATEXML_DB_PASS: str = ""
     """DB password for latexml DB."""
 
-    LATEXML_DB_NAME: str = ''
+    LATEXML_DB_NAME: str = ""
     """DB name for latexml DB."""
 
-    LATEXML_INSTANCE_CONNECTION_NAME: str = ''
+    LATEXML_INSTANCE_CONNECTION_NAME: str = ""
     """GCP instance connection name of managed DB.
     ex. arxiv-xyz:us-central1:my-special-db
 
     If this is set, a TLS protected GCP connection will be used to connect to
     the latexml db. See
     https://cloud.google.com/sql/docs/postgres/connect-connectors#python_1"""
 
-    LATEXML_IP_TYPE: str = 'PUBLIC_IP'
+    LATEXML_IP_TYPE: str = "PUBLIC_IP"
     """If the GCP connection is public or private"""
 
     SQLALCHEMY_BINDS: Dict[str, Any] = {}
@@ -97,20 +97,28 @@ class Settings(BaseSettings):
 
     BROWSE_SITE_LABEL: str = "arXiv.org"
 
-    BROWSE_ANALYTICS_ENABLED: bool = bool(int(os.environ.get("BROWSE_ANALYTICS_ENABLED", "0")))
+    BROWSE_ANALYTICS_ENABLED: bool = bool(
+        int(os.environ.get("BROWSE_ANALYTICS_ENABLED", "0"))
+    )
     """Enable/disable web analytics, ie: Pendo, Piwik, geoip."""
 
-    BROWSE_USER_BANNER_ENABLED: bool = bool(int(os.environ.get("BROWSE_USER_BANNER_ENABLED", "0")))
+    BROWSE_USER_BANNER_ENABLED: bool = bool(
+        int(os.environ.get("BROWSE_USER_BANNER_ENABLED", "0"))
+    )
     """Enable/disable the user banner, the full width one, above the Cornell logo."""
 
-    BROWSE_MINIMAL_BANNER_ENABLED: bool = bool(int(os.environ.get("BROWSE_MINIMAL_BANNER_ENABLED", "0")))
+    BROWSE_MINIMAL_BANNER_ENABLED: bool = bool(
+        int(os.environ.get("BROWSE_MINIMAL_BANNER_ENABLED", "0"))
+    )
     """Enable/disable the banner to the right of the Cornell logo, before the donate button."""
 
-    BROWSE_SPECIAL_MESSAGE_ENABLED: bool = bool(int(os.environ.get("BROWSE_SPECIAL_MESSAGE_ENABLED", "0")))
+    BROWSE_SPECIAL_MESSAGE_ENABLED: bool = bool(
+        int(os.environ.get("BROWSE_SPECIAL_MESSAGE_ENABLED", "0"))
+    )
     """Enable/disable the cloud list item, in the arXiv News section, in home/special-message.html"""
 
-    ############################## Services ##############################
-    DOCUMENT_LISTING_SERVICE: PyObject = 'browse.services.listing.fs_listing'  # type: ignore
+    # ############################# Services ##############################
+    DOCUMENT_LISTING_SERVICE: PyObject = "browse.services.listing.fs_listing"  # type: ignore
     """What implementation to use for the listing service.
 
     Accepted values are
@@ -121,14 +129,13 @@ class Settings(BaseSettings):
     - `browse.services.listing.fake`: A totally fake set of listings for testing.
     """
 
-    DOCUMENT_LISTING_PATH: str = 'tests/data/abs_files/ftp'
+    DOCUMENT_LISTING_PATH: str = "tests/data/abs_files/ftp"
     """Path to get listing files from.
 
     This can start with gs:// to use Google Storage.
     Ex gs://arxiv-production-data/ftp."""
 
-
-    DOCUMENT_ABSTRACT_SERVICE: PyObject = 'browse.services.documents.fs_docs'  # type: ignore
+    DOCUMENT_ABSTRACT_SERVICE: PyObject = "browse.services.documents.fs_docs"  # type: ignore
     """Implementation to use for abstracts.
 
     Accepted values are:
@@ -148,17 +155,16 @@ class Settings(BaseSettings):
         This can start with gs:// to use Google Storage.
     """
 
-    DOCUMENT_CACHE_PATH: str =  "tests/data/cache"
+    DOCUMENT_CACHE_PATH: str = "tests/data/cache"
     """Path to cache directory"""
 
-    PREV_NEXT_SERVICE: PyObject = 'browse.services.prevnext.fsprevnext'  # type: ignore
+    PREV_NEXT_SERVICE: PyObject = "browse.services.prevnext.fsprevnext"  # type: ignore
     """Implementation of the prev/next service used for those features on the abs page.
 
     Currently the only value is `browse.services.prevnext.fsprevnext` This uses
        DOCUMENT_LATEST_VERSIONS_PATH and DOCUMENT_ORIGNAL_VERSIONS_PATH.
     """
 
-
     DISSEMINATION_STORAGE_PREFIX: str = "./tests/data/abs_files/"
     """Storage prefix to use. Ex gs://arxiv-production-data
 
@@ -169,7 +175,7 @@ class Settings(BaseSettings):
     `./testing/data/` for testing data. Must end with a /
     """
 
-    ######################### End of Services ###########################
+    # ######################## End of Services ###########################
 
     SHOW_EMAIL_SECRET: SecretStr = SecretStr(token_hex(10))
     """Used in linking to /show-email.
@@ -204,7 +210,7 @@ class Settings(BaseSettings):
     CLASSIC_SESSION_HASH: SecretStr = SecretStr(token_hex(10))
     SESSION_DURATION: int = 36000
 
-    ARXIV_BUSINESS_TZ: str = 'US/Eastern'
+    ARXIV_BUSINESS_TZ: str = "US/Eastern"
     """
     Timezone of the arxiv business offices.
     """
@@ -235,7 +241,6 @@ class Settings(BaseSettings):
     original file only in debug mode.
     """
 
-
     SECRET_KEY: str = "qwert2345"
 
     SESSION_COOKIE_NAME: str = "arxiv_browse"
@@ -388,38 +393,47 @@ class Config:
         """Additional pydantic config of these settings."""
 
         fields = {
-            'SQLALCHEMY_DATABASE_URI': {
-                'env': ['BROWSE_SQLALCHEMY_DATABASE_URI', 'CLASSIC_DATABASE_URI']
+            "SQLALCHEMY_DATABASE_URI": {
+                "env": ["BROWSE_SQLALCHEMY_DATABASE_URI", "CLASSIC_DATABASE_URI"]
             }
         }
 
     def check(self) -> None:
         """A check and fix up of a settings object."""
-        if 'sqlite' in self.SQLALCHEMY_DATABASE_URI:
+        if "sqlite" in self.SQLALCHEMY_DATABASE_URI:
             if not self.TESTING:
                 log.warning(f"using SQLite DB at {self.SQLALCHEMY_DATABASE_URI}")
             self.SQLALCHEMY_MAX_OVERFLOW = None
             self.SQLALCHEMY_POOL_SIZE = None
 
-        if (os.environ.get("FLASK_ENV", False) == "production"
-                and "sqlite" in self.SQLALCHEMY_DATABASE_URI):
+        if (
+            os.environ.get("FLASK_ENV", False) == "production"
+            and "sqlite" in self.SQLALCHEMY_DATABASE_URI
+        ):
             warnings.warn(
                 "Using sqlite in BROWSE_SQLALCHEMY_DATABASE_URI in production environment"
             )
 
-        if self.DOCUMENT_ORIGNAL_VERSIONS_PATH.startswith("gs://") and \
-           self.DOCUMENT_LATEST_VERSIONS_PATH.startswith("gs://"):
-           self.FS_TZ = "UTC"
-           log.warning("Switching FS_TZ to UTC since DOCUMENT_LATEST_VERSIONS_PATH "
-                       "and DOCUMENT_ORIGINAL_VERSIONS_PATH are Google Storage")
-           if os.environ.get('GOOGLE_APPLICATION_CREDENTIALS', ''):
-               log.warning("GOOGLE_APPLICATION_CREDENTIALS is set")
-           else:
-               log.warning("GOOGLE_APPLICATION_CREDENTIALS is not set")
-
-        if ("fs_docs" in str(type(self.DOCUMENT_ABSTRACT_SERVICE)) and
-            "fs_listing" in str(type(self.DOCUMENT_LISTING_PATH)) and
-            self.DOCUMENT_LATEST_VERSIONS_PATH != self.DOCUMENT_LISTING_PATH):
-            log.warning(f"Unexpected: using FS listings and abs sevice but FS don't match. "
-                        "latest abs at {self.DOCUMENT_LATEST_VERSIONS_PATH} "
-                        f"but listings at {self.DOCUMENT_LISTING_PATH}")
+        if self.DOCUMENT_ORIGNAL_VERSIONS_PATH.startswith(
+            "gs://"
+        ) and self.DOCUMENT_LATEST_VERSIONS_PATH.startswith("gs://"):
+            self.FS_TZ = "UTC"
+            log.warning(
+                "Switching FS_TZ to UTC since DOCUMENT_LATEST_VERSIONS_PATH "
+                "and DOCUMENT_ORIGINAL_VERSIONS_PATH are Google Storage"
+            )
+            if os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""):
+                log.warning("GOOGLE_APPLICATION_CREDENTIALS is set")
+            else:
+                log.warning("GOOGLE_APPLICATION_CREDENTIALS is not set")
+
+        if (
+            "fs_docs" in str(type(self.DOCUMENT_ABSTRACT_SERVICE))
+            and "fs_listing" in str(type(self.DOCUMENT_LISTING_PATH))
+            and self.DOCUMENT_LATEST_VERSIONS_PATH != self.DOCUMENT_LISTING_PATH
+        ):
+            log.warning(
+                f"Unexpected: using FS listings and abs sevice but FS don't match. "
+                "latest abs at {self.DOCUMENT_LATEST_VERSIONS_PATH} "
+                f"but listings at {self.DOCUMENT_LISTING_PATH}"
+            )
diff --git a/browse/controllers/__init__.py b/browse/controllers/__init__.py
@@ -34,13 +34,12 @@ def check_supplied_identifier(id: Identifier, route: str) -> Optional[Response]:
 
     arxiv_id = id.idv if id.has_version else id.id
     redirect_url: str = url_for(route, arxiv_id=arxiv_id)
-    return {},\
-        status.MOVED_PERMANENTLY,\
-        {'Location': redirect_url}
-
+    return {}, status.MOVED_PERMANENTLY, {"Location": redirect_url}
 
 
 _arxiv_biz_tz = None
+
+
 def biz_tz() -> ZoneInfo:
     global _arxiv_biz_tz
     if _arxiv_biz_tz is None: