Move to ruff.toml #6

Merged · 2 commits · May 8, 2024
1 change: 1 addition & 0 deletions .cspell.json
@@ -4,6 +4,7 @@
"words": [
"autoload",
"geodataframe",
"iloc",
"notna",
"quadkey",
"quadkeys",
26 changes: 0 additions & 26 deletions pyproject.toml
@@ -29,31 +29,5 @@ pre-commit = "^3.7.0"
requires = ["poetry-core", "setuptools", "wheel"]
build-backend = "poetry.core.masonry.api"

# https://docs.astral.sh/ruff/tutorial/#configuration
[tool.ruff]
fix = true # automatically fix problems if possible
line-length = 120

# https://docs.astral.sh/ruff/linter/#rule-selection
[tool.ruff.lint]
extend-select = ["RUF", "E", "F", "I", "UP", "N", "S", "BLE", "A", "C4", "T10", "ISC", "ICN", "PT",
"Q", "SIM", "TID", "ARG", "DTZ", "PD", "PGH", "PLC", "PLE", "PLR", "PLW", "PIE", "COM"] # Enable these rules
ignore = ["PLR0913", "PLR2004", "PLR0402", "COM812", "COM819", "SIM108", "ARG002", "ISC001", "S603", "S607", "S701",
"PD901", "N806", "S113", "SIM115", "S324", "BLE001"] # except for these specific errors

[tool.ruff.lint.pylint]
# system_parameters.py has many file lookups that necessitate nested statements & branches
# Raise the allowed limits the least possible amount https://docs.astral.sh/ruff/settings/#pylint-max-branches
max-statements = 100
max-branches = 50

# https://docs.astral.sh/ruff/formatter/#configuration
[tool.ruff.format]
# quote-style = "double"

[tool.ruff.lint.per-file-ignores]
"tests/*" = ["S101"] # assert statements are allowed in tests, and paths are safe
"geojson_modelica_translator/model_connectors/load_connectors/teaser.py" = ["PLR0915"]
# Lots of statements in the teaser post-processing. Don't think we can avoid it.
"geojson_modelica_translator/model_connectors/couplings/diagram.py" = ["E501"]
# Only needed for _parse_coupling_graph docstring
94 changes: 94 additions & 0 deletions ruff.toml
@@ -0,0 +1,94 @@
fix = true
line-length = 140

[format]
# preview = true
docstring-code-format = true

# https://docs.astral.sh/ruff/linter/#rule-selection
[lint]
# preview = true
# Enable these rules
extend-select = [
"A", # flake8-builtins
"ARG", # flake8-unused-arguments
"BLE", # flake8-blind-except
"C4", # flake8-comprehensions
"COM", # flake8-commas
# "DTZ", # flake8-datetimez
"E", # Error
"F", # Pyflakes
"I", # isort
"ICN", # flake8-import-conventions
"ISC", # flake8-implicit-str-concat
"N", # pep8-naming
"PD", # pandas-vet
"PGH", # pygrep-hooks
"PIE", # flake8-pie
"PLC", # Pylint Convention
"PLE", # Pylint Error
"PLR", # Pylint Refactor
"PLW", # Pylint Warning
"PT", # flake8-pytest-style
"Q", # flake8-quotes
"RUF", # Ruff-specific rules
"S", # flake8-bandit
"SIM", # flake8-simplify
"T10", # flake8-debugger
"TID", # flake8-tidy-imports
"UP", # pyupgrade
]
# except for these specific errors
ignore = [
"A001", # builtin-variable-shadowing
"ARG001", # unused-function-argument
"ARG002", # unused-method-argument
"ARG003", # unused-class-method-argument
"BLE001", # blind-except
"COM812", # missing-trailing-comma
"COM819", # prohibited-trailing-comma
"E501", # line-too-long
"ISC001", # single-line-implicit-string-concatenation
"N806", # non-lowercase-variable-in-function
"PLC1901", # compare-to-empty-string (preview)
"PLR0402", # manual-from-import
"PLR0911", # too-many-return-statements
"PLR0912", # too-many-branches
"PLR0913", # too-many-arguments
"PLR0915", # too-many-statements
"PLR2004", # magic-value-comparison
"PT009", # pytest-unittest-assertion
"Q000", # bad-quotes-inline-string
"RUF012", # mutable-class-default
"S106", # hardcoded-password-func-arg
"S113", # Probable use of requests call without timeout
"SIM115", # Use context handler for opening files
"S311", # suspicious-non-cryptographic-random-usage
"S320", # suspicious-xmle-tree-usage
"S324", # Probable use of insecure hash functions in `hashlib`: `md5`
"S603", # subprocess-without-shell-equals-true
"S607", # start-process-with-partial-path
"S701", # jinja2-autoescape-false
"SIM108", # if-else-block-instead-of-if-exp
]

[lint.per-file-ignores]
"**/tests/test_*" = [
"S101", # assert
"S105", # hardcoded-password-string
"S307", # suspicious-eval-usage
]
"seed/**/migrations/*" = [
"ARG001", # unused-function-argument
"RUF012", # mutable-class-default
]
"seed/models/__init__.py" = [
"F401", # unused-import
"F403", # undefined-local-with-import-star
"I001", # unsorted-imports
]

[lint.pylint]
# Raise the allowed limits the least possible amount https://docs.astral.sh/ruff/settings/#pylint-max-branches
max-statements = 58
max-branches = 24
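
As one illustration of the ignore list, SIM108 (if-else-block-instead-of-if-exp) would normally suggest collapsing a simple if/else assignment into a ternary; with it ignored, an explicit block like this hypothetical snippet passes `ruff check` under this config:

```python
def pick_url(rows):
    # With SIM108 ignored, ruff does not demand the ternary form:
    # url = rows.iloc[0]["Url"] if rows.shape[0] == 1 else None
    if rows.shape[0] == 1:
        url = rows.iloc[0]["Url"]
    else:
        url = None
    return url
```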
4 changes: 1 addition & 3 deletions utils/chunk.py
@@ -6,6 +6,4 @@


def chunk(full_list: list, chunk_size: int = 100):
return [
full_list[i * chunk_size : (i + 1) * chunk_size] for i in range((len(full_list) + chunk_size - 1) // chunk_size)
]
return [full_list[i * chunk_size : (i + 1) * chunk_size] for i in range((len(full_list) + chunk_size - 1) // chunk_size)]
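
For reference, a quick usage sketch of `chunk` as reflowed above (it produces ceil(len/chunk_size) slices):

```python
# 5 items in chunks of 2 -> ceil(5/2) = 3 slices, the last one short
assert chunk(list(range(5)), chunk_size=2) == [[0, 1], [2, 3], [4]]
# edge case: an empty list produces no chunks
assert chunk([], chunk_size=100) == []
```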
10 changes: 3 additions & 7 deletions utils/geocode_addresses.py
@@ -44,9 +44,7 @@ def _process_result(result):
for i in range(1, 7):
if result.get("locations")[0].get(f"adminArea{i}Type") is None:
continue
admin_areas[result.get("locations")[0].get(f"adminArea{i}Type").lower()] = result.get("locations")[0].get(
f"adminArea{i}"
)
admin_areas[result.get("locations")[0].get(f"adminArea{i}Type").lower()] = result.get("locations")[0].get(f"adminArea{i}")

return {
"quality": quality,
@@ -81,15 +79,13 @@ def geocode_addresses(locations: list[Location], mapquest_api_key: str):
# Catch invalid API key error before parsing the response
if response.status_code == 401:
raise MapQuestAPIKeyError(
"Failed geocoding property states due to MapQuest error. "
"API Key is invalid with message: {response.content}."
"Failed geocoding property states due to MapQuest error. " "API Key is invalid with message: {response.content}."
)
results += response.json().get("results")
except Exception as e:
if response.status_code == 403:
raise MapQuestAPIKeyError(
"Failed geocoding property states due to MapQuest error. "
"Your MapQuest API Key is either invalid or at its limit."
"Failed geocoding property states due to MapQuest error. " "Your MapQuest API Key is either invalid or at its limit."
)
else:
raise e
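
One note on the reflowed literals above: Python concatenates adjacent string literals at compile time, so the single-line form is equivalent to the old two-line form; ISC001, which would flag single-line implicit concatenation, is in the ruff.toml ignore list. A minimal sketch:

```python
# Adjacent string literals fuse into one string at compile time:
msg = "Failed geocoding property states due to MapQuest error. " "API Key is invalid."
assert msg == "Failed geocoding property states due to MapQuest error. API Key is invalid."
```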
8 changes: 2 additions & 6 deletions utils/normalize_address.py
@@ -142,9 +142,7 @@ def normalize_address(address_val: str):
normalized_address = _normalize_address_number(addr["AddressNumber"])

if "StreetNamePreDirectional" in addr and addr["StreetNamePreDirectional"] is not None:
normalized_address = (
normalized_address + " " + _normalize_address_direction(addr["StreetNamePreDirectional"])
)
normalized_address = normalized_address + " " + _normalize_address_direction(addr["StreetNamePreDirectional"])

if "StreetName" in addr and addr["StreetName"] is not None:
normalized_address = normalized_address + " " + addr["StreetName"]
@@ -154,9 +152,7 @@ def normalize_address(address_val: str):
normalized_address = normalized_address + " " + _normalize_address_post_type(addr["StreetNamePostType"])

if "StreetNamePostDirectional" in addr and addr["StreetNamePostDirectional"] is not None:
normalized_address = (
normalized_address + " " + _normalize_address_direction(addr["StreetNamePostDirectional"])
)
normalized_address = normalized_address + " " + _normalize_address_direction(addr["StreetNamePostDirectional"])

if "SubaddressType" in addr and addr["SubaddressType"] is not None:
normalized_address = normalized_address + " " + _normalize_subaddress_type(addr["SubaddressType"])
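
The pattern throughout `normalize_address` is conditional concatenation: a normalized token is appended only when the parsed address tag is present and non-None. A stripped-down sketch of that pattern (hypothetical tag dict, not the repo's helpers):

```python
addr = {"AddressNumber": "100", "StreetName": "Main", "StreetNamePostType": "St"}

normalized = addr["AddressNumber"]
for tag in ("StreetNamePreDirectional", "StreetName", "StreetNamePostType"):
    # Append a token only when the tag exists and is not None.
    if tag in addr and addr[tag] is not None:
        normalized = normalized + " " + addr[tag]

assert normalized == "100 Main St"
```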
12 changes: 2 additions & 10 deletions utils/open_street_map.py
@@ -132,13 +132,7 @@ def get_node_coordinates(node_ids: list[int]):

# Extract the latitude and longitude coordinates of the node from the response
for element in data["elements"]:
if (
"type" in element
and element["type"] == "node"
and "id" in element
and "lat" in element
and "lon" in element
):
if "type" in element and element["type"] == "node" and "id" in element and "lat" in element and "lon" in element:
lat = float(element["lat"])
lon = float(element["lon"])
# Check if coordinates are within valid range
@@ -260,9 +254,7 @@ def process_dataframe_for_osm_buildings(

# check that the method is valid
if method not in ["geometry_centroid", "osm_id", "lat_long"]:
raise ValueError(
f"Invalid processing method: {method}, must be one of ['geometry_centroid', 'osm_id', 'lat_long']"
)
raise ValueError(f"Invalid processing method: {method}, must be one of ['geometry_centroid', 'osm_id', 'lat_long']")

results = []
error_processing = []
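
For context, the condition flattened onto one line in the first hunk guards against malformed elements in the Overpass-style response; a minimal sketch of what it accepts (element dicts assumed from the code):

```python
def is_valid_node(element: dict) -> bool:
    # Mirrors the flattened check: usable elements are nodes with id, lat, and lon.
    return (
        "type" in element
        and element["type"] == "node"
        and "id" in element
        and "lat" in element
        and "lon" in element
    )

assert is_valid_node({"type": "node", "id": 1, "lat": "39.7", "lon": "-105.0"})
assert not is_valid_node({"type": "way", "id": 2})
```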
4 changes: 1 addition & 3 deletions utils/ubid.py
@@ -13,9 +13,7 @@
def encode_ubid(geometry: Polygon) -> str:
min_longitude, min_latitude, max_longitude, max_latitude = geometry.bounds
centroid = geometry.centroid
ubid = encode(
min_latitude, min_longitude, max_latitude, max_longitude, centroid.y, centroid.x, codeLength=PAIR_CODE_LENGTH_
)
ubid = encode(min_latitude, min_longitude, max_latitude, max_longitude, centroid.y, centroid.x, codeLength=PAIR_CODE_LENGTH_)
return ubid


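
A hedged usage sketch of `encode_ubid` (a shapely Polygon is assumed from the signature; the exact output depends on the `encode` helper and `PAIR_CODE_LENGTH_` imported in this module):

```python
from shapely.geometry import Polygon

# A small rectangular footprint; shapely coordinates are (longitude, latitude).
footprint = Polygon([(-105.0, 39.7), (-105.0, 39.8), (-104.9, 39.8), (-104.9, 39.7)])
print(encode_ubid(footprint))  # prints the UBID string for this footprint
```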
4 changes: 2 additions & 2 deletions utils/update_quadkeys.py
@@ -16,12 +16,12 @@ def update_quadkeys(quadkeys: list[int], save_directory: Path = Path("data/quadk
Skip the download if the file has already been downloaded and is up to date
"""
save_directory.mkdir(parents=True, exist_ok=True)
df = pd.read_csv(save_directory / "dataset-links.csv")
df_update = pd.read_csv(save_directory / "dataset-links.csv")

for quadkey in tqdm(quadkeys):
download = True
quadkey_file = save_directory / f"{quadkey}.geojsonl.gz"
rows = df[df["QuadKey"] == quadkey]
rows = df_update[df_update["QuadKey"] == quadkey]
if rows.shape[0] == 1:
url = rows.iloc[0]["Url"]
elif rows.shape[0] > 1:
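
The `df` → `df_update` rename matches the config change: the old pyproject.toml ignored PD901 (pandas-vet's rule against the generic DataFrame name `df`), while the new ignore list in ruff.toml does not. A minimal sketch of the rule's effect (path assumed from the function above):

```python
from pathlib import Path

import pandas as pd

save_directory = Path("data/quadkeys")
# PD901 flags a DataFrame named plain `df`; a descriptive name passes the linter.
df_update = pd.read_csv(save_directory / "dataset-links.csv")
print(df_update.head())
```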