Skip to content

Commit

Permalink
ruff linting
Browse files Browse the repository at this point in the history
  • Loading branch information
tillwenke committed Jan 5, 2025
1 parent d7a4643 commit 1c26a33
Show file tree
Hide file tree
Showing 8 changed files with 995 additions and 170 deletions.
18 changes: 18 additions & 0 deletions .github/workflows/ruff_linting.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
name: ruff linting
on: push
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install Python
uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install ruff==0.6.9
# Update output format to enable automatic inline annotations.
- name: Run Ruff
run: ruff check --output-format=github .
24 changes: 24 additions & 0 deletions ruff.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@

line-length = 120

[lint]
select = [
# pycodestyle
"E",
# Pyflakes
"F",
# pyupgrade
"UP",
# flake8-bugbear
"B",
# flake8-simplify
"SIM",
# isort
"I",
# pycodestyle
"D"
]

[lint.pydocstyle]
# Use Google-style docstrings.
convention = "google"
154 changes: 154 additions & 0 deletions visualization/predictors/gpmap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
import geopandas as gpd
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import rasterio
import rasterio.mask
import rasterio.plot
from matplotlib import cm
from matplotlib import pyplot as plt
from rasterio.control import GroundControlPoint as GCP
from rasterio.crs import CRS
from rasterio.transform import from_gcps
from shapely.geometry import Point, Polygon
from shapely.ops import unary_union
from shapely.validation import make_valid
from sklearn.base import BaseEstimator, RegressorMixin
from tqdm.auto import tqdm
import time
import os
import pickle
from utils.utils_data import get_points
from models import MapBasedModel
from utils.utils_models import fit_gpr_silent
import glob

class GPMap(MapBasedModel):
def __init__(self):
self.gpr_path = "models/kernel.pkl"
self.points_path = "dump.sqlite"

with open(self.gpr_path, "rb") as file:
self.gpr = pickle.load(file)

super().__init__(method=type(self.gpr).__name__, region="world", resolution=10, version="prod", verbose=False)

files = glob.glob(f"intermediate/map_{self.method}_{self.region}_{self.resolution}_{self.version}*.txt")
if len(files) == 0:
raise FileNotFoundError("No base map calculated so far.")
else:
latest_date = pd.Timestamp.min
for file in files:
date = pd.Timestamp(file.split("_")[-1].split(".")[0])
if date > latest_date:
latest_date = date
self.old_map_path = file

self.begin = latest_date

self.batch_size = 10000
self.today = pd.Timestamp("2024-3-30")
self.map_path = f"intermediate/map_{self.method}_{self.region}_{self.resolution}_{self.version}_{self.today.date()}.txt"

self.recalc_radius = 800000 # TODO: determine from model largest influence radius

def recalc_map(self):
"""Recalculate the map with the current Gaussian Process model.
Overrides the stored np.array raster of the map.
"""
# fit model to new data points

self.points = get_points(self.points_path, until=self.today)
self.points["lon"] = self.points.geometry.x
self.points["lat"] = self.points.geometry.y

X = self.points[["lon", "lat"]].values
y = self.points["wait"].values

self.gpr.regressor.optimizer = None
self.gpr = fit_gpr_silent(self.gpr, X, y)

# recalc the old map

self.raw_raster = np.loadtxt(self.old_map_path)

self.get_map_grid()
self.get_recalc_raster()

print("Compute pixels that are expected to differ...")
start = time.time()
to_predict = []
pixels_to_predict = []
for x, vertical_line in tqdm(
enumerate(self.grid.transpose()), total=len(self.grid.transpose())
):
for y, coords in enumerate(vertical_line):
if self.recalc_landmass[y][x] == 0:
continue
this_point = [float(coords[0]), float(coords[1])]
to_predict.append(this_point)
pixels_to_predict.append((y, x))
# batching the model calls
if len(to_predict) == self.batch_size:
prediction = model.predict(np.array(to_predict), return_std=False)
for i, (y, x) in enumerate(pixels_to_predict):
self.raw_raster[y][x] = prediction[i]

to_predict = []
pixels_to_predict = []

prediction = model.predict(np.array(to_predict), return_std=False)
for i, (y, x) in enumerate(pixels_to_predict):
self.raw_raster[y][x] = prediction[i]

print(f"Time elapsed to compute full map: {time.time() - start}")
print(
f"For map of shape: {self.raw_raster.shape} that is {self.raw_raster.shape[0] * self.raw_raster.shape[1]} pixels and an effective time per pixel of {(time.time() - start) / (self.raw_raster.shape[0] * self.raw_raster.shape[1])} seconds"
)
print((f"Only {recalc_landmass.sum()} pixels were recalculated. That is {recalc_landmass.sum() / (self.raw_raster.shape[0] * self.raw_raster.shape[1]) * 100}% of the map."))
print(f"And time per recalculated pixel was {(time.time() - start) / recalc_landmass.sum()} seconds")

np.savetxt(self.map_path, self.raw_raster)
self.save_as_raster()


def get_recalc_raster(self):
"""Creats 2d np.array of raster where only pixels that are 1 should be recalculated."""
def pixel_from_point(point) -> tuple[int, int]:
lats = map.Y.transpose()[0]
lat_index = None
for i, lat in enumerate(lats):
if lat >= point["lat"] and point["lat"] >= lats[i+1]:
lat_index = i
break

lons = map.X[0]
lon_index = None
for i, lon in enumerate(lons):
if lon <= point["lon"] and point["lon"] <= lons[i+1]:
lon_index = i
break

return (lat_index, lon_index)

recalc_radius_pixels = int(np.ceil(abs(self.recalc_radius / (self.grid[0][0][0] - self.grid[0][0][1]))))

self.recalc_raster = np.zeros(self.grid.shape[1:])
self.recalc_raster.shape
new_points = get_points(self.points_path, begin=self.begin, until=self.today)
for i, point in new_points.iterrows():
lat_pixel, lon_pixel = pixel_from_point(point)

for i in range(lat_pixel - recalc_radius_pixels, lat_pixel + recalc_radius_pixels):
for j in range(lon_pixel - recalc_radius_pixels, lon_pixel + recalc_radius_pixels):
if i < 0 or j < 0 or i >= self.recalc_raster.shape[0] or j >= self.recalc_raster.shape[1]:
continue
self.recalc_raster[i, j] = 1

print("Report reduction of rasters.")
print(self.recalc_raster.sum(), self.recalc_raster.shape[0] * self.recalc_raster.shape[1], self.recalc_raster.sum() / (self.recalc_raster.shape[0] * self.recalc_raster.shape[1]))
self.get_landmass_raster()
self.recalc_raster = self.recalc_raster * self.landmass_raster
print(self.landmass_raster.sum(), self.landmass_raster.shape[0] * self.landmass_raster.shape[1], self.landmass_raster.sum() / (self.landmass_raster.shape[0] * self.landmass_raster.shape[1]))
print(self.recalc_raster.sum(), self.recalc_raster.shape[0] * self.recalc_raster.shape[1], self.recalc_raster.sum() / (self.recalc_raster.shape[0] * self.recalc_raster.shape[1]))
Binary file removed visualization/predictors/intermediate/map_asia.png
Binary file not shown.
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
130 changes: 76 additions & 54 deletions visualization/predictors/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
from sklearn.base import BaseEstimator, RegressorMixin
from tqdm.auto import tqdm
import time
import os
import pickle
from utils.utils_data import get_points

tqdm.pandas()

Expand Down Expand Up @@ -57,9 +60,12 @@ def __init__(
self.version = version
self.verbose = verbose

os.makedirs("temp", exist_ok=True)

self.map_boundary = self.get_map_boundary()
self.rasterio_path = f"intermediate/map_{self.method}_{self.region}_{self.resolution}_{self.version}.tif"
self.map_path = f"intermediate/map_{method}_{region}_{resolution}_{version}.txt"
self.landmass_path = "temp/landmass.tif"


def get_map_boundary(self):
Expand Down Expand Up @@ -119,7 +125,7 @@ def map_to_polygon(self):

return polygon

def save_as_raster(self):
def save_as_rasterio(self):
"""Saves as .tif raster for rasterio."""

polygon_vertices_x, polygon_vertices_y, pixel_width, pixel_height = (
Expand Down Expand Up @@ -170,64 +176,80 @@ def save_as_raster(self):
) as destination:
destination.write(self.raw_raster, 1)

self.raster = rasterio.open(self.rasterio_path)

return map
self.rasterio_raster = rasterio.open(self.rasterio_path)

def get_landmass_raster(self):
"""Creates raster of landmass as np.array"""
self.landmass_raster = np.ones(self.grid.shape[1:])

polygon_vertices_x, polygon_vertices_y, pixel_width, pixel_height = (
self.define_raster()
)

# handling special case when map spans over the 180 degree meridian
if polygon_vertices_x[0] > 0 and polygon_vertices_x[2] < 0:
polygon_vertices_x[2] = 2 * MERIDIAN + polygon_vertices_x[2]
polygon_vertices_x[3] = 2 * MERIDIAN + polygon_vertices_x[3]

# https://gis.stackexchange.com/questions/425903/getting-rasterio-transform-affine-from-lat-and-long-array

# lower/upper - left/right
ll = (polygon_vertices_x[0], polygon_vertices_y[0])
ul = (polygon_vertices_x[1], polygon_vertices_y[1]) # in lon, lat / x, y order
ur = (polygon_vertices_x[2], polygon_vertices_y[2])
lr = (polygon_vertices_x[3], polygon_vertices_y[3])
cols, rows = pixel_width, pixel_height

# ground control points
gcps = [
GCP(0, 0, *ul),
GCP(0, cols, *ur),
GCP(rows, 0, *ll),
GCP(rows, cols, *lr),
]

# seems to need the vertices of the map polygon
transform = from_gcps(gcps)

# cannot use np.float128 to write to tif
self.landmass_raster = self.landmass_raster.astype(np.float64)

# save the colored raster using the above transform
# important: rasterio requires [0,0] of the raster to be in the upper left corner and [rows, cols] in the lower right corner
# TODO find out why raster is getting smaller in x direction when stored as tif (e.g. 393x700 -> 425x700)
with rasterio.open(
self.landmass_path,
"w",
driver="GTiff",
height=self.landmass_raster.shape[0],
width=self.landmass_raster.shape[1],
count=1,
crs=CRS.from_epsg(3857),
transform=transform,
dtype=self.landmass_raster.dtype,
) as destination:
destination.write(self.landmass_raster, 1)

landmass_rasterio = rasterio.open(self.landmass_path)

nodata = 0

countries = gpd.read_file(
"map_features/countries/ne_110m_admin_0_countries.shp"
)
countries = countries.to_crs(epsg=3857)
countries = countries[countries.NAME != "Antarctica"]
country_shapes = countries.geometry
country_shapes = country_shapes.apply(lambda x: make_valid(x))
self.landmass_raster = np.zeros(map.grid.shape[1:])
for x, vertical_line in tqdm(enumerate(map.grid.transpose()), total=len(map.grid.transpose())):
for y, coords in enumerate(vertical_line):
this_point = Point(float(coords[0]), float(coords[1]))
self.landmass_raster[y][x] = 1 if any([country_shape.contains(this_point) for country_shape in country_shapes]) else 0

def get_recalc_raster(self):

def pixel_from_point(point) -> tuple[int, int]:
lats = map.Y.transpose()[0]
lat_index = None
for i, lat in enumerate(lats):
if lat >= point["lat"] and point["lat"] >= lats[i+1]:
lat_index = i
break

lons = map.X[0]
lon_index = None
for i, lon in enumerate(lons):
if lon <= point["lon"] and point["lon"] <= lons[i+1]:
lon_index = i
break

return (lat_index, lon_index)

recalc_radius = 800000 # TODO: determine from model largest influence radius
recalc_radius_pixels = int(np.ceil(abs(recalc_radius / (map.grid[0][0][0] - map.grid[0][0][1]))))

self.recalc_raster = np.zeros(map.grid.shape[1:])
self.recalc_raster.shape
for i, point in points.iterrows():
lat_pixel, lon_pixel = pixel_from_point(point)

for i in range(lat_pixel - recalc_radius_pixels, lat_pixel + recalc_radius_pixels):
for j in range(lon_pixel - recalc_radius_pixels, lon_pixel + recalc_radius_pixels):
if i < 0 or j < 0 or i >= self.recalc_raster.shape[0] or j >= self.recalc_raster.shape[1]:
continue
self.recalc_raster[i, j] = 1

print("Report reduction of rasters.")
print(self.recalc_raster.sum(), self.recalc_raster.shape[0] * self.recalc_raster.shape[1], self.recalc_raster.sum() / (self.recalc_raster.shape[0] * self.recalc_raster.shape[1]))
self.get_landmass_raster()
self.recalc_raster = self.recalc_raster * self.landmass_raster
print(self.landmass_raster.sum(), self.landmass_raster.shape[0] * self.landmass_raster.shape[1], self.landmass_raster.sum() / (self.landmass_raster.shape[0] * self.landmass_raster.shape[1]))
print(self.recalc_raster.sum(), self.recalc_raster.shape[0] * self.recalc_raster.shape[1], self.recalc_raster.sum() / (self.recalc_raster.shape[0] * self.recalc_raster.shape[1]))

out_image, out_transform = rasterio.mask.mask(
landmass_rasterio, country_shapes, nodata=nodata
)

self.landmass_raster = out_image[0]

# cleanup
os.remove(self.landmass_path)

def get_map_grid(self) -> np.array:
"""Create pixel grid for map."""
Expand Down Expand Up @@ -737,7 +759,7 @@ def fit(
recompute=True,
no_data_threshold=0.00000001,
):
self.raster = None
self.rasterio_raster = None
self.raw_raster: np.array = None

self.points = X
Expand Down Expand Up @@ -806,7 +828,7 @@ def fit(
np.savetxt(f"intermediate/map_{self.region}.txt", Z)

self.raw_raster = Z
self.save_as_raster()
self.save_as_rasterio()

return self

Expand All @@ -818,8 +840,8 @@ def predict(self, X):

for lon, lat in X:
# transform the lat/lon to the raster's coordinate system
x, y = self.raster.index(lon, lat)
x, y = self.rasterio_raster.index(lon, lat)
# read the raster at the given coordinates
predictions.append(self.raster.read(1)[x, y])
predictions.append(self.rasterio_raster.read(1)[x, y])

return np.array(predictions)
return np.array(predictions)
Loading

0 comments on commit 1c26a33

Please sign in to comment.