Skip to content

Commit

Permalink
Merge pull request #61 from FlorisCalkoen/fix/feb-release
Browse files Browse the repository at this point in the history
Fix add column
  • Loading branch information
cholmes authored Dec 2, 2024
2 parents 270af67 + 113524c commit 98bae66
Showing 1 changed file with 17 additions and 12 deletions.
29 changes: 17 additions & 12 deletions open_buildings/overture/add_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,17 @@
# parquet to geoparquet.


import glob
import os
import duckdb
import time
import tempfile
import shutil
import subprocess
import glob
from duckdb.typing import *
import tempfile
import time

import duckdb
import mercantile
import shutil
from duckdb.typing import *


def lat_lon_to_quadkey(lat: DOUBLE, lon: DOUBLE, level: INTEGER) -> VARCHAR:
# Convert latitude and longitude to tile using mercantile
Expand Down Expand Up @@ -44,6 +46,7 @@ def add_quadkey(con):
);
""")


def add_country_iso(con, country_parquet_path):
# Load country parquet file into duckdb
con.execute(f"CREATE TABLE countries AS SELECT * FROM read_parquet('{country_parquet_path}')")
Expand Down Expand Up @@ -88,8 +91,8 @@ def process_parquet_file(input_parquet_path, output_folder, country_parquet_path

con.execute('LOAD spatial;')

# Load parquet file into duckdb
con.execute(f"CREATE TABLE buildings AS SELECT * FROM read_parquet('{input_parquet_path}')")
# NOTE: Exclude the `names` column: it is entirely NULL and makes DuckDB raise InternalException ("INTERNAL Error: Attempted to dereference unique_ptr that is NULL!").
con.execute(f"CREATE OR REPLACE TABLE buildings AS SELECT * EXCLUDE(names) FROM read_parquet('{input_parquet_path}')")

if add_quadkey_option:
add_quadkey(con)
Expand Down Expand Up @@ -126,7 +129,9 @@ def process_parquet_files(input_path, output_folder, country_parquet_path, overw
process_parquet_file(input_path, output_folder, country_parquet_path, overwrite, add_quadkey_option, add_country_iso_option, verbose)

# Example invocation: to run this module directly from Python, uncomment the lines below and fill in your own paths.
#input_path = '/Volumes/fastdata/overture/s3-data/buildings/'
#output_folder = '/Volumes/fastdata/overture/refined-parquet/'
#country_parquet_path = '/Volumes/fastdata/overture/countries.parquet'
#process_parquet_files(input_path, output_folder, country_parquet_path, overwrite=False, add_quadkey_option=True, add_country_iso_option=True)
# OVERTURE_DIR = pathlib.Path('~/data/src/overture/2024-02-15-alpha.0').expanduser()
# OUT_DIR = pathlib.Path('~/data/prc/overture/2024-02-15')
# ADMIN_BOUNDARIES_LEVEL_1_FP = pathlib.Path("~/data/prc/overture/2024-02-15/admin_boundaries_level_1.parquet")

# process_parquet_files(str(OVERTURE_DIR), str(OUT_DIR), str(ADMIN_BOUNDARIES_LEVEL_1_FP), overwrite=False, add_quadkey_option=True, add_country_iso_option=False)

0 comments on commit 98bae66

Please sign in to comment.