Skip to content

Commit

Permalink
Merge branch 'main' of github.com:cldellow/datasette-parquet into main
Browse files Browse the repository at this point in the history
  • Loading branch information
cldellow committed Mar 1, 2023
2 parents ff4c510 + fcfa34b commit f6e3cd8
Showing 1 changed file with 3 additions and 12 deletions.
15 changes: 3 additions & 12 deletions datasette_parquet/ddl.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,14 @@
import json
from pathlib import Path

def sniff_json_columns(fname):
    """Derive a SQL column list from the first record of a newline-delimited JSON file.

    Only the first line of the file is read; its top-level keys become the
    projected columns.

    Args:
        fname: Path of the newline-delimited JSON file to inspect.

    Returns:
        A comma-separated string of ``json->>'key' AS "key"`` expressions,
        one per key of the first record, in the record's key order.

    Raises:
        json.JSONDecodeError: If the first line is empty or not valid JSON.
        AttributeError: If the first line is valid JSON but not an object.
    """
    # JSON text is UTF-8 by specification; do not depend on the locale default.
    with open(fname, encoding='utf-8') as f:
        first_line = f.readline()

    record = json.loads(first_line)

    # Double embedded quotes so a key containing ' or " cannot terminate the
    # SQL string literal / quoted identifier early.
    # `.keys()` is kept deliberately: a non-object first line fails loudly.
    return ', '.join(
        "json->>'{}' AS \"{}\"".format(key.replace("'", "''"), key.replace('"', '""'))
        for key in record.keys()
    )

def view_for(view_name, fname, glob):
    """Build the ``CREATE VIEW`` statement that exposes a data file as a view.

    The reader is chosen from the extension of *fname*: CSV/TSV files go
    through ``read_csv_auto``, Parquet files are selected from directly, and
    newline-delimited JSON goes through ``read_ndjson_auto``.

    Args:
        view_name: Desired view name; dots are replaced with underscores.
        fname: File name whose extension selects the reader.
        glob: Path or glob pattern embedded in the statement as the source.

    Returns:
        The ``CREATE VIEW`` SQL string, or ``None`` when the extension of
        *fname* is not one of the recognised ones.
    """
    # Double embedded quotes so unusual names cannot terminate the quoted
    # identifier or the string literal early and yield broken SQL.
    view_name = view_name.replace('.', '_').replace('"', '""')
    source = glob.replace("'", "''")

    if fname.endswith(('.csv', '.tsv')):
        return "CREATE VIEW \"{}\" AS SELECT * FROM read_csv_auto('{}', header=true)".format(view_name, source)
    if fname.endswith('.parquet'):
        return "CREATE VIEW \"{}\" AS SELECT * FROM '{}'".format(view_name, source)
    if fname.endswith(('.ndjson', '.jsonl')):
        return "CREATE VIEW \"{}\" AS SELECT * FROM read_ndjson_auto('{}')".format(view_name, source)

    # Unrecognised extension: no view can be built for this file.
    return None

def create_views(dirname):
rv = []
Expand Down

0 comments on commit f6e3cd8

Please sign in to comment.