Skip to content

Commit

Permalink
Merge pull request #20 from mrchrisadams/ca-issue-19-double-quote-fal…
Browse files Browse the repository at this point in the history
…lback

Add fallback exception when double quotes are used - see issue #19
  • Loading branch information
cldellow committed Jul 22, 2023
2 parents 231421b + 5b79d0a commit 48d5c40
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 4 deletions.
12 changes: 12 additions & 0 deletions .gitpod.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# This configuration file was automatically generated by Gitpod.
# Please adjust to your needs (see https://www.gitpod.io/docs/introduction/learn-gitpod/gitpod-yaml)
# and commit this file to your remote git repository to share the goodness with others.

# Learn more from ready-to-use templates: https://www.gitpod.io/docs/introduction/getting-started/quickstart

# Workspace tasks. The single task below creates a Python virtual environment
# in ./venv, activates it, and installs this package in editable mode together
# with its `test` extras.
tasks:
- name: Set up environment for development
init: |
python -m venv venv
source venv/bin/activate
pip install -e '.[test]'
23 changes: 23 additions & 0 deletions datasette_parquet/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
class DoubleQuoteForLiteraValue(Exception):
    """
    Raised when a literal value to compare against is wrapped in double
    quotes instead of single quotes.

    DuckDB follows the SQL standard more closely than SQLite and, as a
    result, is stricter about the use of double quotes to wrap literal
    values.

    For more info on SQLite's leniency see the SQLite docs:
    https://www.sqlite.org/quirks.html#double_quoted_string_literals_are_accepted

    Parameters:
        matches: the double-quoted fragments that were detected; the first
            one is quoted in the default message.
        message: optional replacement for the default explanation.

    Attributes:
        matches: the value passed as ``matches``.
        message: the human-readable explanation, also returned by ``str()``.
    """

    def __init__(self, matches, message=None):
        self.matches = matches
        if message:
            self.message = message
        else:
            # Building the message must never raise by itself, so only
            # mention a location when there is at least one match.
            location = f" at: {matches[0]}" if matches else ""
            self.message = (
                "It looks like you are using a double quoted string "
                f"for a value{location}. "
                "To make this work with DuckDB, wrap it in single quoted "
                "strings instead."
            )
        # ``args`` stays ``(matches,)`` so existing callers that inspect
        # ``exc.args`` keep working.
        super().__init__(matches)

    def __str__(self):
        # Without this override ``str(exc)`` would render the raw match list
        # and the explanation above would never be shown to the user.
        return self.message


# Correctly spelled alias; the original (misspelled) name is kept so existing
# imports and ``except`` clauses continue to work.
DoubleQuoteForLiteralValue = DoubleQuoteForLiteraValue
53 changes: 49 additions & 4 deletions datasette_parquet/winging_it.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,37 @@
import time
import re
import typing

from duckdb import BinderException

from .rewrite import rewrite, NO_OP_SQL
from . import exceptions


# A regular expression to find the literal wrapped in double quotes.
# GOOD for DuckDB, works fine:
# `WHERE column = 'some value'`
#
# BAD for DuckDB, triggers duckdb.BinderException:
# `WHERE column = "some value"`
DOUBLE_QUOTES_AROUND_LITERALS_PATTERN = r'(?<![A-Za-z0-9_])"(?:[^"]*(?:"[^"]*)*[^"]*)"(?![A-Za-z0-9_])' # noqa


# The annotation is quoted so it is not evaluated when the function is defined.
def find_matching_double_quote_usage(ex: "BinderException") -> typing.List[str]:
    """
    Accepts a BinderException from DuckDB, and returns a list of
    matching strings in double quotes.

    Only messages of the form
    'Binder Error: Referenced column "LITERAL_IN_DOUBLE_QUOTES" not found in
    FROM clause...' are considered. Any other message yields an empty list,
    so binder errors that merely quote an identifier are not mistaken for a
    double-quoted literal.

    Parameters:
        ex: the exception whose first argument holds the error message.

    Returns:
        The double-quoted fragments found before the
        ' not found in FROM clause' marker, or an empty list.
    """
    # Nothing to inspect if the exception carries no textual message.
    if not ex.args or not isinstance(ex.args[0], str):
        return []

    # Text before the marker should be:
    # 'Binder Error: Referenced column "LITERAL_IN_DOUBLE_QUOTES"'
    referenced_column_message, marker, _ = ex.args[0].partition(
        ' not found in FROM clause'
    )
    if not marker:
        # A different kind of binder error: leave it to the caller to re-raise.
        return []

    # find the offending values wrapped in double quotes
    return re.findall(
        DOUBLE_QUOTES_AROUND_LITERALS_PATTERN, referenced_column_message
    )


# A collection of classes to provide a facade that mimics the sqlite3 DB-API
# interface.

class Row:
def __init__(self, columns, tpl):
self.columns = columns
Expand Down Expand Up @@ -62,7 +90,16 @@ def execute(self, sql, parameters=None):

#print('## params={} sql={}'.format(parameters, sql))
t = time.time()
rv = self.cursor.execute(sql, parameters)
try:
rv = self.cursor.execute(sql, parameters)
except BinderException as ex:
matches = find_matching_double_quote_usage(ex)
if matches:
raise exceptions.DoubleQuoteForLiteraValue(matches)
else:
# continue raising the original BinderException
raise

#print('took {}'.format(time.time() - t))
return rv

Expand Down Expand Up @@ -128,8 +165,16 @@ def execute(self, sql, parameters=None):
#print('! rewritten sql={}'.format(sql))
sql, parameters = fixup_params(sql, parameters)
#print('!! params={} sql={}'.format(parameters, sql))
rv = self.conn.execute(sql, parameters)

try:
rv = self.conn.execute(sql, parameters)
except BinderException as ex:
matches = find_matching_double_quote_usage(ex)
if matches:
raise exceptions.DoubleQuoteForLiteraValue(matches)
else:
# continue raising the original BinderException
raise

return ProxyCursor(self.conn, rv)

def fetchall(self):
Expand Down
14 changes: 14 additions & 0 deletions tests/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import pytest
import duckdb
from datasette_parquet.winging_it import ProxyConnection
from datasette_parquet import exceptions

@pytest.fixture(scope="session")
def datasette():
Expand Down Expand Up @@ -71,3 +72,16 @@ def test_fetchone():
conn = ProxyConnection(raw_conn)
fetched = conn.execute('SELECT 1 AS col').fetchone()
assert fetched['col'] == 1


def test_catch_double_quote_usage_for_literal(datasette):
    """
    A double-quoted literal in a WHERE clause must raise
    DoubleQuoteForLiteraValue when the query is executed.

    This is a plain synchronous test, so it carries no asyncio marker.
    """
    # NOTE(review): the ``datasette`` fixture is requested but not used
    # directly; presumably it is needed for its setup side effects. Confirm
    # before removing it.
    raw_conn = duckdb.connect()
    conn = ProxyConnection(raw_conn)

    # try reading the parquet file in trove/userdata1.parquet, comparing
    # against a value wrapped in double quotes
    explodey_string_with_double_quotes = 'SELECT * from "./trove/userdata1.parquet" WHERE first_name = "Amanda"'

    with pytest.raises(exceptions.DoubleQuoteForLiteraValue):
        conn.execute(explodey_string_with_double_quotes).fetchall()

0 comments on commit 48d5c40

Please sign in to comment.