Skip to content

Commit

Permalink
[FEAT] Check field types in database emulation (#4862)
Browse files Browse the repository at this point in the history
There is currently a problem with a table that has an index on a certain field, but is trying to insert values of a type not matching that index. It fails in Dynamo, which requires that all values used as index keys are of a single declared type (either string, number or binary).

However, the local emulation layer doesn't catch that, because it doesn't know anything about types that need to be checked.

Add a required `types` argument to `Table`, which must declare at least the key fields (of the table itself and of the indexes), and can optionally declare the types of other fields. It will be an error to insert values of the wrong type into the database.

This may look like an easy feature, but it has some edge cases you need to be aware of:

* First of all, it relies on users declaring the indexes in code; they can choose NOT to declare the indexes in code, and there may still be an error after deploying.
* Second of all: just because you declare a type on a fields saying what may be inserted into a table going forward, this can not make any guarantees on data already in the table. Don't use this feature to make any assumptions on data you read out of the table, only use it to sanity-check new data going into it!

We may decide this change is not worth it... (for example, it would not have caught the current problem, as the index was there in Dynamo but not declared in code, so this feature wouldn't have caught it).
  • Loading branch information
rix0rrr authored Jun 12, 2024
1 parent 177e1b6 commit 16f1246
Show file tree
Hide file tree
Showing 7 changed files with 686 additions and 118 deletions.
12 changes: 0 additions & 12 deletions testddb.py

This file was deleted.

135 changes: 87 additions & 48 deletions tests/test_dynamo.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from unittest import mock

from website import dynamo
from website.dynamo import Optional


class Helpers:
Expand Down Expand Up @@ -41,7 +42,9 @@ def get_pages(self, key, **kwargs):

class TestDynamoAbstraction(unittest.TestCase, Helpers):
def setUp(self):
self.table = dynamo.Table(dynamo.MemoryStorage(), 'table', 'id')
self.table = dynamo.Table(dynamo.MemoryStorage(), 'table', 'id', types={
'id': str,
})

def test_set_manipulation(self):
"""Test that adding to a set and removing from a set works."""
Expand Down Expand Up @@ -84,16 +87,38 @@ def test_cannot_set_manipulate_lists(self):
def test_sets_are_serialized_properly(self):
"""Test that adding to a set and removing from a set works."""
with with_clean_file('test.json'):
table = dynamo.Table(dynamo.MemoryStorage('test.json'), 'table', 'id')
table = dynamo.Table(dynamo.MemoryStorage('test.json'), 'table', 'id',
types={'id': str})
table.create(dict(
id='key',
values=set(['a', 'b', 'c']),
))

table = dynamo.Table(dynamo.MemoryStorage('test.json'), 'table', 'id')
table = dynamo.Table(dynamo.MemoryStorage('test.json'), 'table', 'id',
types={'id': str})
final = table.get(dict(id='key'))
self.assertEqual(final['values'], set(['a', 'b', 'c']))

def test_set_validation(self):
"""Test that adding to a set and removing from a set validates the elements."""
self.table = dynamo.Table(dynamo.MemoryStorage(), 'table', 'id', types={
'id': str,
'values': dynamo.SetOf(str),
})
self.table.create(dict(
id='key',
values=set(['a', 'b', 'c']),
))

self.table.update(dict(id='key'), dict(
values=dynamo.DynamoAddToStringSet('x', 'y'),
))
# This should not have thrown an exception
with self.assertRaises(ValueError):
self.table.update(dict(id='key'), dict(
values=dynamo.DynamoAddToStringSet(3),
))

def test_batch_get(self):
self.insert(
dict(id='k1', bla=1),
Expand Down Expand Up @@ -189,7 +214,10 @@ def setUp(self):
dynamo.MemoryStorage(),
'table',
partition_key='id',
sort_key='sort')
sort_key='sort', types={
'id': str,
'sort': str,
})

def test_put_and_get(self):
self.table.create(dict(
Expand Down Expand Up @@ -231,6 +259,14 @@ def setUp(self):
'table',
partition_key='id',
sort_key='sort',
types={
'id': str,
'sort': int,
'x': Optional(int),
'y': Optional(int),
'm': Optional(int),
'n': Optional(int),
},
indexes=[
dynamo.Index(
'x',
Expand All @@ -240,14 +276,14 @@ def setUp(self):
])

def test_query(self):
self.table.create({'id': 'key', 'sort': 1, 'm': 'val'})
self.table.create({'id': 'key', 'sort': 2, 'm': 'another'})
self.table.create({'id': 'key', 'sort': 1, 'm': 999})
self.table.create({'id': 'key', 'sort': 2, 'm': 888})

ret = list(self.table.get_many({'id': 'key'}))

self.assertEqual(ret, [
{'id': 'key', 'sort': 1, 'm': 'val'},
{'id': 'key', 'sort': 2, 'm': 'another'}
{'id': 'key', 'sort': 1, 'm': 999},
{'id': 'key', 'sort': 2, 'm': 888}
])

def test_cant_use_array_for_indexed_field(self):
Expand All @@ -259,93 +295,93 @@ def test_cant_use_array_for_partition(self):
self.table.create({'id': [1, 2]})

def test_query_with_filter(self):
self.table.create({'id': 'key', 'sort': 1, 'm': 'val'})
self.table.create({'id': 'key', 'sort': 2, 'm': 'another'})
self.table.create({'id': 'key', 'sort': 1, 'm': 999})
self.table.create({'id': 'key', 'sort': 2, 'm': 888})

ret = list(self.table.get_many({'id': 'key'}, filter={'m': 'another'}))
ret = list(self.table.get_many({'id': 'key'}, filter={'m': 888}))

self.assertEqual(ret, [
{'id': 'key', 'sort': 2, 'm': 'another'}
{'id': 'key', 'sort': 2, 'm': 888}
])

def test_between_query(self):
self.table.create({'id': 'key', 'sort': 1, 'x': 'x'})
self.table.create({'id': 'key', 'sort': 2, 'x': 'y'})
self.table.create({'id': 'key', 'sort': 3, 'x': 'z'})
self.table.create({'id': 'key', 'sort': 1, 'x': 1})
self.table.create({'id': 'key', 'sort': 2, 'x': 2})
self.table.create({'id': 'key', 'sort': 3, 'x': 3})

ret = list(self.table.get_many({
'id': 'key',
'sort': dynamo.Between(2, 5),
}))
self.assertEqual(ret, [
{'id': 'key', 'sort': 2, 'x': 'y'},
{'id': 'key', 'sort': 3, 'x': 'z'},
{'id': 'key', 'sort': 2, 'x': 2},
{'id': 'key', 'sort': 3, 'x': 3},
])

def test_no_superfluous_keys(self):
with self.assertRaises(ValueError):
self.table.get_many({'m': 'some_value', 'Z': 'some_other_value'})

def test_query_index(self):
self.table.create({'id': 'key', 'sort': 1, 'm': 'val'})
self.table.create({'id': 'key', 'sort': 2, 'm': 'another'})
self.table.create({'id': 'key', 'sort': 1, 'm': 999})
self.table.create({'id': 'key', 'sort': 2, 'm': 888})

ret = list(self.table.get_many({'m': 'val'}))
ret = list(self.table.get_many({'m': 999}))

self.assertEqual(ret, [
{'id': 'key', 'sort': 1, 'm': 'val'}
{'id': 'key', 'sort': 1, 'm': 999}
])

def test_query_index_with_filter(self):
self.table.create({'id': 'key', 'sort': 1, 'm': 'val', 'p': 'p'})
self.table.create({'id': 'key', 'sort': 3, 'm': 'val', 'p': 'p'})
self.table.create({'id': 'key', 'sort': 2, 'm': 'another', 'q': 'q'})
self.table.create({'id': 'key', 'sort': 1, 'm': 999, 'p': 'p'})
self.table.create({'id': 'key', 'sort': 3, 'm': 999, 'p': 'p'})
self.table.create({'id': 'key', 'sort': 2, 'm': 888, 'q': 'q'})

ret = list(self.table.get_many({'m': 'val'}, filter={'p': 'p'}))
ret = list(self.table.get_many({'m': 999}, filter={'p': 'p'}))

self.assertEqual(ret, [
{'id': 'key', 'sort': 1, 'm': 'val', 'p': 'p'},
{'id': 'key', 'sort': 3, 'm': 'val', 'p': 'p'},
{'id': 'key', 'sort': 1, 'm': 999, 'p': 'p'},
{'id': 'key', 'sort': 3, 'm': 999, 'p': 'p'},
])

def test_query_index_with_partition_key(self):
self.table.create({'id': 'key', 'sort': 1, 'x': 'val', 'y': 0})
self.table.create({'id': 'key', 'sort': 2, 'x': 'val', 'y': 1})
self.table.create({'id': 'key', 'sort': 3, 'x': 'val', 'y': 1})
self.table.create({'id': 'key', 'sort': 4, 'x': 'another_val', 'y': 2})
self.table.create({'id': 'key', 'sort': 1, 'x': 1, 'y': 0})
self.table.create({'id': 'key', 'sort': 2, 'x': 1, 'y': 1})
self.table.create({'id': 'key', 'sort': 3, 'x': 1, 'y': 1})
self.table.create({'id': 'key', 'sort': 4, 'x': 2, 'y': 2})

ret = list(self.table.get_many({'x': 'val'}))
ret = list(self.table.get_many({'x': 1}))

self.assertEqual(ret, [
{'id': 'key', 'sort': 1, 'x': 'val', 'y': 0},
{'id': 'key', 'sort': 2, 'x': 'val', 'y': 1},
{'id': 'key', 'sort': 3, 'x': 'val', 'y': 1}
{'id': 'key', 'sort': 1, 'x': 1, 'y': 0},
{'id': 'key', 'sort': 2, 'x': 1, 'y': 1},
{'id': 'key', 'sort': 3, 'x': 1, 'y': 1}
])

def test_query_index_with_partition_sort_key(self):
self.table.create({'id': 'key', 'sort': 1, 'x': 'val', 'y': 0})
self.table.create({'id': 'key', 'sort': 2, 'x': 'val', 'y': 1})
self.table.create({'id': 'key', 'sort': 3, 'x': 'val', 'y': 1})
self.table.create({'id': 'key', 'sort': 4, 'x': 'another_val', 'y': 2})
self.table.create({'id': 'key', 'sort': 1, 'x': 1, 'y': 0})
self.table.create({'id': 'key', 'sort': 2, 'x': 1, 'y': 1})
self.table.create({'id': 'key', 'sort': 3, 'x': 1, 'y': 1})
self.table.create({'id': 'key', 'sort': 4, 'x': 2, 'y': 2})

ret = list(self.table.get_many({'x': 'val', 'y': 1}))
ret = list(self.table.get_many({'x': 1, 'y': 1}))

self.assertEqual(ret, [
{'id': 'key', 'sort': 2, 'x': 'val', 'y': 1},
{'id': 'key', 'sort': 3, 'x': 'val', 'y': 1}
{'id': 'key', 'sort': 2, 'x': 1, 'y': 1},
{'id': 'key', 'sort': 3, 'x': 1, 'y': 1}
])

def test_query_index_sort_key_between(self):
self.table.create({'id': 'key', 'sort': 1, 'x': 'val', 'y': 1})
self.table.create({'id': 'key', 'sort': 2, 'x': 'val', 'y': 3})
self.table.create({'id': 'key', 'sort': 3, 'x': 'val', 'y': 6})
self.table.create({'id': 'key', 'sort': 1, 'x': 1, 'y': 1})
self.table.create({'id': 'key', 'sort': 2, 'x': 1, 'y': 3})
self.table.create({'id': 'key', 'sort': 3, 'x': 1, 'y': 6})

ret = list(self.table.get_many({
'x': 'val',
'x': 1,
'y': dynamo.Between(2, 5),
}))
self.assertEqual(ret, [
{'id': 'key', 'sort': 2, 'x': 'val', 'y': 3}
{'id': 'key', 'sort': 2, 'x': 1, 'y': 3}
])

def test_paginated_query(self):
Expand Down Expand Up @@ -567,7 +603,10 @@ def setUp(self):
''),
'table',
partition_key='id',
sort_key='sort')
sort_key='sort', types={
'id': str,
'sort': int,
})

self.db.query.return_value = {'Items': []}

Expand Down
Loading

0 comments on commit 16f1246

Please sign in to comment.