From 16f1246b81f0f47759a6f0a8bab0e0aa9c452639 Mon Sep 17 00:00:00 2001 From: Rico Hermans Date: Wed, 12 Jun 2024 16:50:32 +0200 Subject: [PATCH] [FEAT] Check field types in database emulation (#4862) There is currently a problem with a table that has an index on a certain field, but is trying to insert values of a type not matching that index. It fails in Dynamo, which requires that all values used as index keys are of a single declared type (either string, number or binary). However, the local emulation layer doesn't catch that, because it doesn't know anything about types that need to be checked. Add a required `types` argument to `Table`, which must declare at least the key fields (of the table itself and of the indexes), and can optionally declare the types of other fields. It will be an error to insert values of the wrong type into the database. This may look like an easy feature, but it has some edge cases you need to be aware of: * First of all, it relies on users declaring the indexes in code; they can choose NOT to declare the indexes in code, and there may still be an error after deploying. * Second of all: just because you declare a type on a fields saying what may be inserted into a table going forward, this can not make any guarantees on data already in the table. Don't use this feature to make any assumptions on data you read out of the table, only use it to sanity-check new data going into it! We may decide this change is not worth it... (for example, it would not have caught the current problem, as the index was there in Dynamo but not declared in code, so this feature wouldn't have caught it). --- testddb.py | 12 -- tests/test_dynamo.py | 135 +++++++++++------ tests_e2e.py | 83 +++++++++-- utils.py | 2 +- website/database.py | 318 ++++++++++++++++++++++++++++++++++------ website/dynamo.py | 249 ++++++++++++++++++++++++++++++- website/for_teachers.py | 5 +- 7 files changed, 686 insertions(+), 118 deletions(-) delete mode 100644 testddb.py diff --git a/testddb.py b/testddb.py deleted file mode 100644 index 334b433eadc..00000000000 --- a/testddb.py +++ /dev/null @@ -1,12 +0,0 @@ - -import pprint -from website import dynamo, database - -storage = dynamo.AwsDynamoStorage.from_env() -if not storage: - raise RuntimeError('DDB not configure quickly') - -Q = dynamo.Table(storage, 'preferences', partition_key='id', sort_key='level') - -recs = database.USERS.get({'username': 'rix0rrr'}) -pprint.pprint(recs) diff --git a/tests/test_dynamo.py b/tests/test_dynamo.py index 4fb887b3b3d..0b338f888af 100644 --- a/tests/test_dynamo.py +++ b/tests/test_dynamo.py @@ -4,6 +4,7 @@ from unittest import mock from website import dynamo +from website.dynamo import Optional class Helpers: @@ -41,7 +42,9 @@ def get_pages(self, key, **kwargs): class TestDynamoAbstraction(unittest.TestCase, Helpers): def setUp(self): - self.table = dynamo.Table(dynamo.MemoryStorage(), 'table', 'id') + self.table = dynamo.Table(dynamo.MemoryStorage(), 'table', 'id', types={ + 'id': str, + }) def test_set_manipulation(self): """Test that adding to a set and removing from a set works.""" @@ -84,16 +87,38 @@ def test_cannot_set_manipulate_lists(self): def test_sets_are_serialized_properly(self): """Test that adding to a set and removing from a set works.""" with with_clean_file('test.json'): - table = dynamo.Table(dynamo.MemoryStorage('test.json'), 'table', 'id') + table = dynamo.Table(dynamo.MemoryStorage('test.json'), 'table', 'id', + types={'id': str}) table.create(dict( id='key', values=set(['a', 'b', 'c']), )) - table = dynamo.Table(dynamo.MemoryStorage('test.json'), 'table', 'id') + table = dynamo.Table(dynamo.MemoryStorage('test.json'), 'table', 'id', + types={'id': str}) final = table.get(dict(id='key')) self.assertEqual(final['values'], set(['a', 'b', 'c'])) + def test_set_validation(self): + """Test that adding to a set and removing from a set validates the elements.""" + self.table = dynamo.Table(dynamo.MemoryStorage(), 'table', 'id', types={ + 'id': str, + 'values': dynamo.SetOf(str), + }) + self.table.create(dict( + id='key', + values=set(['a', 'b', 'c']), + )) + + self.table.update(dict(id='key'), dict( + values=dynamo.DynamoAddToStringSet('x', 'y'), + )) + # This should not have thrown an exception + with self.assertRaises(ValueError): + self.table.update(dict(id='key'), dict( + values=dynamo.DynamoAddToStringSet(3), + )) + def test_batch_get(self): self.insert( dict(id='k1', bla=1), @@ -189,7 +214,10 @@ def setUp(self): dynamo.MemoryStorage(), 'table', partition_key='id', - sort_key='sort') + sort_key='sort', types={ + 'id': str, + 'sort': str, + }) def test_put_and_get(self): self.table.create(dict( @@ -231,6 +259,14 @@ def setUp(self): 'table', partition_key='id', sort_key='sort', + types={ + 'id': str, + 'sort': int, + 'x': Optional(int), + 'y': Optional(int), + 'm': Optional(int), + 'n': Optional(int), + }, indexes=[ dynamo.Index( 'x', @@ -240,14 +276,14 @@ def setUp(self): ]) def test_query(self): - self.table.create({'id': 'key', 'sort': 1, 'm': 'val'}) - self.table.create({'id': 'key', 'sort': 2, 'm': 'another'}) + self.table.create({'id': 'key', 'sort': 1, 'm': 999}) + self.table.create({'id': 'key', 'sort': 2, 'm': 888}) ret = list(self.table.get_many({'id': 'key'})) self.assertEqual(ret, [ - {'id': 'key', 'sort': 1, 'm': 'val'}, - {'id': 'key', 'sort': 2, 'm': 'another'} + {'id': 'key', 'sort': 1, 'm': 999}, + {'id': 'key', 'sort': 2, 'm': 888} ]) def test_cant_use_array_for_indexed_field(self): @@ -259,27 +295,27 @@ def test_cant_use_array_for_partition(self): self.table.create({'id': [1, 2]}) def test_query_with_filter(self): - self.table.create({'id': 'key', 'sort': 1, 'm': 'val'}) - self.table.create({'id': 'key', 'sort': 2, 'm': 'another'}) + self.table.create({'id': 'key', 'sort': 1, 'm': 999}) + self.table.create({'id': 'key', 'sort': 2, 'm': 888}) - ret = list(self.table.get_many({'id': 'key'}, filter={'m': 'another'})) + ret = list(self.table.get_many({'id': 'key'}, filter={'m': 888})) self.assertEqual(ret, [ - {'id': 'key', 'sort': 2, 'm': 'another'} + {'id': 'key', 'sort': 2, 'm': 888} ]) def test_between_query(self): - self.table.create({'id': 'key', 'sort': 1, 'x': 'x'}) - self.table.create({'id': 'key', 'sort': 2, 'x': 'y'}) - self.table.create({'id': 'key', 'sort': 3, 'x': 'z'}) + self.table.create({'id': 'key', 'sort': 1, 'x': 1}) + self.table.create({'id': 'key', 'sort': 2, 'x': 2}) + self.table.create({'id': 'key', 'sort': 3, 'x': 3}) ret = list(self.table.get_many({ 'id': 'key', 'sort': dynamo.Between(2, 5), })) self.assertEqual(ret, [ - {'id': 'key', 'sort': 2, 'x': 'y'}, - {'id': 'key', 'sort': 3, 'x': 'z'}, + {'id': 'key', 'sort': 2, 'x': 2}, + {'id': 'key', 'sort': 3, 'x': 3}, ]) def test_no_superfluous_keys(self): @@ -287,65 +323,65 @@ def test_no_superfluous_keys(self): self.table.get_many({'m': 'some_value', 'Z': 'some_other_value'}) def test_query_index(self): - self.table.create({'id': 'key', 'sort': 1, 'm': 'val'}) - self.table.create({'id': 'key', 'sort': 2, 'm': 'another'}) + self.table.create({'id': 'key', 'sort': 1, 'm': 999}) + self.table.create({'id': 'key', 'sort': 2, 'm': 888}) - ret = list(self.table.get_many({'m': 'val'})) + ret = list(self.table.get_many({'m': 999})) self.assertEqual(ret, [ - {'id': 'key', 'sort': 1, 'm': 'val'} + {'id': 'key', 'sort': 1, 'm': 999} ]) def test_query_index_with_filter(self): - self.table.create({'id': 'key', 'sort': 1, 'm': 'val', 'p': 'p'}) - self.table.create({'id': 'key', 'sort': 3, 'm': 'val', 'p': 'p'}) - self.table.create({'id': 'key', 'sort': 2, 'm': 'another', 'q': 'q'}) + self.table.create({'id': 'key', 'sort': 1, 'm': 999, 'p': 'p'}) + self.table.create({'id': 'key', 'sort': 3, 'm': 999, 'p': 'p'}) + self.table.create({'id': 'key', 'sort': 2, 'm': 888, 'q': 'q'}) - ret = list(self.table.get_many({'m': 'val'}, filter={'p': 'p'})) + ret = list(self.table.get_many({'m': 999}, filter={'p': 'p'})) self.assertEqual(ret, [ - {'id': 'key', 'sort': 1, 'm': 'val', 'p': 'p'}, - {'id': 'key', 'sort': 3, 'm': 'val', 'p': 'p'}, + {'id': 'key', 'sort': 1, 'm': 999, 'p': 'p'}, + {'id': 'key', 'sort': 3, 'm': 999, 'p': 'p'}, ]) def test_query_index_with_partition_key(self): - self.table.create({'id': 'key', 'sort': 1, 'x': 'val', 'y': 0}) - self.table.create({'id': 'key', 'sort': 2, 'x': 'val', 'y': 1}) - self.table.create({'id': 'key', 'sort': 3, 'x': 'val', 'y': 1}) - self.table.create({'id': 'key', 'sort': 4, 'x': 'another_val', 'y': 2}) + self.table.create({'id': 'key', 'sort': 1, 'x': 1, 'y': 0}) + self.table.create({'id': 'key', 'sort': 2, 'x': 1, 'y': 1}) + self.table.create({'id': 'key', 'sort': 3, 'x': 1, 'y': 1}) + self.table.create({'id': 'key', 'sort': 4, 'x': 2, 'y': 2}) - ret = list(self.table.get_many({'x': 'val'})) + ret = list(self.table.get_many({'x': 1})) self.assertEqual(ret, [ - {'id': 'key', 'sort': 1, 'x': 'val', 'y': 0}, - {'id': 'key', 'sort': 2, 'x': 'val', 'y': 1}, - {'id': 'key', 'sort': 3, 'x': 'val', 'y': 1} + {'id': 'key', 'sort': 1, 'x': 1, 'y': 0}, + {'id': 'key', 'sort': 2, 'x': 1, 'y': 1}, + {'id': 'key', 'sort': 3, 'x': 1, 'y': 1} ]) def test_query_index_with_partition_sort_key(self): - self.table.create({'id': 'key', 'sort': 1, 'x': 'val', 'y': 0}) - self.table.create({'id': 'key', 'sort': 2, 'x': 'val', 'y': 1}) - self.table.create({'id': 'key', 'sort': 3, 'x': 'val', 'y': 1}) - self.table.create({'id': 'key', 'sort': 4, 'x': 'another_val', 'y': 2}) + self.table.create({'id': 'key', 'sort': 1, 'x': 1, 'y': 0}) + self.table.create({'id': 'key', 'sort': 2, 'x': 1, 'y': 1}) + self.table.create({'id': 'key', 'sort': 3, 'x': 1, 'y': 1}) + self.table.create({'id': 'key', 'sort': 4, 'x': 2, 'y': 2}) - ret = list(self.table.get_many({'x': 'val', 'y': 1})) + ret = list(self.table.get_many({'x': 1, 'y': 1})) self.assertEqual(ret, [ - {'id': 'key', 'sort': 2, 'x': 'val', 'y': 1}, - {'id': 'key', 'sort': 3, 'x': 'val', 'y': 1} + {'id': 'key', 'sort': 2, 'x': 1, 'y': 1}, + {'id': 'key', 'sort': 3, 'x': 1, 'y': 1} ]) def test_query_index_sort_key_between(self): - self.table.create({'id': 'key', 'sort': 1, 'x': 'val', 'y': 1}) - self.table.create({'id': 'key', 'sort': 2, 'x': 'val', 'y': 3}) - self.table.create({'id': 'key', 'sort': 3, 'x': 'val', 'y': 6}) + self.table.create({'id': 'key', 'sort': 1, 'x': 1, 'y': 1}) + self.table.create({'id': 'key', 'sort': 2, 'x': 1, 'y': 3}) + self.table.create({'id': 'key', 'sort': 3, 'x': 1, 'y': 6}) ret = list(self.table.get_many({ - 'x': 'val', + 'x': 1, 'y': dynamo.Between(2, 5), })) self.assertEqual(ret, [ - {'id': 'key', 'sort': 2, 'x': 'val', 'y': 3} + {'id': 'key', 'sort': 2, 'x': 1, 'y': 3} ]) def test_paginated_query(self): @@ -567,7 +603,10 @@ def setUp(self): ''), 'table', partition_key='id', - sort_key='sort') + sort_key='sort', types={ + 'id': str, + 'sort': int, + }) self.db.query.return_value = {'Items': []} diff --git a/tests_e2e.py b/tests_e2e.py index b1b53c1c5b0..ce231f9c2b1 100644 --- a/tests_e2e.py +++ b/tests_e2e.py @@ -108,7 +108,7 @@ def assert_user_exists(self, username): body = { 'username': username, 'email': username + '@hedy.com', - 'language': 'nl', + 'language': 'en', 'keyword_language': 'en', 'agree_terms': 'yes', 'password': 'foobar', @@ -912,7 +912,12 @@ def test_invalid_public_profile(self): self.given_user_is_logged_in() # Create a program -> make sure it is not public - program = {'code': 'hello world', 'name': 'program 1', 'level': 1, 'shared': False} + program = {'code': 'print testing programs creation', + 'name': 'program 1', + 'level': 1, + 'adventure_name': 'default', + 'shared': False + } program_id = self.post_data('programs', program)['id'] # WHEN attempting to create a public profile with invalid bodies @@ -949,13 +954,19 @@ def test_public_profile_with_favourite(self): self.given_user_is_logged_in() # Create a program that is public -> can be set as favourite on the public profile - program = {'code': 'hello world', 'name': 'program 1', 'level': 1, 'shared': True} + program = {'code': 'print testing programs creation', + 'name': 'program 1', + 'level': 1, + 'adventure_name': 'default', + 'shared': True + } program_id = self.post_data('programs', program)['id'] public_profile = { 'image': '9', 'personal_text': 'welcome to my profile!', - 'favourite_program': program_id} + 'favourite_program': program_id + } # WHEN creating a new public profile with favourite program # THEN receive an OK response code from the server @@ -1035,7 +1046,13 @@ def test_create_program(self): self.given_fresh_user_is_logged_in() # WHEN submitting a valid program - program = {'code': 'hello world', 'name': 'program 1', 'level': 1, 'shared': False} + program = { + 'code': 'print testing programs creation', + 'name': 'program 1', + 'level': 1, + 'adventure_name': 'default', + 'shared': False + } # THEN receive an OK response code from the server program = self.post_data('programs', program) # THEN verify that the returned program has both a name and an id @@ -1074,7 +1091,13 @@ def test_invalid_make_program_public(self): def test_valid_make_program_public(self): # GIVEN a logged in user with at least one program self.given_user_is_logged_in() - program = {'code': 'hello world', 'name': 'program 1', 'level': 1, 'shared': False} + program = { + 'code': 'print testing programs creation', + 'name': 'program 1', + 'level': 1, + 'adventure_name': 'default', + 'shared': False + } program_id = self.post_data('programs', program)['id'] # WHEN making a program public @@ -1097,7 +1120,13 @@ def test_valid_make_program_public(self): def test_valid_make_program_private(self): # GIVEN a logged in user with at least one public program self.given_user_is_logged_in() - program = {'code': 'hello world', 'name': 'program 1', 'level': 1, 'shared': False} + program = { + 'code': 'print testing programs creation', + 'name': 'program 1', + 'level': 1, + 'adventure_name': 'default', + 'shared': False + } program_id = self.post_data('programs', program)['id'] self.post_data('programs/share/' + program_id + '/0', {'id': program_id}) @@ -1121,7 +1150,13 @@ def test_valid_make_program_private(self): def test_invalid_delete_program(self): # GIVEN a logged in user with at least one program self.given_user_is_logged_in() - program = {'code': 'hello world', 'name': 'program 1', 'level': 1, 'shared': False} + program = { + 'code': 'print testing programs creation', + 'name': 'program 1', + 'level': 1, + 'adventure_name': 'default', + 'shared': False + } self.post_data('programs', program)['id'] program_id = '123456' @@ -1132,7 +1167,13 @@ def test_invalid_delete_program(self): def test_valid_delete_program(self): # GIVEN a logged in user with at least one program self.given_user_is_logged_in() - program = {'code': 'hello world', 'name': 'program 1', 'level': 1, 'shared': False} + program = { + 'code': 'print testing programs creation', + 'name': 'program 1', + 'level': 1, + 'adventure_name': 'default', + 'shared': False + } program_id = self.post_data('programs', program)['id'] # WHEN deleting a program @@ -1147,7 +1188,13 @@ def test_valid_delete_program(self): def test_destroy_account_with_programs(self): # GIVEN a logged in user with at least one program self.given_user_is_logged_in() - program = {'code': 'hello world', 'name': 'program 1', 'level': 1, 'shared': False} + program = { + 'code': 'print testing programs creation', + 'name': 'program 1', + 'level': 1, + 'adventure_name': 'default', + 'shared': False + } self.post_data('programs', program)['id'] # WHEN deleting the user account @@ -1368,10 +1415,22 @@ def test_see_students_with_programs_in_class(self): self.given_fresh_user_is_logged_in() self.post_data('class/join', {'id': Class['id']}, expect_http_code=200) # GIVEN a student with two programs, one public and one private - public_program = {'code': 'hello world', 'name': 'program 1', 'level': 1, 'shared': False} + public_program = { + 'code': 'print testing programs creation', + 'name': 'program 1', + 'level': 1, + 'adventure_name': 'default', + 'shared': False + } public_program_id = self.post_data('programs', public_program)['id'] self.post_data('programs/share/' + public_program_id + '/False', {'id': public_program_id}) - private_program = {'code': 'hello world', 'name': 'program 2', 'level': 2, 'shared': False} + private_program = { + 'code': 'print testing programs creation', + 'name': 'program 2', + 'level': 2, + 'adventure_name': 'default', + 'shared': False + } self.post_data('programs', private_program)['id'] # GIVEN the aforementioned teacher diff --git a/utils.py b/utils.py index f09cb6f85a8..9d03f37ff7c 100644 --- a/utils.py +++ b/utils.py @@ -88,7 +88,7 @@ def times(): return int(round(time.time())) -DEBUG_MODE = False +DEBUG_MODE = not os.getenv('NO_DEBUG_MODE') def is_debug_mode(): diff --git a/website/database.py b/website/database.py index 40dd8a1b9a8..8342a9859d5 100644 --- a/website/database.py +++ b/website/database.py @@ -1,3 +1,30 @@ +"""Data layer for Hedy. + +This file defines all DynamoDB tables that make up the Hedy data model, +as well as the class 'Database' which can be used to access those tables. + +THE DATABASE CLASS +------------------- + +The Database class should implement logical operations that make sense +for the data model, spanning multiple tables if necessary. As much as you +can, hide the details of how data is stored in and queried from tables so +that the front-end doesn't have to think about those details. + +TYPE ANNOTATIONS +---------------- + +The tables below have type annotations. Type annotations will be used +to validate records that are stored INTO the database; they are not +used to validate records that are retrieved from the database. + +!!! You cannot assume that records retrieved from a table will always have + the fields of the types that are listed in the table definition! + +The record could be older than the validation that was added. Always +program defensively! +""" + import functools import operator import itertools @@ -5,11 +32,13 @@ import sys from os import path -from utils import timems, times +from utils import timems, times, is_debug_mode from . import dynamo, auth from . import querylog +from .dynamo import DictOf, Optional, ListOf, SetOf, RecordOf, EitherOf + is_offline = getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS') if is_offline: # Offline mode. Store data 1 directory upwards from `_internal` @@ -18,29 +47,104 @@ # Production or dev: use environment variables or dev storage storage = dynamo.AwsDynamoStorage.from_env() or dynamo.MemoryStorage("dev_database.json") -USERS = dynamo.Table(storage, "users", "username", indexes=[ - dynamo.Index("email"), - dynamo.Index("epoch", sort_key="created") -]) -TOKENS = dynamo.Table(storage, "tokens", "id", indexes=[ - dynamo.Index('id'), - dynamo.Index('username'), -]) -PROGRAMS = dynamo.Table(storage, "programs", "id", indexes=[ - dynamo.Index('username', sort_key='date', index_name='username-index'), - dynamo.Index('hedy_choice', sort_key='date', index_name='hedy_choice-index'), - # For the explore page, this index has 'level', 'lang' and 'adventure_name' - dynamo.Index('public', sort_key='date'), - - # For the filtered view of the 'explore' page (keys_only so we don't duplicate other attributes unnecessarily) - dynamo.Index('lang', sort_key='date', keys_only=True), - dynamo.Index('level', sort_key='date', keys_only=True), - dynamo.Index('adventure_name', sort_key='date', keys_only=True), -]) -CLASSES = dynamo.Table(storage, "classes", "id", indexes=[ - dynamo.Index('teacher'), - dynamo.Index('link'), -]) + +def only_in_dev(x): + """Return the argument only in debug mode. In production or offline mode, return None. + + This is intended to be used with validation expressions, so that when testing + locally we do validation, but production data that happens to work but doesn't + validate doesn't throw exceptions. + """ + return x if is_debug_mode() else None + + +USERS = dynamo.Table(storage, 'users', 'username', + types=only_in_dev({ + 'username': str, + 'password': str, + 'email': Optional(str), + 'language': Optional(str), + 'keyword_language': Optional(str), + 'created': int, + 'is_teacher': Optional(int), + 'verification_pending': Optional(str), + 'last_login': int, + 'country': Optional(str), + 'birth_year': Optional(int), + 'gender': Optional(str), + 'heard_about': Optional(ListOf(str)), + 'prog_experience': Optional(str), + 'experience_languages': Optional(ListOf(str)), + 'epoch': int, + 'second_teacher_in': Optional(ListOf(str)), + 'classes': Optional(SetOf(str)), + 'teacher': Optional(str), + 'pair_with_teacher': Optional(int), + 'teacher_request': Optional(bool) + }), + indexes=[ + dynamo.Index('email'), + dynamo.Index('epoch', sort_key='created') + ] + ) +TOKENS = dynamo.Table(storage, 'tokens', 'id', + types=only_in_dev({ + 'id': str, + 'username': str, + 'ttl': int, + }), + indexes=[ + dynamo.Index('username'), + ] + ) +PROGRAMS = dynamo.Table(storage, "programs", "id", + types=only_in_dev({ + 'id': str, + 'session': str, + 'username': str, + 'date': int, + 'hedy_choice': Optional(int), + 'public': Optional(int), + 'lang': str, + 'level': int, + 'code': str, + 'adventure_name': str, + 'name': str, + 'username_level': str, + 'error': Optional(bool), + 'is_modified': Optional(bool) + }), + indexes=[ + dynamo.Index('username', sort_key='date', index_name='username-index'), + dynamo.Index('hedy_choice', sort_key='date', index_name='hedy_choice-index'), + # For the explore page, this index has 'level', 'lang' and 'adventure_name' + dynamo.Index('public', sort_key='date'), + + # For the filtered view of the 'explore' page (keys_only so we don't duplicate + # other attributes unnecessarily) + dynamo.Index('lang', sort_key='date', keys_only=True), + dynamo.Index('level', sort_key='date', keys_only=True), + dynamo.Index('adventure_name', sort_key='date', keys_only=True), + ] + ) +CLASSES = dynamo.Table(storage, "classes", "id", + types=only_in_dev({ + 'id': str, + 'teacher': str, + 'link': str, + 'date': int, + 'name': str, + 'second_teachers': Optional(ListOf(RecordOf({ + 'role': str, + 'username': str, + }))), + 'students': Optional(SetOf(str)), + }), + indexes=[ + dynamo.Index('teacher'), + dynamo.Index('link'), + ] + ) # A custom teacher adventure # - id (str): id of the adventure @@ -51,12 +155,42 @@ # - name (str): adventure name # - public (int): 1 or 0 whether it can be shared # - tags_id (str): id of tags that describe this adventure. -ADVENTURES = dynamo.Table(storage, "adventures", "id", indexes=[ - dynamo.Index("creator"), dynamo.Index("public"), - dynamo.Index("name", sort_key="creator", index_name="name-creator-index")]) + + +ADVENTURES = dynamo.Table(storage, "adventures", "id", + types=only_in_dev({ + 'id': str, + 'date': int, + 'creator': str, + 'name': str, + 'classes': Optional(ListOf(str)), + 'level': EitherOf(str, int), # this might be a string or a int + 'levels': ListOf(str), + 'content': str, + 'public': int, + 'language': str, + 'formatted_content': Optional(str) + }), + indexes=[ + dynamo.Index("creator"), + dynamo.Index("public"), + dynamo.Index("name", sort_key="creator", index_name="name-creator-index") + ]) INVITATIONS = dynamo.Table( storage, "invitations", partition_key="username#class_id", - indexes=[dynamo.Index("username"), dynamo.Index("class_id")], + types=only_in_dev({ + 'username': str, + 'class_id': str, + 'timestamp': int, + 'ttl': int, + 'invited_as': str, + 'invited_as_text': str, + 'username#class_id': str + }), + indexes=[ + dynamo.Index("username"), + dynamo.Index("class_id"), + ], ) """ @@ -66,16 +200,49 @@ - tagged_in ([{ id, public, language }]): tagged in which adventures. - popularity (int): # of adventures it's been tagged in. """ -TAGS = dynamo.Table(storage, "tags", "id", indexes=[dynamo.Index("name", sort_key="popularity")]) +TAGS = dynamo.Table(storage, "tags", "id", + types=only_in_dev({ + 'id': str, + 'name': str, + 'popularity': int, + 'tagged_in': Optional(ListOf(RecordOf({ + 'id': str, + 'public': int, + 'language': str + }))) + }), + indexes=[ + dynamo.Index("name", sort_key="popularity") + ] + ) # A survey # - id (str): the identifier of the survey + the response identifier ex. "class_teacher1" or "students_student1" # - responses (str []): the response per question # - skip (str): if the survey should never be shown or today date to be reminded later -SURVEYS = dynamo.Table(storage, "surveys", "id") - -FEEDBACK = dynamo.Table(storage, "teacher_feedback", "id") +SURVEYS = dynamo.Table(storage, "surveys", "id", + types=only_in_dev({ + 'id': str, + 'responses': Optional(DictOf({ + str: RecordOf({ + 'answer': str, + 'question': str + }) + })) + }), + ) + +FEEDBACK = dynamo.Table(storage, "teacher_feedback", "id", + types=only_in_dev({ + 'id': str, + 'username': str, + 'email': str, + 'message': str, + 'category': str, + 'page': str, + 'date': int + })) # Class customizations # @@ -102,12 +269,64 @@ # - teacher_adventures (str[]): a list of all ids of the adventures that have been made # available to this class. This list is deprecated, all adventures a teacher created # are now automatically available to all of their classes. -CUSTOMIZATIONS = dynamo.Table(storage, "class_customizations", partition_key="id") -ACHIEVEMENTS = dynamo.Table(storage, "achievements", partition_key="username") -PUBLIC_PROFILES = dynamo.Table(storage, "public_profiles", partition_key="username") -PARSONS = dynamo.Table(storage, "parsons", "id") -STUDENT_ADVENTURES = dynamo.Table(storage, "student_adventures", "id") -CLASS_ERRORS = dynamo.Table(storage, "class_errors", "id") +CUSTOMIZATIONS = dynamo.Table(storage, "class_customizations", partition_key="id", + types=only_in_dev({ + 'id': str, + 'levels': ListOf(int), + 'opening_dates': DictOf({ + str: str + }), + 'other_settings': ListOf(str), + 'level_thresholds': DictOf({ + str: int + }), + 'sorted_adventures': DictOf({ + str: ListOf(RecordOf({ + 'name': str, + 'from_teacher': bool + })) + }), + 'updated_by': Optional(str), + 'quiz_parsons_tabs_migrated': Optional(int) + })) + +ACHIEVEMENTS = dynamo.Table(storage, "achievements", partition_key="username", + types=only_in_dev({ + 'username': str, + 'achieved': Optional(ListOf(str)), + 'commands': Optional(ListOf(str)), + 'saved_programs': Optional(int), + 'run_programs': Optional(int) + })) +PUBLIC_PROFILES = dynamo.Table(storage, "public_profiles", partition_key="username", + types=only_in_dev({ + 'username': str, + 'image': str, + 'personal_text': str, + 'agree_terms': str, + 'tags': Optional(ListOf(str)) + })) +PARSONS = dynamo.Table(storage, "parsons", "id", + types=only_in_dev({ + 'id': str, + 'username': str, + 'level': str, + 'exercise': str, + 'order': str, + 'correct': str, + 'timestamp': ListOf(int) + }), + ) +STUDENT_ADVENTURES = dynamo.Table(storage, "student_adventures", "id", + types=only_in_dev({ + 'id': str, + 'ticked': bool, + 'program_id': str + }), + ) +CLASS_ERRORS = dynamo.Table(storage, "class_errors", "id", + types=only_in_dev({'id': str}), + ) # We use the epoch field to make an index on the users table, sorted by a different # sort key. In our case, we want to sort by 'created', so that we can make an ordered # list of users. @@ -140,7 +359,12 @@ # 'levelAttempt' is a combination of level and attemptId, to distinguish attempts # by a user. 'level' is padded to 4 characters, then attemptId is added. # -QUIZ_ANSWERS = dynamo.Table(storage, "quizAnswers", partition_key="user", sort_key="levelAttempt") +QUIZ_ANSWERS = dynamo.Table(storage, "quizAnswers", partition_key="user", sort_key="levelAttempt", + types=only_in_dev({ + 'user': str, + 'levelAttempt': str, + }) + ) # Holds information about program runs: success/failure and produced exceptions. Entries are created per user per level # per week and updated in place. Uses a composite partition key 'id#level' and 'week' as a sort key. Structure: @@ -153,11 +377,23 @@ # } # PROGRAM_STATS = dynamo.Table( - storage, "program-stats", partition_key="id#level", sort_key="week", indexes=[dynamo.Index("id", "week")] + storage, "program-stats", partition_key="id#level", sort_key="week", + types=only_in_dev({ + 'id#level': str, + 'id': str, + 'week': str, + }), + indexes=[dynamo.Index("id", "week")] ) QUIZ_STATS = dynamo.Table( - storage, "quiz-stats", partition_key="id#level", sort_key="week", indexes=[dynamo.Index("id", "week")] + storage, "quiz-stats", partition_key="id#level", sort_key="week", + types=only_in_dev({ + 'id#level': str, + 'id': str, + 'week': str, + }), + indexes=[dynamo.Index("id", "week")] ) # Program stats also includes a boolean array indicating the order of successful and non-successful runs. diff --git a/website/dynamo.py b/website/dynamo.py index 971dac905f8..c55f3c19e9d 100644 --- a/website/dynamo.py +++ b/website/dynamo.py @@ -241,14 +241,22 @@ class Table: project the full set of attributes. Indexes can have a partition and their own sort keys. - sort_key: a field that is the sort key for the table. + - Types: a dictionary of field name to type object, to validate fields against. + Does not have to be exhaustive, but it must include all the indexed fields. + You can use: str, list, bool, bytes, int, float, numbers.Number, dict, list, + string_set, number_set, binary_set (last 3 declared in this module). """ - def __init__(self, storage: TableStorage, table_name, partition_key, sort_key=None, indexes=None): + def __init__(self, storage: TableStorage, table_name, partition_key, types=None, sort_key=None, indexes=None): self.key_schema = KeySchema(partition_key, sort_key) self.storage = storage self.table_name = table_name self.indexes = indexes or [] self.indexed_fields = set() + if types is not None: + self.types = Validator.ensure_all(types) + else: + self.types = None all_schemas = [self.key_schema] + [i.key_schema for i in self.indexes] for schema in all_schemas: @@ -259,7 +267,12 @@ def __init__(self, storage: TableStorage, table_name, partition_key, sort_key=No part_names = reverse_index((index.index_name, index.key_schema.partition_key) for index in self.indexes) duped = [names for names in part_names.values() if len(names) > 1] if duped: - raise RuntimeError(f'Table {self.table_name}: indexes with the same partition key: {duped}') + raise ValueError(f'Table {self.table_name}: indexes with the same partition key: {duped}') + + # Check to make sure all indexed fields have a declared type + if self.types: + if undeclared := [f for f in self.indexed_fields if f not in self.types]: + raise ValueError(f'Declare the type of these fields which are used as keys: {", ".join(undeclared)}') @querylog.timed_as("db_get") def get(self, key): @@ -510,6 +523,7 @@ def create(self, data): if not self.key_schema.contains_both_keys(data): raise ValueError(f"Expecting fields {self.key_schema} in create() call, got: {data}") self._validate_indexable_fields(data, False) + self._validate_types(data, full=True) querylog.log_counter(f"db_create:{self.table_name}") self.storage.put(self.table_name, self.key_schema.extract(data), data) @@ -530,6 +544,7 @@ def update(self, key, updates): querylog.log_counter(f"db_update:{self.table_name}") self._validate_indexable_fields(updates, True) self._validate_key(key) + self._validate_types(updates, full=False) updating_keys = set(updates.keys()) & set(self.key_schema.key_names) if updating_keys: @@ -658,6 +673,39 @@ def _validate_indexable_fields(self, data, for_update): ' or an index, so must be of type string, number or binary.' % ({field: value}, self.table_name, field)) + def _validate_types(self, data, full): + """Validate the types in the record according to the 'self.types' map. + + If 'full=True' we will use the declared keys as a basis (making sure + the record is complete). If full=False, we only use the given keys, + making sure the updates are valid. + """ + if self.types is None: + return + + keys_to_validate = self.types.keys() if full else data.keys() + + for field in keys_to_validate: + validator = self.types.get(field) + if not validator: + continue + + value = data.get(field) + if not validate_value_against_validator(value, validator): + raise ValueError(f'In {data}, value of {field} should be {validator} (got {value})') + + +def validate_value_against_validator(value, validator: 'Validator'): + """Validate a value against a validator. + + A validator can be a built-in class representing a type, like 'str' + or 'int'. + """ + if isinstance(value, DynamoUpdate): + return value.validate_against_type(validator) + else: + return validator.is_valid(value) + DDB_SERIALIZER = TypeSerializer() DDB_DESERIALIZER = TypeDeserializer() @@ -1177,6 +1225,9 @@ class DynamoUpdate: def to_dynamo(self): raise NotImplementedError() + def validate_against_type(self, validator): + raise NotImplementedError() + class DynamoIncrement(DynamoUpdate): def __init__(self, delta=1): @@ -1188,6 +1239,12 @@ def to_dynamo(self): "Value": {"N": str(self.delta)}, } + def validate_against_type(self, validator): + return validate_value_against_validator(self.delta, validator) + + def __repr__(self): + return f'Inc({self.delta})' + class DynamoAddToStringSet(DynamoUpdate): """Add one or more elements to a string set.""" @@ -1201,6 +1258,13 @@ def to_dynamo(self): "Value": {"SS": list(self.elements)}, } + def validate_against_type(self, validator): + # The validator should be SetOf(...) + return validate_value_against_validator(set(self.elements), validator) + + def __repr__(self): + return f'Add{self.elements}' + class DynamoAddToNumberSet(DynamoUpdate): """Add one or more elements to a number set.""" @@ -1217,6 +1281,13 @@ def to_dynamo(self): "Value": {"NS": [str(x) for x in self.elements]}, } + def validate_against_type(self, validator): + # The validator should be SetOf(...) + return validate_value_against_validator(set(self.elements), validator) + + def __repr__(self): + return f'Add{self.elements}' + class DynamoAddToList(DynamoUpdate): """Add one or more elements to a list.""" @@ -1224,12 +1295,19 @@ class DynamoAddToList(DynamoUpdate): def __init__(self, *elements): self.elements = elements + def validate_against_type(self, validator): + # The validator should be ListOf(...) + return validate_value_against_validator(list(self.elements), validator) + def to_dynamo(self): return { "Action": "ADD", "Value": {"L": [DDB_SERIALIZER.serialize(x) for x in self.elements]}, } + def __repr__(self): + return f'Add{self.elements}' + class DynamoRemoveFromStringSet(DynamoUpdate): """Remove one or more elements to a string set.""" @@ -1243,6 +1321,13 @@ def to_dynamo(self): "Value": {"SS": list(self.elements)}, } + def validate_against_type(self, validator): + # The validator should be SetOf(...) + return validate_value_against_validator(set(self.elements), validator) + + def __repr__(self): + return f'Remove{self.elements}' + class DynamoCondition: """Base class for Query conditions. @@ -1566,3 +1651,163 @@ def make_predicate(obj): if isinstance(obj, dict): return lambda row: all(row.get(key) == value for key, value in obj.items()) raise ValueError(f'Not a valid client_side_filter: {obj}') + + +class Validator(metaclass=ABCMeta): + """Base class for validators. + + Subclasses should implement 'is_valid' and '__str__'. + """ + + @staticmethod + def ensure(validator): + """Turn the given value into a validator.""" + if validator is True: + return Any() + if isinstance(validator, Validator): + return validator + if type(validator) is type: + return InstanceOf(validator) + if callable(validator): + return Predicate(validator) + raise ValueError(f'Not sure how to treat {validator} as a validator') + + @staticmethod + def ensure_all(validator_dict): + if str in validator_dict.keys() and len(validator_dict.keys()) > 1: + raise ValueError(f'If you specify str as part of the validation, you cant define more type checks.\ + You dindt do that for {validator_dict}') + type_dict = {} + for k, v in validator_dict.items(): + if k is str: + type_dict[Validator.ensure(k)] = Validator.ensure(v) + elif isinstance(k, str): + type_dict[k] = Validator.ensure(v) + else: + raise ValueError(f'Key values should be of type str or instances of string.\ + {k} does not comply with that') + return type_dict + + def is_valid(self, value): + ... + + def __str__(self, value): + ... + + +class Any(Validator): + """Validator which allows any type.""" + + def is_valid(self, value): + return True + + def __str__(self): + return 'any value' + + +class InstanceOf(Validator): + """Validator which checks if a value is an instance of a type.""" + + def __init__(self, type): + self.type = type + + def is_valid(self, value): + return isinstance(value, self.type) + + def __str__(self): + return f'instance of {self.type}' + + +class EitherOf(Validator): + """Validator wihch checks if a value is instance of one of several types""" + + def __init__(self, *types): + self.validators = [Validator.ensure(type) for type in types] + + def is_valid(self, value): + return any(validator.is_valid(value) for validator in self.validators) + + def __str__(self): + return f'matches one of {self.validators}' + + +class Predicate(Validator): + """Validator which calls an arbitrary callback.""" + + def __init__(self, fn): + self.fn = fn + + def is_valid(self, value): + return self.fn(value) + + def __str__(self): + return f'matches {self.fn}' + + +class Optional(Validator): + """Validator which matches either None or an inner validator.""" + + def __init__(self, inner): + self.inner = Validator.ensure(inner) + + def is_valid(self, value): + return value is None or self.inner.is_valid(value) + + def __str__(self): + return f'optional {self.inner}' + + +class SetOf(Validator): + """Validator which matches a set matching inner validators.""" + + def __init__(self, inner): + self.inner = Validator.ensure(inner) + + def is_valid(self, value): + return isinstance(value, set) and all(self.inner.is_valid(x) for x in value) + + def __str__(self): + return f'set of {self.inner}' + + +class ListOf(Validator): + """Validator which matches a list matching inner validators.""" + + def __init__(self, inner): + self.inner = Validator.ensure(inner) + + def is_valid(self, value): + return isinstance(value, list) and all(self.inner.is_valid(x) for x in value) + + def __str__(self): + return f'list of {self.inner}' + + +class RecordOf(Validator): + """Validator which matches a record with inner validators.""" + + def __init__(self, inner): + self.inner = Validator.ensure_all(inner) + + def is_valid(self, value): + return (isinstance(value, dict) + and all(validator.is_valid(value.get(key)) for key, validator in self.inner.items())) + + def __str__(self): + return f'{self.inner}' + + +class DictOf(Validator): + """ Validator wich matches dictionaries of any string to inner validators """ + + def __init__(self, inner): + self.inner = Validator.ensure_all(inner) + + def is_valid(self, value): + if not isinstance(value, dict): + return False + first_type = list(self.inner.keys())[0] + return all(first_type.is_valid(k) and self.inner.get(first_type).is_valid(v) for k, v in value.items()) + + def __str__(self): + return f'list of {self.inner}' diff --git a/website/for_teachers.py b/website/for_teachers.py index 54b37e5b767..c0215dfd09f 100644 --- a/website/for_teachers.py +++ b/website/for_teachers.py @@ -757,6 +757,7 @@ def restore_customizations_to_default(self, user): "level_thresholds": {}, "sorted_adventures": db_adventures, "restored_by": user["username"], + "updated_by": user["username"] } self.db.update_class_customizations(customizations) @@ -1154,7 +1155,7 @@ def update_adventure(self, user): "creator": user["username"], "name": body["name"], "classes": body["classes"], - "level": body["levels"][0], # TODO: this should be removed gradually. + "level": int(body["levels"][0]), # TODO: this should be removed gradually. "levels": body["levels"], "content": body["content"], "public": 1 if body["public"] else 0, @@ -1284,7 +1285,7 @@ def create_adventure(self, user, class_id=None, level=None): "date": utils.timems(), "creator": user["username"], "name": name, - "classes": [class_id], + "classes": [class_id] if class_id is not None else [], "level": int(level), "levels": [level], "content": "",