From 8c27980dfba1f615b507591e264eba414cea82cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herman=20=C3=98ie=20Kolden?= Date: Sat, 29 Jun 2024 06:53:52 +0200 Subject: [PATCH 1/5] core: support data-based direct comparison Previously we couldn't use == to compare ulogs, so we had to do __dict__ comparison manually in tests. We should just add such comparison as default, since two ULog objects that have the same data should be considered equal. --- pyulog/core.py | 13 +++++++++++++ pyulog/db.py | 10 ++++++++++ test/test_db.py | 3 +-- test/test_ulog.py | 38 ++++++++++++++++++++++---------------- 4 files changed, 46 insertions(+), 18 deletions(-) diff --git a/pyulog/core.py b/pyulog/core.py index 6069325..0ad9896 100644 --- a/pyulog/core.py +++ b/pyulog/core.py @@ -482,6 +482,19 @@ def _make_changed_param_items(self): return changed_param_items + def __eq__(self, other): + """ + If the other object has all the same data as we have, we want to + consider them equal, even if the other object has extra fields, because + the user cares about the ULog contents. + """ + if not isinstance(other, ULog): + return NotImplemented + return all( + self_value == getattr(other, field) + for field, self_value in self.__dict__.items() + ) + class Data(object): """ contains the final topic data for a single topic and instance """ diff --git a/pyulog/db.py b/pyulog/db.py index 0c40b34..dc0434b 100644 --- a/pyulog/db.py +++ b/pyulog/db.py @@ -177,6 +177,16 @@ def __init__(self, db_handle, primary_key=None, log_file=None, lazy=True, **kwar if primary_key is not None: self.load(lazy=lazy) + def __eq__(self, other): + """ + If the other object is a normal ULog, then we just want to compare ULog + data, not DatabaseULog specific fields, because we want to compare + the ULog file contents.
+ """ + if type(other) is ULog: # pylint: disable=unidiomatic-typecheck + return other.__eq__(self) + return super().__eq__(other) + @property def primary_key(self): '''The primary key of the ulog, pointing to the correct "ULog" row in the database.''' diff --git a/test/test_db.py b/test/test_db.py index 53c2429..9da0599 100644 --- a/test/test_db.py +++ b/test/test_db.py @@ -54,8 +54,7 @@ def test_parsing(self, test_case): dbulog_saved.save() primary_key = dbulog_saved.primary_key dbulog_loaded = DatabaseULog(self.db_handle, primary_key=primary_key, lazy=False) - for ulog_key, ulog_value in ulog.__dict__.items(): - self.assertEqual(ulog_value, getattr(dbulog_loaded, ulog_key)) + self.assertEqual(ulog, dbulog_loaded) def test_lazy(self): ''' diff --git a/test/test_ulog.py b/test/test_ulog.py index 85f95c3..26d5ecf 100644 --- a/test/test_ulog.py +++ b/test/test_ulog.py @@ -20,6 +20,22 @@ class TestULog(unittest.TestCase): Tests the ULog class ''' + @data('sample') + def test_comparison(self, base_name): + ''' + Test that the custom comparison method works as expected. 
+ ''' + ulog_file_name = os.path.join(TEST_PATH, base_name + '.ulg') + ulog1 = pyulog.ULog(ulog_file_name) + ulog2 = pyulog.ULog(ulog_file_name) + assert ulog1 == ulog2 + assert ulog1 is not ulog2 + + # make them different in arbitrary field + ulog1.data_list[0].data['timestamp'][0] += 1 + assert ulog1 != ulog2 + + @data('sample', 'sample_appended', 'sample_appended_multiple', @@ -36,21 +52,11 @@ def test_write_ulog(self, base_name): original.write_ulog(written_ulog_file_name) copied = pyulog.ULog(written_ulog_file_name) - for original_key, original_value in original.__dict__.items(): - copied_value = getattr(copied, original_key) - if original_key == '_sync_seq_cnt': - # Sync messages are counted on parse, but otherwise dropped, so - # we don't rewrite them - assert copied_value == 0 - elif original_key == '_appended_offsets': - # Abruptly ended messages just before offsets are dropped, so - # we don't rewrite appended offsets - assert copied_value == [] - elif original_key == '_incompat_flags': - # Same reasoning on incompat_flags[0] as for '_appended_offsets' - assert copied_value[0] == original_value[0] & 0xFE # pylint: disable=unsubscriptable-object - assert copied_value[1:] == original_value[1:] # pylint: disable=unsubscriptable-object - else: - assert copied_value == original_value + # Some fields are not copied but dropped, so we cheat by modifying the original + original._sync_seq_cnt = 0 # pylint: disable=protected-access + original._appended_offsets = [] # pylint: disable=protected-access + original._incompat_flags[0] &= 0xFE # pylint: disable=protected-access + + assert copied == original # vim: set et fenc=utf-8 ft=python ff=unix sts=4 sw=4 ts=4 From 71f57a607d83588e1632f0f32d93c65984b2a88d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herman=20=C3=98ie=20Kolden?= Date: Sat, 29 Jun 2024 08:04:49 +0200 Subject: [PATCH 2/5] ci: add test for dbulog writing to file Writing DatabaseULog to file doesn't work, so we add a test which finds errors we can fix. 
--- test/test_db.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/test/test_db.py b/test/test_db.py index 9da0599..e38ca83 100644 --- a/test/test_db.py +++ b/test/test_db.py @@ -4,6 +4,7 @@ import unittest import os +import tempfile from unittest.mock import patch import numpy as np from ddt import ddt, data @@ -231,3 +232,36 @@ def test_json(self): self.assertEqual(len(db_timestamps), len(values)) np.testing.assert_allclose(db_timestamps, values) cur.close() + + @data('sample', + 'sample_appended', + 'sample_appended_multiple', + 'sample_logging_tagged_and_default_params') + def test_write_ulog(self, base_name): + ''' + Test that the write_ulog method successfully replicates all relevant data. + ''' + with tempfile.TemporaryDirectory() as tmpdirname: + ulog_file_name = os.path.join(TEST_PATH, base_name + '.ulg') + written_ulog_file_name = os.path.join(tmpdirname, base_name + '_copy.ulg') + + dbulog = DatabaseULog(self.db_handle, log_file=ulog_file_name) + dbulog.save() + + lazy_loaded_dbulog = DatabaseULog( + self.db_handle, + primary_key=dbulog.primary_key, + lazy=True + ) + with self.assertRaises(ValueError): + lazy_loaded_dbulog.write_ulog(written_ulog_file_name) + + loaded_dbulog = DatabaseULog(self.db_handle, primary_key=dbulog.primary_key, lazy=False) + loaded_dbulog.write_ulog(written_ulog_file_name) + copied = ULog(written_ulog_file_name) + + # Some fields are not copied but dropped, so we cheat by modifying the original + loaded_dbulog._sync_seq_cnt = 0 # pylint: disable=protected-access + loaded_dbulog._appended_offsets = [] # pylint: disable=protected-access + loaded_dbulog._incompat_flags[0] &= 0xFE # pylint: disable=protected-access + assert copied == loaded_dbulog From b2a398b688d4870710e8ec97d0accdbf286bc118 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herman=20=C3=98ie=20Kolden?= Date: Sat, 29 Jun 2024 08:05:23 +0200 Subject: [PATCH 3/5] db: prevent dbulog writing to file if lazy loaded If DatabaseULog is lazy 
loaded it doesn't have data, so it will fail if trying to write to file. --- pyulog/db.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pyulog/db.py b/pyulog/db.py index dc0434b..d28acb3 100644 --- a/pyulog/db.py +++ b/pyulog/db.py @@ -170,6 +170,7 @@ def __init__(self, db_handle, primary_key=None, log_file=None, lazy=True, **kwar self._pk = primary_key self._db = db_handle + self._lazy_loaded = lazy if log_file is not None: self._sha256sum = DatabaseULog.calc_sha256sum(log_file) @@ -187,6 +188,11 @@ def __eq__(self, other): return other.__eq__(self) return super().__eq__(other) + def write_ulog(self, path): + if self._lazy_loaded: + raise ValueError('Cannot write after lazy load because it has no datasets.') + super().write_ulog(path) + @property def primary_key(self): '''The primary key of the ulog, pointing to the correct "ULog" row in the database.''' @@ -400,6 +406,7 @@ def load(self, lazy=True): self._changed_parameters.append((timestamp, key, value)) cur.close() + self._lazy_loaded = lazy def get_dataset(self, name, multi_instance=0, lazy=False, db_cursor=None, caching=True): ''' From c6cbd3313d9e12f73a63c4eaf63435e53dfaa23c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herman=20=C3=98ie=20Kolden?= Date: Sat, 29 Jun 2024 08:07:29 +0200 Subject: [PATCH 4/5] db: store ints as ints, not floats DatabaseULog.write_ulog complained about wrong value types during struct packing. --- pyulog/db.py | 2 +- pyulog/sql/pyulog.4.sql | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 pyulog/sql/pyulog.4.sql diff --git a/pyulog/db.py b/pyulog/db.py index d28acb3..6d14905 100644 --- a/pyulog/db.py +++ b/pyulog/db.py @@ -49,7 +49,7 @@ class DatabaseULog(ULog): contsructor will throw an exception. See the documentation of "ulog_migratedb" for more information. 
''' - SCHEMA_VERSION = 3 + SCHEMA_VERSION = 4 @staticmethod def get_db_handle(db_path): diff --git a/pyulog/sql/pyulog.4.sql b/pyulog/sql/pyulog.4.sql new file mode 100644 index 0000000..5866070 --- /dev/null +++ b/pyulog/sql/pyulog.4.sql @@ -0,0 +1,37 @@ +BEGIN; +PRAGMA foreign_keys=off; + +-- Change REAL timestamps to INT. SQLite only supports INT64, but ULog +-- timestamps are UINT64. We accept losing 1 bit at the top end, since 2^63 +-- microseconds is roughly 292,000 years, which should be enough. + +ALTER TABLE ULog RENAME COLUMN StartTimestamp TO StartTimestamp_old; +ALTER TABLE ULog ADD COLUMN StartTimestamp INT; +UPDATE ULog SET StartTimestamp = CAST(StartTimestamp_old AS INT); + +ALTER TABLE ULog RENAME COLUMN LastTimestamp TO LastTimestamp_old; +ALTER TABLE ULog ADD COLUMN LastTimestamp INT; +UPDATE ULog SET LastTimestamp = CAST(LastTimestamp_old AS INT); + +ALTER TABLE ULogMessageDropout RENAME COLUMN Timestamp TO Timestamp_old; +ALTER TABLE ULogMessageDropout ADD COLUMN Timestamp INT; +UPDATE ULogMessageDropout SET Timestamp = CAST(Timestamp_old AS INT); + +ALTER TABLE ULogMessageDropout RENAME COLUMN Duration TO Duration_old; +ALTER TABLE ULogMessageDropout ADD COLUMN Duration INT; +UPDATE ULogMessageDropout SET Duration = CAST(Duration_old AS INT); + +ALTER TABLE ULogMessageLogging RENAME COLUMN Timestamp TO Timestamp_old; +ALTER TABLE ULogMessageLogging ADD COLUMN Timestamp INT; +UPDATE ULogMessageLogging SET Timestamp = CAST(Timestamp_old AS INT); + +ALTER TABLE ULogMessageLoggingTagged RENAME COLUMN Timestamp TO Timestamp_old; +ALTER TABLE ULogMessageLoggingTagged ADD COLUMN Timestamp INT; +UPDATE ULogMessageLoggingTagged SET Timestamp = CAST(Timestamp_old AS INT); + +ALTER TABLE ULogChangedParameter RENAME COLUMN Timestamp TO Timestamp_old; +ALTER TABLE ULogChangedParameter ADD COLUMN Timestamp INT; +UPDATE ULogChangedParameter SET Timestamp = CAST(Timestamp_old AS INT); + +PRAGMA foreign_keys=on; +COMMIT; From
3d506dc8152070ec6b4adeaf9a03fc8636ad9c5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herman=20=C3=98ie=20Kolden?= Date: Sun, 30 Jun 2024 15:17:30 +0200 Subject: [PATCH 5/5] db: add back indices These were lost in pyulog.2.sql, so the DB is very slow for large databases. NB: Applying this migration will take a while for large databases. --- pyulog/db.py | 2 +- pyulog/sql/pyulog.5.sql | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 pyulog/sql/pyulog.5.sql diff --git a/pyulog/db.py b/pyulog/db.py index 6d14905..141becd 100644 --- a/pyulog/db.py +++ b/pyulog/db.py @@ -49,7 +49,7 @@ class DatabaseULog(ULog): contsructor will throw an exception. See the documentation of "ulog_migratedb" for more information. ''' - SCHEMA_VERSION = 4 + SCHEMA_VERSION = 5 @staticmethod def get_db_handle(db_path): diff --git a/pyulog/sql/pyulog.5.sql b/pyulog/sql/pyulog.5.sql new file mode 100644 index 0000000..42ca5bc --- /dev/null +++ b/pyulog/sql/pyulog.5.sql @@ -0,0 +1,17 @@ +BEGIN; +CREATE INDEX IF NOT EXISTS btree_ULogAppendedOffsets_ULogId ON ULogAppendedOffsets(ULogId); +CREATE INDEX IF NOT EXISTS btree_ULogDataset_ULogId ON ULogDataset(ULogId); +CREATE INDEX IF NOT EXISTS btree_ULogField_DatasetId ON ULogField(DatasetId); +CREATE INDEX IF NOT EXISTS btree_ULogMessageDropout_ULogId ON ULogMessageDropout(ULogId); +CREATE INDEX IF NOT EXISTS btree_ULogMessageFormat_ULogId ON ULogMessageFormat(ULogId); +CREATE INDEX IF NOT EXISTS btree_ULogMessageFormatField_MessageId ON ULogMessageFormatField(MessageId); +CREATE INDEX IF NOT EXISTS btree_ULogMessageLogging_ULogId ON ULogMessageLogging(ULogId); +CREATE INDEX IF NOT EXISTS btree_ULogMessageLoggingTagged_ULogId ON ULogMessageLoggingTagged(ULogId); +CREATE INDEX IF NOT EXISTS btree_ULogMessageInfo_ULogId ON ULogMessageInfo(ULogId); +CREATE INDEX IF NOT EXISTS btree_ULogMessageInfoMultiple_ULogId ON ULogMessageInfoMultiple(ULogId); +CREATE INDEX IF NOT EXISTS
btree_ULogMessageInfoMultipleList_MessageId ON ULogMessageInfoMultipleList(MessageId); +CREATE INDEX IF NOT EXISTS btree_ULogMessageInfoMultipleListElement_ListId ON ULogMessageInfoMultipleListElement(ListId); +CREATE INDEX IF NOT EXISTS btree_ULogInitialParameter_ULogId ON ULogInitialParameter(ULogId); +CREATE INDEX IF NOT EXISTS btree_ULogChangedParameter_ULogId ON ULogChangedParameter(ULogId); +CREATE INDEX IF NOT EXISTS btree_ULogDefaultParameter_ULogId ON ULogDefaultParameter(ULogId); +COMMIT;