diff --git a/doc/source/admin/container_resolvers.rst b/doc/source/admin/container_resolvers.rst index 918088f7e026..153c5053c605 100644 --- a/doc/source/admin/container_resolvers.rst +++ b/doc/source/admin/container_resolvers.rst @@ -379,7 +379,7 @@ Setting up Galaxy using docker / singularity on distributed compute resources (in particular in real user setups) requires careful planning. Other considerations -==================== +-------------------- Tools frequently use ``$TMP``, ``$TEMP``, or ``$TMPDIR`` (or simply use hardcoded ``/tmp``) for storing temporary data. In containerized environments ``/tmp`` diff --git a/doc/source/admin/useful_scripts.rst b/doc/source/admin/useful_scripts.rst index 933c846aae2b..9fe85c291008 100644 --- a/doc/source/admin/useful_scripts.rst +++ b/doc/source/admin/useful_scripts.rst @@ -17,3 +17,22 @@ This script was developed to be as general as possible, allowing you to pipe the Find has an extremely expressive command line for selecting specific files that are of interest to you. These will then be recursively uploaded into Galaxy, maintaining the folder hierarchy, a useful feature when moving legacy data into Galaxy. For a complete description of the options of this script, you can run ``python $GALAXY_ROOT/scripts/api/library_upload_dir.py --help`` This tool will not overwrite or re-upload already uploaded datasets. As a result, one can imagine running this on a cron job to keep an "incoming sequencing data" directory synced with a data library. + +Deleting unused histories +------------------------- + +Galaxy accommodates anonymous usage by creating a default history. Often, such histories will remain unused, as a result of which the database may contain a considerable number of anonymous histories along with associated records, which serve no purpose. Deleting such records will declutter the database and free up space. 
However, given that a row in the history table may be referenced from multiple other tables, manually deleting such data may leave the database in an inconsistent state. Furthermore, whereas some types of data associated with such histories are clearly obsolete and can be safely deleted, others may require preservation for a variety of reasons.
+
+To safely delete unused histories and their associated records, please use the ``prune_history_table`` script. Due to the potentially very large size of some of the tables in the database, the script deletes records in batches. The default batch size is 1000, which means the script will delete up to 1000 histories, plus any associated records, in a single batch. The size of the batch is configurable. By default, an anonymous history must be at least a month old to be considered unused. This value is configurable as well.
+
+.. code-block:: console
+
+    $ python $GALAXY_ROOT/lib/galaxy/model/scripts/prune_history_table.py
+    usage: prune_history_table.py [-h] [--batch BATCH] [--created CREATED]
+
+    Remove unused histories from database. A history is considered unused if it doesn't have a user and its hid counter has not been incremented.
+
+    optional arguments:
+      -h, --help         show this help message and exit
+      --batch BATCH      batch size
+      --created CREATED  most recent created date/time in ISO format (for example, March 11, 1952 is represented as '1952-03-11')
diff --git a/lib/galaxy/model/scripts/__init__.py b/lib/galaxy/model/scripts/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/lib/galaxy/model/scripts/history_table_pruner.py b/lib/galaxy/model/scripts/history_table_pruner.py
new file mode 100644
index 000000000000..c23813cbd7ac
--- /dev/null
+++ b/lib/galaxy/model/scripts/history_table_pruner.py
@@ -0,0 +1,215 @@
+import datetime
+import logging
+import math
+
+from sqlalchemy import text
+
+TMP_TABLE = "tmp_unused_history"
+
+# Tables whose rows reference a history and are safe to delete together with it.
+ASSOC_TABLES = (
+    "event",
+    "history_audit",
+    "history_tag_association",
+    "history_annotation_association",
+    "history_rating_association",
+    "history_user_share_association",
+    "default_history_permissions",
+    "data_manager_history_association",
+    "cleanup_event_history_association",
+    "galaxy_session_to_history",
+)
+
+# Tables whose rows reference a history that must be preserved: a history with
+# records in any of these tables is excluded from deletion.
+EXCLUDED_ASSOC_TABLES = (
+    "job_import_history_archive",
+    "job_export_history_archive",
+    "workflow_invocation",
+    "history_dataset_collection_association",
+    "job",
+    "history_dataset_association",
+)
+
+DEFAULT_BATCH_SIZE = 1000
+
+logging.basicConfig()
+log = logging.getLogger(__name__)
+
+
+class HistoryTablePruner:
+    """Removes unused histories (user is null, hid == 1)."""
+
+    def __init__(self, engine, batch_size=None, max_create_time=None):
+        self.engine = engine
+        self.batch_size = batch_size or DEFAULT_BATCH_SIZE
+        self.max_create_time = max_create_time or self._get_default_max_create_time()
+        self.min_id, self.max_id = self._get_min_max_ids()
+        self.batches = self._get_batch_count()
+
+    def run(self):
+        """
+        Due to the very large size of some tables, we run operations in batches, using low/high history id as boundaries.
+        """
+
+        def get_high(low):
+            return min(self.max_id + 1, low + self.batch_size)
+
+        if self.min_id is None:
+            log.info("No histories exist")
+            return
+
+        log.info(
+            f"Total batches to run: {self.batches}, minimum history id: {self.min_id}, maximum history id: {self.max_id}"
+        )
+
+        low = self.min_id
+        high = get_high(low)
+        batch_counter = 1
+        while low <= self.max_id:
+            log.info(f"Running batch {batch_counter} of {self.batches}: history id range {low}-{high-1}:")
+            self._run_batch(low, high)
+            low = high
+            high = get_high(high)
+            batch_counter += 1
+
+    def _get_default_max_create_time(self):
+        """By default, do not delete histories created less than a month ago."""
+        # NOTE: today.replace(month=today.month - 1) raises ValueError in January
+        # (month=0) and for days absent from the previous month (e.g. March 31),
+        # so use a fixed 30-day window instead.
+        return datetime.date.today() - datetime.timedelta(days=30)
+
+    def _run_batch(self, low, high):
+        empty_batch_msg = f" No histories to delete in id range {low}-{high-1}"
+        self._mark_histories_as_deleted_and_purged(low, high)
+        histories = self._get_histories(low, high)
+        if not histories:
+            log.info(empty_batch_msg)
+            return
+
+        exclude = self._get_histories_to_exclude(low, high)
+
+        # Calculate set of histories to delete.
+        to_delete = set(histories) - exclude
+        if not to_delete:
+            log.info(empty_batch_msg)
+            return
+
+        # No except clause needed: nothing to handle, but the temporary table
+        # must always be dropped.
+        self._create_tmp_table()
+        try:
+            self._populate_tmp_table(to_delete)
+            self._delete_associations()
+            self._set_references_to_null()
+            self._delete_histories(low, high)
+        finally:
+            self._drop_tmp_table()
+
+    def _get_min_max_ids(self):
+        """Return lowest and highest id of histories eligible for deletion (None, None if there are none)."""
+        stmt = text(
+            "SELECT min(id), max(id) FROM history WHERE user_id IS NULL AND hid_counter = 1 AND create_time < :create_time"
+        )
+        params = {"create_time": self.max_create_time}
+        with self.engine.begin() as conn:
+            minmax = conn.execute(stmt, params).all()
+            return minmax[0][0], minmax[0][1]
+
+    def _get_batch_count(self):
+        """Calculate number of batches to run."""
+        if self.min_id is None:
+            # No eligible histories: avoid TypeError on None arithmetic.
+            return 0
+        # +1: the id range is inclusive on both ends.
+        return math.ceil((self.max_id - self.min_id + 1) / self.batch_size)
+
+    def _mark_histories_as_deleted_and_purged(self, low, high):
+        """Mark target histories as deleted and purged to prevent their further usage."""
+        log.info(" Marking histories as deleted and purged")
+        stmt = text(
+            """
+            UPDATE history
+            SET deleted = TRUE, purged = TRUE
+            WHERE user_id IS NULL AND hid_counter = 1 AND create_time < :create_time AND id >= :low AND id < :high
+            """
+        )
+        params = self._get_stmt_params(low, high)
+        with self.engine.begin() as conn:
+            return conn.execute(stmt, params)
+
+    def _get_histories(self, low, high):
+        """Return ids of histories to delete."""
+        log.info(" Collecting history ids")
+        stmt = text(
+            "SELECT id FROM history WHERE user_id IS NULL AND hid_counter = 1 AND create_time < :create_time AND id >= :low AND id < :high"
+        )
+        params = self._get_stmt_params(low, high)
+        with self.engine.begin() as conn:
+            return conn.scalars(stmt, params).all()
+
+    def _get_histories_to_exclude(self, low, high):
+        """Retrieve histories that should NOT be deleted due to existence of associated records that should be preserved."""
+        log.info(f" Collecting ids of histories to exclude based on {len(EXCLUDED_ASSOC_TABLES)} associated tables:")
+        statements = []
+        for table in EXCLUDED_ASSOC_TABLES:
+            # Both bounds must filter on history_id: these tables have their own
+            # id column, so "id < :high" would select the wrong rows.
+            statements.append(
+                (table, text(f"SELECT history_id FROM {table} WHERE history_id >= :low AND history_id < :high"))
+            )
+
+        params = self._get_stmt_params(low, high)
+        ids = []
+        for table, stmt in statements:
+            with self.engine.begin() as conn:
+                log.info(f" Collecting history_id from {table}")
+                ids += conn.scalars(stmt, params).all()
+
+        # Remove None: nullable history_id columns may yield it.
+        excluded = set(ids)
+        if None in excluded:
+            excluded.remove(None)
+        return excluded
+
+    def _create_tmp_table(self):
+        """Create temporary table to hold history ids."""
+        stmt = text(f"CREATE TEMPORARY TABLE {TMP_TABLE} (id INT PRIMARY KEY)")
+        with self.engine.begin() as conn:
+            conn.execute(stmt)
+
+    def _drop_tmp_table(self):
+        stmt = text(f"DROP TABLE {TMP_TABLE}")
+        with self.engine.begin() as conn:
+            conn.execute(stmt)
+
+    def _populate_tmp_table(self, to_delete):
+        """Load ids of histories to delete into temporary table."""
+        assert to_delete
+        log.info(" Populating temporary table")
+        sql_values = ",".join([f"({id})" for id in to_delete])
+        stmt = text(f"INSERT INTO {TMP_TABLE} VALUES {sql_values}")
+        with self.engine.begin() as conn:
+            conn.execute(stmt)
+
+    def _delete_associations(self):
+        """Delete records associated with histories to be deleted."""
+
+        for table in ASSOC_TABLES:
+            log.info(f" Deleting associated records from {table}")
+            stmt = text(f"DELETE FROM {table} WHERE history_id IN (SELECT id FROM {TMP_TABLE})")
+            with self.engine.begin() as conn:
+                conn.execute(stmt)
+
+    def _set_references_to_null(self):
+        """Set current_history_id to null in galaxy_session table for records referring to histories to be deleted."""
+        log.info(" Set current_history_id to null in galaxy_session")
+        stmt = text(
+            f"UPDATE galaxy_session SET current_history_id = NULL WHERE current_history_id IN (SELECT id FROM {TMP_TABLE})"
+        )
+        with self.engine.begin() as conn:
+            conn.execute(stmt)
+
+    def _delete_histories(self, low, high):
+        """Last step: delete histories that are safe to delete."""
+        log.info(f" Deleting histories in id range {low}-{high-1}")
+        stmt = text(f"DELETE FROM history WHERE id IN (SELECT id FROM {TMP_TABLE})")
+        with self.engine.begin() as conn:
+            conn.execute(stmt)
+
+    def _get_stmt_params(self, low, high):
+        params = {
+            "create_time": self.max_create_time,
+            "low": low,
+            "high": high,
+        }
+        return params
diff --git a/lib/galaxy/model/scripts/prune_history_table.py b/lib/galaxy/model/scripts/prune_history_table.py
new file mode 100644
index 000000000000..d4c976b050d7
--- /dev/null
+++ b/lib/galaxy/model/scripts/prune_history_table.py
@@ -0,0 +1,41 @@
+import argparse
+import datetime
+import os
+import sys
+
+from sqlalchemy import create_engine
+
+sys.path.insert(
+    1, os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, os.pardir, os.pardir, "lib"))
+)
+
+from galaxy.model.orm.scripts import get_config
+from galaxy.model.scripts.history_table_pruner import HistoryTablePruner
+
+DESCRIPTION = """Remove unused histories from database.
+
+A history is considered unused if it doesn't have a user and its hid counter has not been incremented.
+""" + + +def main(): + args = _get_parser().parse_args() + config = get_config(sys.argv, use_argparse=False, cwd=os.getcwd()) + engine = create_engine(config["db_url"]) + htp = HistoryTablePruner(engine=engine, batch_size=args.batch, max_create_time=args.created) + htp.run() + + +def _get_parser(): + parser = argparse.ArgumentParser(description=DESCRIPTION) + parser.add_argument("--batch", type=int, help="batch size") + parser.add_argument( + "--created", + type=datetime.datetime.fromisoformat, + help="most recent created date/time in ISO format (for example, March 11, 1952 is represented as '1952-03-11')", + ) + return parser + + +if __name__ == "__main__": + main() diff --git a/lib/galaxy/model/unittest_utils/gxy_model_fixtures.py b/lib/galaxy/model/unittest_utils/gxy_model_fixtures.py deleted file mode 100644 index d3ece2b7ecdb..000000000000 --- a/lib/galaxy/model/unittest_utils/gxy_model_fixtures.py +++ /dev/null @@ -1,1000 +0,0 @@ -import pytest - -from galaxy import model -from galaxy.model.orm.util import add_object_to_object_session -from galaxy.model.unittest_utils.mapping_testing_utils import get_unique_value -from galaxy.model.unittest_utils.model_testing_utils import ( - dbcleanup_wrapper, - initialize_model, -) - - -@pytest.fixture(scope="module") -def init_model(engine): - """Create model objects in the engine's database.""" - # Must use the same engine as the session fixture used by this module. - initialize_model(model.mapper_registry, engine) - - -# Fixtures yielding persisted instances of models, deleted from the database on test exit. 
- - -@pytest.fixture -def api_keys(session): - instance = model.APIKeys(key=get_unique_value()) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def cleanup_event(session): - instance = model.CleanupEvent() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def cloud_authz(session, user, user_authnz_token): - instance = model.CloudAuthz(user.id, "a", "b", user_authnz_token.id, "c") - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def custos_authnz_token(session, user): - instance = model.CustosAuthnzToken() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def data_manager_history_association(session): - instance = model.DataManagerHistoryAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def data_manager_job_association(session): - instance = model.DataManagerJobAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def dataset(session): - instance = model.Dataset() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def dataset_collection(session): - instance = model.DatasetCollection(collection_type="a") - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def dataset_collection_element(session, dataset_collection, history_dataset_association): - instance = model.DatasetCollectionElement(collection=dataset_collection, element=history_dataset_association) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def dataset_hash(session): - instance = model.DatasetHash() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def dataset_permission(session, dataset): - instance = model.DatasetPermissions("a", dataset) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def dataset_source(session): - instance = model.DatasetSource() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def 
dataset_source_hash(session): - instance = model.DatasetSourceHash() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def default_history_permissions(session, history, role): - instance = model.DefaultHistoryPermissions(history, "a", role) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def default_quota_association(session, quota): - type_ = model.DefaultQuotaAssociation.types.REGISTERED - instance = model.DefaultQuotaAssociation(type_, quota) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def default_user_permissions(session, user, role): - instance = model.DefaultUserPermissions(user, None, role) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def dynamic_tool(session): - instance = model.DynamicTool() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def extended_metadata(session): - instance = model.ExtendedMetadata(None) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def extended_metadata_index(session, extended_metadata): - instance = model.ExtendedMetadataIndex(extended_metadata, None, None) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def form_definition(session, form_definition_current): - instance = model.FormDefinition(name="a", form_definition_current=form_definition_current) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def form_definition_current(session): - instance = model.FormDefinitionCurrent() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def form_values(session): - instance = model.FormValues() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def galaxy_session(session): - instance = model.GalaxySession(session_key=get_unique_value()) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def galaxy_session_history_association(session, galaxy_session, history): - instance = 
model.GalaxySessionToHistoryAssociation(galaxy_session, history) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def group(session): - instance = model.Group(name=get_unique_value()) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def group_quota_association(session): - instance = model.GroupQuotaAssociation(None, None) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def group_role_association(session): - instance = model.GroupRoleAssociation(None, None) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def history(session): - instance = model.History() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def history_annotation_association(session): - instance = model.HistoryAnnotationAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def history_dataset_association(session, dataset): - instance = model.HistoryDatasetAssociation(dataset=dataset) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def history_dataset_association_annotation_association(session): - instance = model.HistoryDatasetAssociationAnnotationAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def history_dataset_association_rating_association(session): - instance = model.HistoryDatasetAssociationRatingAssociation(None, None) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def history_dataset_association_tag_association(session): - instance = model.HistoryDatasetAssociationTagAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def history_dataset_collection_annotation_association(session): - instance = model.HistoryDatasetCollectionAssociationAnnotationAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def history_dataset_collection_association(session): - instance = 
model.HistoryDatasetCollectionAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def history_dataset_collection_rating_association( - session, - user, - history_dataset_collection_association, -): - instance = model.HistoryDatasetCollectionRatingAssociation(user, history_dataset_collection_association) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def history_dataset_collection_tag_association(session): - instance = model.HistoryDatasetCollectionTagAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def history_rating_association(session, user, history): - instance = model.HistoryRatingAssociation(user, history) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def history_tag_association(session): - instance = model.HistoryTagAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def history_user_share_association(session): - instance = model.HistoryUserShareAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def implicit_collection_jobs(session): - instance = model.ImplicitCollectionJobs(populated_state="new") - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def implicit_collection_jobs_job_association(session): - instance = model.ImplicitCollectionJobsJobAssociation() - instance.order_index = 1 - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def implicitly_converted_dataset_association(session, history_dataset_association): - instance = model.ImplicitlyConvertedDatasetAssociation( - dataset=history_dataset_association, - parent=history_dataset_association, # using the same dataset; should work here. 
- ) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def implicitly_created_dataset_collection_input(session, history_dataset_collection_association): - instance = model.ImplicitlyCreatedDatasetCollectionInput(None, history_dataset_collection_association) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def interactive_tool_entry_point(session): - instance = model.InteractiveToolEntryPoint() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job(session): - instance = model.Job() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job_container_association(session): - instance = model.JobContainerAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job_export_history_archive(session): - instance = model.JobExportHistoryArchive() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job_external_output_metadata(session, job, history_dataset_association): - instance = model.JobExternalOutputMetadata(job, history_dataset_association) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job_metric_numeric(session): - instance = model.JobMetricNumeric(None, None, None) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job_metric_text(session): - instance = model.JobMetricText(None, None, None) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job_parameter(session): - instance = model.JobParameter(None, None) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job_state_history(session, job): - instance = model.JobStateHistory(job) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job_to_implicit_output_dataset_collection_association(session, dataset_collection): - instance = model.JobToImplicitOutputDatasetCollectionAssociation(None, dataset_collection) - yield from dbcleanup_wrapper(session, 
instance) - - -@pytest.fixture -def job_to_input_dataset_association(session, history_dataset_association): - instance = model.JobToInputDatasetAssociation(None, history_dataset_association) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job_to_input_dataset_collection_association(session, history_dataset_collection_association): - instance = model.JobToInputDatasetCollectionAssociation(None, history_dataset_collection_association) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job_to_input_dataset_collection_element_association(session, dataset_collection_element): - instance = model.JobToInputDatasetCollectionElementAssociation(None, dataset_collection_element) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job_to_input_library_dataset_association(session, library_dataset_dataset_association): - instance = model.JobToInputLibraryDatasetAssociation(None, library_dataset_dataset_association) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job_to_output_dataset_association(session, history_dataset_association): - instance = model.JobToOutputDatasetAssociation(None, history_dataset_association) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job_to_output_dataset_collection_association(session, history_dataset_collection_association): - instance = model.JobToOutputDatasetCollectionAssociation(None, history_dataset_collection_association) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job_to_output_library_dataset_association(session, library_dataset_dataset_association): - instance = model.JobToOutputLibraryDatasetAssociation(None, library_dataset_dataset_association) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def library(session): - instance = model.Library() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def library_dataset(session, 
library_dataset_dataset_association): - instance = model.LibraryDataset(library_dataset_dataset_association=library_dataset_dataset_association) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def library_dataset_collection_annotation_association(session): - instance = model.LibraryDatasetCollectionAnnotationAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def library_dataset_collection_association(session): - instance = model.LibraryDatasetCollectionAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def library_dataset_collection_rating_association(session): - instance = model.LibraryDatasetCollectionRatingAssociation(None, None) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def library_dataset_collection_tag_association(session): - instance = model.LibraryDatasetCollectionTagAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def library_dataset_dataset_association(session): - instance = model.LibraryDatasetDatasetAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def library_dataset_dataset_association_permission(session, library_dataset_dataset_association, role): - instance = model.LibraryDatasetDatasetAssociationPermissions("a", library_dataset_dataset_association, role) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def library_dataset_dataset_association_tag_association(session): - instance = model.LibraryDatasetDatasetAssociationTagAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def library_dataset_permission(session, library_dataset, role): - instance = model.LibraryDatasetPermissions("a", library_dataset, role) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def library_folder(session): - instance = model.LibraryFolder() - yield from dbcleanup_wrapper(session, instance) - - 
-@pytest.fixture -def library_folder_permission(session, library_folder, role): - instance = model.LibraryFolderPermissions("a", library_folder, role) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def library_permission(session, library, role): - instance = model.LibraryPermissions("a", library, role) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def metadata_file(session): - instance = model.MetadataFile() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def page(session, user): - instance = model.Page() - instance.user = user - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def page_annotation_association(session): - instance = model.PageAnnotationAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def page_rating_association(session): - instance = model.PageRatingAssociation(None, None) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def page_revision(session, page): - instance = model.PageRevision() - instance.page = page - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def page_tag_association(session): - instance = model.PageTagAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def page_user_share_association(session): - instance = model.PageUserShareAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def post_job_action(session): - instance = model.PostJobAction("a") - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def post_job_action_association(session, post_job_action, job): - instance = model.PostJobActionAssociation(post_job_action, job) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def quota(session): - instance = model.Quota(get_unique_value(), "b") - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def role(session): - instance = 
model.Role(name=get_unique_value()) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def stored_workflow(session, user): - instance = model.StoredWorkflow() - add_object_to_object_session(instance, user) - instance.user = user - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def stored_workflow_annotation_association(session): - instance = model.StoredWorkflowAnnotationAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def stored_workflow_rating_association(session): - instance = model.StoredWorkflowRatingAssociation(None, None) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def stored_workflow_tag_association(session): - instance = model.StoredWorkflowTagAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def stored_workflow_user_share_association(session): - instance = model.StoredWorkflowUserShareAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def tag(session): - instance = model.Tag() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def task(session, job): - instance = model.Task(job, "a", "b") - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def task_metric_numeric(session): - instance = model.TaskMetricNumeric("a", "b", 9) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def task_metric_text(session): - instance = model.TaskMetricText("a", "b", "c") - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def tool_tag_association(session): - instance = model.ToolTagAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def user(session): - instance = model.User(email=get_unique_value(), password="password") - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def user_address(session): - instance = model.UserAddress() - instance.name = "a" - 
instance.address = "b" - instance.city = "c" - instance.state = "d" - instance.postal_code = "e" - instance.country = "f" - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def user_authnz_token(session, user): - instance = model.UserAuthnzToken("a", "b", "c", 1, "d", user) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def user_group_association(session): - instance = model.UserGroupAssociation(None, None) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def user_preference(session): - instance = model.UserPreference() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def user_quota_association(session): - instance = model.UserQuotaAssociation(None, None) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def user_role_association(session, user, role): - instance = model.UserRoleAssociation(user, role) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def visualization(session, user): - instance = model.Visualization() - instance.user = user - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def visualization_annotation_association(session): - instance = model.VisualizationAnnotationAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def visualization_rating_association(session, user, visualization): - instance = model.VisualizationRatingAssociation(user, visualization) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def visualization_revision(session, visualization): - instance = model.VisualizationRevision(visualization_id=visualization.id) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def visualization_tag_association(session): - instance = model.VisualizationTagAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def visualization_user_share_association(session): - instance = 
model.VisualizationUserShareAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow(session): - instance = model.Workflow() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_invocation(session, workflow): - instance = model.WorkflowInvocation() - instance.workflow = workflow - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_invocation_output_dataset_association(session): - instance = model.WorkflowInvocationOutputDatasetAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_invocation_output_dataset_collection_association(session): - instance = model.WorkflowInvocationOutputDatasetCollectionAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_invocation_output_value(session): - instance = model.WorkflowInvocationOutputValue() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_invocation_step(session, workflow_invocation, workflow_step): - instance = model.WorkflowInvocationStep() - instance.workflow_invocation = workflow_invocation - instance.workflow_step = workflow_step - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_invocation_step_output_dataset_association(session): - instance = model.WorkflowInvocationStepOutputDatasetAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_invocation_step_output_dataset_collection_association(session): - instance = model.WorkflowInvocationStepOutputDatasetCollectionAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_output(session, workflow_step): - instance = model.WorkflowOutput(workflow_step) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_request_input_parameter(session): - instance = model.WorkflowRequestInputParameter() - 
yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_request_input_step_parameter(session): - instance = model.WorkflowRequestInputStepParameter() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_request_step_state(session): - instance = model.WorkflowRequestStepState() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_request_to_input_dataset_association(session): - instance = model.WorkflowRequestToInputDatasetAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_request_to_input_dataset_collection_association(session): - instance = model.WorkflowRequestToInputDatasetCollectionAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_step(session, workflow): - instance = model.WorkflowStep() - add_object_to_object_session(instance, workflow) - instance.workflow = workflow - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_step_annotation_association(session): - instance = model.WorkflowStepAnnotationAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_step_connection(session): - instance = model.WorkflowStepConnection() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_step_input(session, workflow_step): - instance = model.WorkflowStepInput(workflow_step) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_step_tag_association(session): - instance = model.WorkflowStepTagAssociation() - yield from dbcleanup_wrapper(session, instance) - - -# Fixtures yielding factory functions. -# In some tests we may need more than one instance of the same model. We cannot reuse a model -# fixture, and we cannot pass multiple copies of the same fixture to one test. We have to -# instantiate a new instance of the model inside the test. 
However, a test should only know -# how to construct the model it is testing, so instead of constructing an object directly, -# a test calls a factory function, passed to it as a fixture. - - -@pytest.fixture -def dataset_collection_factory(): - def make_instance(*args, **kwds): - if "collection_type" not in kwds: - kwds["collection_type"] = "a" - return model.DatasetCollection(*args, **kwds) - - return make_instance - - -@pytest.fixture -def history_dataset_association_factory(): - def make_instance(*args, **kwds): - return model.HistoryDatasetAssociation(*args, **kwds) - - return make_instance - - -@pytest.fixture -def history_dataset_collection_association_factory(): - def make_instance(*args, **kwds): - return model.HistoryDatasetCollectionAssociation(*args, **kwds) - - return make_instance - - -@pytest.fixture -def history_factory(): - def make_instance(**kwds): - instance = model.History() - if "deleted" in kwds: - instance.deleted = kwds["deleted"] - return instance - - return make_instance - - -@pytest.fixture -def history_rating_association_factory(): - def make_instance(*args, **kwds): - return model.HistoryRatingAssociation(*args, **kwds) - - return make_instance - - -@pytest.fixture -def implicitly_converted_dataset_association_factory(history_dataset_association): - def make_instance(*args, **kwds): - instance = model.ImplicitlyConvertedDatasetAssociation( - dataset=history_dataset_association, - parent=history_dataset_association, # using the same dataset; should work here. 
- ) - return instance - - return make_instance - - -@pytest.fixture -def library_dataset_dataset_association_factory(): - def make_instance(*args, **kwds): - return model.LibraryDatasetDatasetAssociation(*args, **kwds) - - return make_instance - - -@pytest.fixture -def library_folder_factory(): - def make_instance(*args, **kwds): - return model.LibraryFolder(*args, **kwds) - - return make_instance - - -@pytest.fixture -def page_rating_association_factory(): - def make_instance(*args, **kwds): - return model.PageRatingAssociation(*args, **kwds) - - return make_instance - - -@pytest.fixture -def role_factory(): - def make_instance(*args, **kwds): - return model.Role(*args, **kwds) - - return make_instance - - -@pytest.fixture -def stored_workflow_menu_entry_factory(): - def make_instance(*args, **kwds): - return model.StoredWorkflowMenuEntry(*args, **kwds) - - return make_instance - - -@pytest.fixture -def stored_workflow_rating_association_factory(): - def make_instance(*args, **kwds): - return model.StoredWorkflowRatingAssociation(*args, **kwds) - - return make_instance - - -@pytest.fixture -def stored_workflow_tag_association_factory(): - def make_instance(*args, **kwds): - return model.StoredWorkflowTagAssociation(*args, **kwds) - - return make_instance - - -@pytest.fixture -def user_role_association_factory(): - def make_instance(*args, **kwds): - return model.UserRoleAssociation(*args, **kwds) - - return make_instance - - -@pytest.fixture -def visualization_rating_association_factory(): - def make_instance(*args, **kwds): - return model.VisualizationRatingAssociation(*args, **kwds) - - return make_instance - - -@pytest.fixture -def visualization_revision_factory(visualization): - def make_instance(*args, **kwds): - if "visualization_id" not in kwds: - kwds["visualization_id"] = visualization.id - return model.VisualizationRevision(*args, **kwds) - - return make_instance - - -@pytest.fixture -def workflow_factory(): - def make_instance(*args, **kwds): - return 
model.Workflow(*args, **kwds) - - return make_instance - - -@pytest.fixture -def workflow_invocation_factory(workflow): - def make_instance(**kwds): - instance = model.WorkflowInvocation() - instance.workflow = kwds.get("workflow", workflow) - return instance - - return make_instance - - -@pytest.fixture -def workflow_invocation_to_subworkflow_invocation_association_factory(): - def make_instance(*args, **kwds): - return model.WorkflowInvocationToSubworkflowInvocationAssociation(*args, **kwds) - - return make_instance - - -@pytest.fixture -def workflow_step_connection_factory(): - def make_instance(*args, **kwds): - return model.WorkflowStepConnection(*args, **kwds) - - return make_instance - - -@pytest.fixture -def workflow_step_factory(workflow): - def make_instance(*args, **kwds): - instance = model.WorkflowStep() - workflow2 = kwds.get("workflow", workflow) # rename workflow not to confuse pytest - add_object_to_object_session(instance, workflow2) - instance.workflow = workflow2 - instance.subworkflow = kwds.get("subworkflow") - return instance - - return make_instance diff --git a/lib/galaxy/model/unittest_utils/mapping_testing_utils.py b/lib/galaxy/model/unittest_utils/mapping_testing_utils.py deleted file mode 100644 index 76b096820efb..000000000000 --- a/lib/galaxy/model/unittest_utils/mapping_testing_utils.py +++ /dev/null @@ -1,66 +0,0 @@ -from abc import ( - ABC, - abstractmethod, -) -from uuid import uuid4 - -import pytest -from sqlalchemy import UniqueConstraint - - -class AbstractBaseTest(ABC): - @pytest.fixture - def cls_(self): - """ - Return class under test. - Assumptions: if the class under test is Foo, then the class grouping - the tests should be a subclass of BaseTest, named TestFoo. 
- """ - prefix = len("Test") - class_name = self.__class__.__name__[prefix:] - return getattr(self.get_model(), class_name) - - @abstractmethod - def get_model(self): - pass - - -def has_unique_constraint(table, fields): - for constraint in table.constraints: - if isinstance(constraint, UniqueConstraint): - col_names = {c.name for c in constraint.columns} - if set(fields) == col_names: - return True - - -def has_index(table, fields): - for index in table.indexes: - col_names = {c.name for c in index.columns} - if set(fields) == col_names: - return True - - -def collection_consists_of_objects(collection, *objects): - """ - Returns True iff list(collection) == list(objects), where object equality is determined - by primary key equality: object1.id == object2.id. - """ - if len(collection) != len(objects): # False if lengths are different - return False - if not collection: # True if both are empty - return True - - # Sort, then compare each member by its 'id' attribute, which must be its primary key. 
- collection.sort(key=lambda item: item.id) - objects_l = list(objects) - objects_l.sort(key=lambda item: item.id) - - for item1, item2 in zip(collection, objects_l): - if item1.id is None or item2.id is None or item1.id != item2.id: - return False - return True - - -def get_unique_value(): - """Generate unique values to accommodate unique constraints.""" - return uuid4().hex diff --git a/lib/galaxy/model/unittest_utils/tsi_model_fixtures.py b/lib/galaxy/model/unittest_utils/tsi_model_fixtures.py deleted file mode 100644 index 96f01ab03704..000000000000 --- a/lib/galaxy/model/unittest_utils/tsi_model_fixtures.py +++ /dev/null @@ -1,68 +0,0 @@ -import pytest - -from galaxy.model import tool_shed_install as model -from galaxy.model.unittest_utils.model_testing_utils import ( - dbcleanup_wrapper, - initialize_model, -) - - -@pytest.fixture(scope="module") -def init_model(engine): - """Create model objects in the engine's database.""" - # Must use the same engine as the session fixture used by this module. - initialize_model(model.mapper_registry, engine) - - -# Fixtures yielding persisted instances of models, deleted from the database on test exit. 
- - -@pytest.fixture -def repository(session): - instance = model.ToolShedRepository() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def repository_repository_dependency_association(session): - instance = model.RepositoryRepositoryDependencyAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def repository_dependency(session, repository): - instance = model.RepositoryDependency(repository.id) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def tool_dependency(session, repository): - instance = model.ToolDependency() - instance.tool_shed_repository = repository - instance.status = "a" - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def tool_version(session): - instance = model.ToolVersion() - yield from dbcleanup_wrapper(session, instance) - - -# Fixtures yielding factory functions. - - -@pytest.fixture -def tool_version_association_factory(): - def make_instance(*args, **kwds): - return model.ToolVersionAssociation(*args, **kwds) - - return make_instance - - -@pytest.fixture -def tool_version_factory(): - def make_instance(*args, **kwds): - return model.ToolVersion(*args, **kwds) - - return make_instance diff --git a/packages/data/setup.cfg b/packages/data/setup.cfg index d1404b141164..cd53e425aa10 100644 --- a/packages/data/setup.cfg +++ b/packages/data/setup.cfg @@ -71,6 +71,7 @@ console_scripts = galaxy-build-objects = galaxy.model.store.build_objects:main galaxy-load-objects = galaxy.model.store.load_objects:main galaxy-manage-db = galaxy.model.orm.scripts:manage_db + galaxy-prune-histories = galaxy.model.scripts:prune_history_table [options.packages.find] exclude = diff --git a/test/unit/data/model/conftest.py b/test/unit/data/model/conftest.py index 4d8728e9f197..034be09e51b8 100644 --- a/test/unit/data/model/conftest.py +++ b/test/unit/data/model/conftest.py @@ -1,4 +1,7 @@ +import contextlib import os +import random +import string import tempfile 
import uuid @@ -6,6 +9,8 @@ from sqlalchemy import create_engine from sqlalchemy.orm import Session +from galaxy import model as m + @pytest.fixture def database_name(): @@ -43,3 +48,259 @@ def session(init_model, engine): def tmp_directory(): with tempfile.TemporaryDirectory() as tmp_dir: yield tmp_dir + + +# model fixture factories + + +@pytest.fixture +def make_cleanup_event_history_association(session): + def f(**kwd): + model = m.CleanupEventHistoryAssociation(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_data_manager_history_association(session): + def f(**kwd): + model = m.DataManagerHistoryAssociation(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_default_history_permissions(session, make_history, make_role): + def f(**kwd): + if "history" not in kwd: + kwd["history"] = make_history() + if "action" not in kwd: + kwd["action"] = random_str() + if "role" not in kwd: + kwd["role"] = make_role() + model = m.DefaultHistoryPermissions(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_event(session): + def f(**kwd): + model = m.Event(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_galaxy_session(session): + def f(**kwd): + model = m.GalaxySession(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_galaxy_session_to_history_association(session, make_history, make_galaxy_session): + def f(**kwd): + if "galaxy_session" not in kwd: + kwd["galaxy_session"] = make_galaxy_session() + if "history" not in kwd: + kwd["history"] = make_history() + model = m.GalaxySessionToHistoryAssociation(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_history(session, make_user): + def f(**kwd): + if "user" not in kwd: + kwd["user"] = make_user() + model = m.History(**kwd) + write_to_db(session, model) + 
return model + + return f + + +@pytest.fixture +def make_history_annotation_association(session): + def f(**kwd): + model = m.HistoryAnnotationAssociation(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_history_dataset_association(session): + def f(**kwd): + model = m.HistoryDatasetAssociation(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_history_dataset_collection_association(session): + def f(**kwd): + model = m.HistoryDatasetCollectionAssociation(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_history_rating_association(session, make_user, make_history): + def f(**kwd): + if "user" not in kwd: + kwd["user"] = make_user() + if "item" not in kwd: + kwd["item"] = make_history() + model = m.HistoryRatingAssociation(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_history_tag_association(session): + def f(**kwd): + model = m.HistoryTagAssociation(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_history_user_share_association(session): + def f(**kwd): + model = m.HistoryUserShareAssociation(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_job(session): + def f(**kwd): + model = m.Job(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_job_export_history_archive(session): + def f(**kwd): + model = m.JobExportHistoryArchive(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_job_import_history_archive(session): + def f(**kwd): + model = m.JobImportHistoryArchive(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_role(session): + def f(**kwd): + model = m.Role(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def 
make_workflow(session): + def f(**kwd): + model = m.Workflow(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_workflow_invocation(session, make_workflow): + def f(**kwd): + if "workflow" not in kwd: + kwd["workflow"] = make_workflow() + model = m.WorkflowInvocation(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_user(session): + def f(**kwd): + if "username" not in kwd: + kwd["username"] = random_str() + if "email" not in kwd: + kwd["email"] = random_email() + if "password" not in kwd: + kwd["password"] = random_str() + model = m.User(**kwd) + write_to_db(session, model) + return model + + return f + + +# utility functions + + +@contextlib.contextmanager +def transaction(session): + if not session.in_transaction(): + with session.begin(): + yield + else: + yield + + +def random_str() -> str: + alphabet = string.ascii_lowercase + string.digits + size = random.randint(5, 10) + return "".join(random.choices(alphabet, k=size)) + + +def random_email() -> str: + text = random_str() + return f"{text}@galaxy.testing" + + +def write_to_db(session, model) -> None: + with transaction(session): + session.add(model) + session.commit() diff --git a/test/unit/data/model/db/__init__.py b/test/unit/data/model/db/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/test/unit/data/model/db/conftest.py b/test/unit/data/model/db/conftest.py new file mode 100644 index 000000000000..240268ef207f --- /dev/null +++ b/test/unit/data/model/db/conftest.py @@ -0,0 +1,53 @@ +from typing import ( + Generator, + TYPE_CHECKING, +) + +import pytest +from sqlalchemy import ( + create_engine, + text, +) +from sqlalchemy.orm import Session + +from galaxy import model as m + +if TYPE_CHECKING: + from sqlalchemy.engine import Engine + + +@pytest.fixture(scope="module") +def db_url() -> str: + """ + By default, use an in-memory database. 
+ To overwrite, add this fixture with a new db url to a test module. + """ + return "sqlite:///:memory:" + + +@pytest.fixture(scope="module") +def engine(db_url: str) -> "Engine": + return create_engine(db_url) + + +@pytest.fixture +def session(engine: "Engine") -> Session: + return Session(engine) + + +@pytest.fixture(autouse=True, scope="module") +def init_database(engine: "Engine") -> None: + """Create database objects.""" + m.mapper_registry.metadata.create_all(engine) + + +@pytest.fixture(autouse=True) +def clear_database(engine: "Engine") -> "Generator": + """Delete all rows from all tables. Called after each test.""" + yield + with engine.begin() as conn: + for table in m.mapper_registry.metadata.tables: + # Unless db is sqlite, disable foreign key constraints to delete out of order + if engine.name != "sqlite": + conn.execute(text(f"ALTER TABLE {table} DISABLE TRIGGER ALL")) + conn.execute(text(f"DELETE FROM {table}")) diff --git a/test/unit/data/model/db/test_history_table_pruner.py b/test/unit/data/model/db/test_history_table_pruner.py new file mode 100644 index 000000000000..376b0c52939c --- /dev/null +++ b/test/unit/data/model/db/test_history_table_pruner.py @@ -0,0 +1,167 @@ +import datetime + +import pytest +from sqlalchemy import ( + func, + select, + text, +) + +from galaxy import model as m +from galaxy.model.scripts.history_table_pruner import HistoryTablePruner + + +@pytest.fixture() +def setup_db( + db_url, + session, + make_user, + make_history, + make_event, + make_history_tag_association, + make_history_annotation_association, + make_history_rating_association, + make_history_user_share_association, + make_default_history_permissions, + make_data_manager_history_association, + make_cleanup_event_history_association, + make_galaxy_session_to_history_association, + make_job_import_history_archive, + make_job_export_history_archive, + make_workflow_invocation, + make_history_dataset_collection_association, + make_job, + 
make_history_dataset_association, + make_galaxy_session, +): + # 1. Create 100 histories; make them deletable: user = null, hid_counter = 1. + histories = [] + for id in range(100): + h = make_history(id=id) + h.user = None + h.hid_counter = 1 + histories.append(h) + + # 2. Set 10 histories as not deletable: hid_counter != 1. + for i in range(10): + histories[i].hid_counter = 42 + + # 3. Set next 10 histories as not deletable: user not null. + u = make_user() + for i in range(10, 20): + histories[i].user = u + + # 4. For the next 6 histories create associations that cannot be deleted. + make_job_import_history_archive(history=histories[20]) + make_job_export_history_archive(history=histories[21]) + make_workflow_invocation(history=histories[22]) + make_history_dataset_collection_association(history=histories[23]) + make_history_dataset_association(history=histories[25]) + make_job().history = histories[24] + + # 5. For the next 10 histories create associations that can be deleted. + make_event(history=histories[26]) + make_history_tag_association(history=histories[27]) + make_history_annotation_association(history=histories[28]) + make_history_rating_association(item=histories[29]) + make_history_user_share_association(history=histories[30]) + make_default_history_permissions(history=histories[31]) + make_data_manager_history_association(history=histories[32]) + make_cleanup_event_history_association(history_id=histories[33].id) + make_galaxy_session_to_history_association(history=histories[34]) + # HistoryAudit is not instantiable, so created association manually. + stmt = text("insert into history_audit values(:history_id, :update_time)") + params = {"history_id": histories[35].id, "update_time": datetime.date.today()} + session.execute(stmt, params) + + # 6. Create a galaxy_session record referring to a history. + # This cannot be deleted, but the history reference can be set to null. 
+ make_galaxy_session(current_history=histories[36]) + + session.commit() + + # TOTAL counts of loaded histories: + # histories that should NOT be deleted: 10 + 10 + 6 = 26 + # histories that SHOULD be deleted: 100 - 26 = 74 + + +def test_run(setup_db, session, db_url, engine): + + def verify_counts(model, expected): + assert session.scalar(select(func.count()).select_from(model)) == expected + + # 1. Verify history counts + stmt = select(m.History).order_by(m.History.id) + result = session.scalars(stmt).all() + assert len(result) == 100 + for i, h in enumerate(result): + if i < 10: # first 10 + assert h.hid_counter > 1 + assert h.user is None + elif i < 20: # next 10 + assert h.hid_counter == 1 + assert h.user is not None + else: # the rest + assert h.hid_counter == 1 + assert h.user is None + + # 2. Verify association counts + for model in [ + m.JobImportHistoryArchive, + m.JobExportHistoryArchive, + m.WorkflowInvocation, + m.HistoryDatasetCollectionAssociation, + m.Job, + m.HistoryDatasetAssociation, + m.Event, + m.HistoryTagAssociation, + m.HistoryAnnotationAssociation, + m.HistoryRatingAssociation, + m.HistoryUserShareAssociation, + m.DefaultHistoryPermissions, + m.DataManagerHistoryAssociation, + m.CleanupEventHistoryAssociation, + m.GalaxySessionToHistoryAssociation, + m.HistoryAudit, + ]: + verify_counts(model, 1) + verify_counts( + m.GalaxySession, 2 + ) # one extra session was automatically created for GalaxySessionToHistoryAssociation + + # 3. 
Run pruning script + today = datetime.date.today() + newdate = today.replace(year=today.year + 1) + HistoryTablePruner(engine, max_create_time=newdate).run() + + # 4 Verify new counts (for details on expected counts see comments in setup_db) + + # 4.1 Verify new history counts + verify_counts(m.History, 26) + + # 4.2 Verify new association counts: no change (these associations should NOT be deleted) + for model in [ + m.JobImportHistoryArchive, + m.JobExportHistoryArchive, + m.WorkflowInvocation, + m.HistoryDatasetCollectionAssociation, + m.Job, + m.HistoryDatasetAssociation, + ]: + verify_counts(model, 1) + verify_counts(m.GalaxySession, 2) + + # 4.3 Verify new association counts: deleted (these associations SHOULD be deleted) + for model in [ + m.Event, + m.HistoryTagAssociation, + m.HistoryAnnotationAssociation, + m.HistoryRatingAssociation, + m.HistoryUserShareAssociation, + m.DefaultHistoryPermissions, + m.DataManagerHistoryAssociation, + m.CleanupEventHistoryAssociation, + m.GalaxySessionToHistoryAssociation, + m.HistoryAudit, + ]: + verify_counts(model, 0) diff --git a/test/unit/data/model/test_mapping_testing_utils.py b/test/unit/data/model/test_mapping_testing_utils.py deleted file mode 100644 index 346ec2421403..000000000000 --- a/test/unit/data/model/test_mapping_testing_utils.py +++ /dev/null @@ -1,100 +0,0 @@ -""" -This module contains tests for the utility functions in the test_mapping module. 
-""" - -import pytest -from sqlalchemy import ( - Column, - Index, - Integer, - UniqueConstraint, -) -from sqlalchemy.orm import registry - -from galaxy.model import _HasTable -from galaxy.model.unittest_utils.mapping_testing_utils import ( - collection_consists_of_objects, - has_index, - has_unique_constraint, -) -from galaxy.model.unittest_utils.model_testing_utils import ( - get_stored_instance_by_id, - initialize_model, - persist, -) - - -def test_has_index(session): - assert has_index(Bar.__table__, ("field1",)) - assert not has_index(Foo.__table__, ("field1",)) - - -def test_has_unique_constraint(session): - assert has_unique_constraint(Bar.__table__, ("field2",)) - assert not has_unique_constraint(Foo.__table__, ("field1",)) - - -def test_collection_consists_of_objects(session): - # create objects - foo1 = Foo() - foo2 = Foo() - foo3 = Foo() - # store objects - persist(session, foo1) - persist(session, foo2) - persist(session, foo3) - - # retrieve objects from storage - stored_foo1 = get_stored_instance_by_id(session, Foo, foo1.id) - stored_foo2 = get_stored_instance_by_id(session, Foo, foo2.id) - stored_foo3 = get_stored_instance_by_id(session, Foo, foo3.id) - - # verify retrieved objects are not the same python objects as those we stored - assert stored_foo1 is not foo1 - assert stored_foo2 is not foo2 - assert stored_foo3 is not foo3 - - # trivial case - assert collection_consists_of_objects([stored_foo1, stored_foo2], foo1, foo2) - # empty collection and no objects - assert collection_consists_of_objects([]) - # ordering in collection does not matter - assert collection_consists_of_objects([stored_foo2, stored_foo1], foo1, foo2) - # contains wrong object - assert not collection_consists_of_objects([stored_foo1, stored_foo3], foo1, foo2) - # contains wrong number of objects - assert not collection_consists_of_objects([stored_foo1, stored_foo1, stored_foo2], foo1, foo2) - # if an object's primary key is not set, it cannot be equal to another object - 
foo1.id, stored_foo1.id = None, None # type:ignore[assignment] - assert not collection_consists_of_objects([stored_foo1], foo1) - - -# Test utilities - -mapper_registry = registry() - - -@mapper_registry.mapped -class Foo(_HasTable): - __tablename__ = "foo" - id = Column(Integer, primary_key=True) - field1 = Column(Integer) - - -@mapper_registry.mapped -class Bar(_HasTable): - __tablename__ = "bar" - id = Column(Integer, primary_key=True) - field1 = Column(Integer) - field2 = Column(Integer) - __table_args__ = ( - Index("ix", "field1"), - UniqueConstraint("field2"), - ) - - -@pytest.fixture(scope="module") -def init_model(engine): - """Create model objects in the engine's database.""" - # Must use the same engine as the session fixture used by this module. - initialize_model(mapper_registry, engine)