From 76c8003d4f5a3f14401ea653e677abe75abc5f91 Mon Sep 17 00:00:00 2001 From: John Davis Date: Tue, 30 Apr 2024 15:30:38 -0400 Subject: [PATCH 01/11] Drop unused mapping testing utilities and fixtures --- .../unittest_utils/gxy_model_fixtures.py | 1000 ----------------- .../unittest_utils/mapping_testing_utils.py | 66 -- .../unittest_utils/tsi_model_fixtures.py | 68 -- .../data/model/test_mapping_testing_utils.py | 100 -- 4 files changed, 1234 deletions(-) delete mode 100644 lib/galaxy/model/unittest_utils/gxy_model_fixtures.py delete mode 100644 lib/galaxy/model/unittest_utils/mapping_testing_utils.py delete mode 100644 lib/galaxy/model/unittest_utils/tsi_model_fixtures.py delete mode 100644 test/unit/data/model/test_mapping_testing_utils.py diff --git a/lib/galaxy/model/unittest_utils/gxy_model_fixtures.py b/lib/galaxy/model/unittest_utils/gxy_model_fixtures.py deleted file mode 100644 index d3ece2b7ecdb..000000000000 --- a/lib/galaxy/model/unittest_utils/gxy_model_fixtures.py +++ /dev/null @@ -1,1000 +0,0 @@ -import pytest - -from galaxy import model -from galaxy.model.orm.util import add_object_to_object_session -from galaxy.model.unittest_utils.mapping_testing_utils import get_unique_value -from galaxy.model.unittest_utils.model_testing_utils import ( - dbcleanup_wrapper, - initialize_model, -) - - -@pytest.fixture(scope="module") -def init_model(engine): - """Create model objects in the engine's database.""" - # Must use the same engine as the session fixture used by this module. - initialize_model(model.mapper_registry, engine) - - -# Fixtures yielding persisted instances of models, deleted from the database on test exit. 
- - -@pytest.fixture -def api_keys(session): - instance = model.APIKeys(key=get_unique_value()) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def cleanup_event(session): - instance = model.CleanupEvent() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def cloud_authz(session, user, user_authnz_token): - instance = model.CloudAuthz(user.id, "a", "b", user_authnz_token.id, "c") - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def custos_authnz_token(session, user): - instance = model.CustosAuthnzToken() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def data_manager_history_association(session): - instance = model.DataManagerHistoryAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def data_manager_job_association(session): - instance = model.DataManagerJobAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def dataset(session): - instance = model.Dataset() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def dataset_collection(session): - instance = model.DatasetCollection(collection_type="a") - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def dataset_collection_element(session, dataset_collection, history_dataset_association): - instance = model.DatasetCollectionElement(collection=dataset_collection, element=history_dataset_association) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def dataset_hash(session): - instance = model.DatasetHash() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def dataset_permission(session, dataset): - instance = model.DatasetPermissions("a", dataset) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def dataset_source(session): - instance = model.DatasetSource() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def 
dataset_source_hash(session): - instance = model.DatasetSourceHash() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def default_history_permissions(session, history, role): - instance = model.DefaultHistoryPermissions(history, "a", role) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def default_quota_association(session, quota): - type_ = model.DefaultQuotaAssociation.types.REGISTERED - instance = model.DefaultQuotaAssociation(type_, quota) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def default_user_permissions(session, user, role): - instance = model.DefaultUserPermissions(user, None, role) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def dynamic_tool(session): - instance = model.DynamicTool() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def extended_metadata(session): - instance = model.ExtendedMetadata(None) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def extended_metadata_index(session, extended_metadata): - instance = model.ExtendedMetadataIndex(extended_metadata, None, None) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def form_definition(session, form_definition_current): - instance = model.FormDefinition(name="a", form_definition_current=form_definition_current) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def form_definition_current(session): - instance = model.FormDefinitionCurrent() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def form_values(session): - instance = model.FormValues() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def galaxy_session(session): - instance = model.GalaxySession(session_key=get_unique_value()) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def galaxy_session_history_association(session, galaxy_session, history): - instance = 
model.GalaxySessionToHistoryAssociation(galaxy_session, history) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def group(session): - instance = model.Group(name=get_unique_value()) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def group_quota_association(session): - instance = model.GroupQuotaAssociation(None, None) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def group_role_association(session): - instance = model.GroupRoleAssociation(None, None) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def history(session): - instance = model.History() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def history_annotation_association(session): - instance = model.HistoryAnnotationAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def history_dataset_association(session, dataset): - instance = model.HistoryDatasetAssociation(dataset=dataset) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def history_dataset_association_annotation_association(session): - instance = model.HistoryDatasetAssociationAnnotationAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def history_dataset_association_rating_association(session): - instance = model.HistoryDatasetAssociationRatingAssociation(None, None) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def history_dataset_association_tag_association(session): - instance = model.HistoryDatasetAssociationTagAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def history_dataset_collection_annotation_association(session): - instance = model.HistoryDatasetCollectionAssociationAnnotationAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def history_dataset_collection_association(session): - instance = 
model.HistoryDatasetCollectionAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def history_dataset_collection_rating_association( - session, - user, - history_dataset_collection_association, -): - instance = model.HistoryDatasetCollectionRatingAssociation(user, history_dataset_collection_association) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def history_dataset_collection_tag_association(session): - instance = model.HistoryDatasetCollectionTagAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def history_rating_association(session, user, history): - instance = model.HistoryRatingAssociation(user, history) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def history_tag_association(session): - instance = model.HistoryTagAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def history_user_share_association(session): - instance = model.HistoryUserShareAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def implicit_collection_jobs(session): - instance = model.ImplicitCollectionJobs(populated_state="new") - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def implicit_collection_jobs_job_association(session): - instance = model.ImplicitCollectionJobsJobAssociation() - instance.order_index = 1 - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def implicitly_converted_dataset_association(session, history_dataset_association): - instance = model.ImplicitlyConvertedDatasetAssociation( - dataset=history_dataset_association, - parent=history_dataset_association, # using the same dataset; should work here. 
- ) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def implicitly_created_dataset_collection_input(session, history_dataset_collection_association): - instance = model.ImplicitlyCreatedDatasetCollectionInput(None, history_dataset_collection_association) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def interactive_tool_entry_point(session): - instance = model.InteractiveToolEntryPoint() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job(session): - instance = model.Job() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job_container_association(session): - instance = model.JobContainerAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job_export_history_archive(session): - instance = model.JobExportHistoryArchive() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job_external_output_metadata(session, job, history_dataset_association): - instance = model.JobExternalOutputMetadata(job, history_dataset_association) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job_metric_numeric(session): - instance = model.JobMetricNumeric(None, None, None) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job_metric_text(session): - instance = model.JobMetricText(None, None, None) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job_parameter(session): - instance = model.JobParameter(None, None) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job_state_history(session, job): - instance = model.JobStateHistory(job) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job_to_implicit_output_dataset_collection_association(session, dataset_collection): - instance = model.JobToImplicitOutputDatasetCollectionAssociation(None, dataset_collection) - yield from dbcleanup_wrapper(session, 
instance) - - -@pytest.fixture -def job_to_input_dataset_association(session, history_dataset_association): - instance = model.JobToInputDatasetAssociation(None, history_dataset_association) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job_to_input_dataset_collection_association(session, history_dataset_collection_association): - instance = model.JobToInputDatasetCollectionAssociation(None, history_dataset_collection_association) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job_to_input_dataset_collection_element_association(session, dataset_collection_element): - instance = model.JobToInputDatasetCollectionElementAssociation(None, dataset_collection_element) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job_to_input_library_dataset_association(session, library_dataset_dataset_association): - instance = model.JobToInputLibraryDatasetAssociation(None, library_dataset_dataset_association) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job_to_output_dataset_association(session, history_dataset_association): - instance = model.JobToOutputDatasetAssociation(None, history_dataset_association) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job_to_output_dataset_collection_association(session, history_dataset_collection_association): - instance = model.JobToOutputDatasetCollectionAssociation(None, history_dataset_collection_association) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def job_to_output_library_dataset_association(session, library_dataset_dataset_association): - instance = model.JobToOutputLibraryDatasetAssociation(None, library_dataset_dataset_association) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def library(session): - instance = model.Library() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def library_dataset(session, 
library_dataset_dataset_association): - instance = model.LibraryDataset(library_dataset_dataset_association=library_dataset_dataset_association) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def library_dataset_collection_annotation_association(session): - instance = model.LibraryDatasetCollectionAnnotationAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def library_dataset_collection_association(session): - instance = model.LibraryDatasetCollectionAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def library_dataset_collection_rating_association(session): - instance = model.LibraryDatasetCollectionRatingAssociation(None, None) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def library_dataset_collection_tag_association(session): - instance = model.LibraryDatasetCollectionTagAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def library_dataset_dataset_association(session): - instance = model.LibraryDatasetDatasetAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def library_dataset_dataset_association_permission(session, library_dataset_dataset_association, role): - instance = model.LibraryDatasetDatasetAssociationPermissions("a", library_dataset_dataset_association, role) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def library_dataset_dataset_association_tag_association(session): - instance = model.LibraryDatasetDatasetAssociationTagAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def library_dataset_permission(session, library_dataset, role): - instance = model.LibraryDatasetPermissions("a", library_dataset, role) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def library_folder(session): - instance = model.LibraryFolder() - yield from dbcleanup_wrapper(session, instance) - - 
-@pytest.fixture -def library_folder_permission(session, library_folder, role): - instance = model.LibraryFolderPermissions("a", library_folder, role) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def library_permission(session, library, role): - instance = model.LibraryPermissions("a", library, role) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def metadata_file(session): - instance = model.MetadataFile() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def page(session, user): - instance = model.Page() - instance.user = user - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def page_annotation_association(session): - instance = model.PageAnnotationAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def page_rating_association(session): - instance = model.PageRatingAssociation(None, None) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def page_revision(session, page): - instance = model.PageRevision() - instance.page = page - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def page_tag_association(session): - instance = model.PageTagAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def page_user_share_association(session): - instance = model.PageUserShareAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def post_job_action(session): - instance = model.PostJobAction("a") - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def post_job_action_association(session, post_job_action, job): - instance = model.PostJobActionAssociation(post_job_action, job) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def quota(session): - instance = model.Quota(get_unique_value(), "b") - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def role(session): - instance = 
model.Role(name=get_unique_value()) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def stored_workflow(session, user): - instance = model.StoredWorkflow() - add_object_to_object_session(instance, user) - instance.user = user - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def stored_workflow_annotation_association(session): - instance = model.StoredWorkflowAnnotationAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def stored_workflow_rating_association(session): - instance = model.StoredWorkflowRatingAssociation(None, None) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def stored_workflow_tag_association(session): - instance = model.StoredWorkflowTagAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def stored_workflow_user_share_association(session): - instance = model.StoredWorkflowUserShareAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def tag(session): - instance = model.Tag() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def task(session, job): - instance = model.Task(job, "a", "b") - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def task_metric_numeric(session): - instance = model.TaskMetricNumeric("a", "b", 9) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def task_metric_text(session): - instance = model.TaskMetricText("a", "b", "c") - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def tool_tag_association(session): - instance = model.ToolTagAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def user(session): - instance = model.User(email=get_unique_value(), password="password") - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def user_address(session): - instance = model.UserAddress() - instance.name = "a" - 
instance.address = "b" - instance.city = "c" - instance.state = "d" - instance.postal_code = "e" - instance.country = "f" - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def user_authnz_token(session, user): - instance = model.UserAuthnzToken("a", "b", "c", 1, "d", user) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def user_group_association(session): - instance = model.UserGroupAssociation(None, None) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def user_preference(session): - instance = model.UserPreference() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def user_quota_association(session): - instance = model.UserQuotaAssociation(None, None) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def user_role_association(session, user, role): - instance = model.UserRoleAssociation(user, role) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def visualization(session, user): - instance = model.Visualization() - instance.user = user - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def visualization_annotation_association(session): - instance = model.VisualizationAnnotationAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def visualization_rating_association(session, user, visualization): - instance = model.VisualizationRatingAssociation(user, visualization) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def visualization_revision(session, visualization): - instance = model.VisualizationRevision(visualization_id=visualization.id) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def visualization_tag_association(session): - instance = model.VisualizationTagAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def visualization_user_share_association(session): - instance = 
model.VisualizationUserShareAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow(session): - instance = model.Workflow() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_invocation(session, workflow): - instance = model.WorkflowInvocation() - instance.workflow = workflow - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_invocation_output_dataset_association(session): - instance = model.WorkflowInvocationOutputDatasetAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_invocation_output_dataset_collection_association(session): - instance = model.WorkflowInvocationOutputDatasetCollectionAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_invocation_output_value(session): - instance = model.WorkflowInvocationOutputValue() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_invocation_step(session, workflow_invocation, workflow_step): - instance = model.WorkflowInvocationStep() - instance.workflow_invocation = workflow_invocation - instance.workflow_step = workflow_step - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_invocation_step_output_dataset_association(session): - instance = model.WorkflowInvocationStepOutputDatasetAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_invocation_step_output_dataset_collection_association(session): - instance = model.WorkflowInvocationStepOutputDatasetCollectionAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_output(session, workflow_step): - instance = model.WorkflowOutput(workflow_step) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_request_input_parameter(session): - instance = model.WorkflowRequestInputParameter() - 
yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_request_input_step_parameter(session): - instance = model.WorkflowRequestInputStepParameter() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_request_step_state(session): - instance = model.WorkflowRequestStepState() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_request_to_input_dataset_association(session): - instance = model.WorkflowRequestToInputDatasetAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_request_to_input_dataset_collection_association(session): - instance = model.WorkflowRequestToInputDatasetCollectionAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_step(session, workflow): - instance = model.WorkflowStep() - add_object_to_object_session(instance, workflow) - instance.workflow = workflow - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_step_annotation_association(session): - instance = model.WorkflowStepAnnotationAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_step_connection(session): - instance = model.WorkflowStepConnection() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_step_input(session, workflow_step): - instance = model.WorkflowStepInput(workflow_step) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def workflow_step_tag_association(session): - instance = model.WorkflowStepTagAssociation() - yield from dbcleanup_wrapper(session, instance) - - -# Fixtures yielding factory functions. -# In some tests we may need more than one instance of the same model. We cannot reuse a model -# fixture, and we cannot pass multiple copies of the same fixture to one test. We have to -# instantiate a new instance of the model inside the test. 
However, a test should only know -# how to construct the model it is testing, so instead of constructing an object directly, -# a test calls a factory function, passed to it as a fixture. - - -@pytest.fixture -def dataset_collection_factory(): - def make_instance(*args, **kwds): - if "collection_type" not in kwds: - kwds["collection_type"] = "a" - return model.DatasetCollection(*args, **kwds) - - return make_instance - - -@pytest.fixture -def history_dataset_association_factory(): - def make_instance(*args, **kwds): - return model.HistoryDatasetAssociation(*args, **kwds) - - return make_instance - - -@pytest.fixture -def history_dataset_collection_association_factory(): - def make_instance(*args, **kwds): - return model.HistoryDatasetCollectionAssociation(*args, **kwds) - - return make_instance - - -@pytest.fixture -def history_factory(): - def make_instance(**kwds): - instance = model.History() - if "deleted" in kwds: - instance.deleted = kwds["deleted"] - return instance - - return make_instance - - -@pytest.fixture -def history_rating_association_factory(): - def make_instance(*args, **kwds): - return model.HistoryRatingAssociation(*args, **kwds) - - return make_instance - - -@pytest.fixture -def implicitly_converted_dataset_association_factory(history_dataset_association): - def make_instance(*args, **kwds): - instance = model.ImplicitlyConvertedDatasetAssociation( - dataset=history_dataset_association, - parent=history_dataset_association, # using the same dataset; should work here. 
- ) - return instance - - return make_instance - - -@pytest.fixture -def library_dataset_dataset_association_factory(): - def make_instance(*args, **kwds): - return model.LibraryDatasetDatasetAssociation(*args, **kwds) - - return make_instance - - -@pytest.fixture -def library_folder_factory(): - def make_instance(*args, **kwds): - return model.LibraryFolder(*args, **kwds) - - return make_instance - - -@pytest.fixture -def page_rating_association_factory(): - def make_instance(*args, **kwds): - return model.PageRatingAssociation(*args, **kwds) - - return make_instance - - -@pytest.fixture -def role_factory(): - def make_instance(*args, **kwds): - return model.Role(*args, **kwds) - - return make_instance - - -@pytest.fixture -def stored_workflow_menu_entry_factory(): - def make_instance(*args, **kwds): - return model.StoredWorkflowMenuEntry(*args, **kwds) - - return make_instance - - -@pytest.fixture -def stored_workflow_rating_association_factory(): - def make_instance(*args, **kwds): - return model.StoredWorkflowRatingAssociation(*args, **kwds) - - return make_instance - - -@pytest.fixture -def stored_workflow_tag_association_factory(): - def make_instance(*args, **kwds): - return model.StoredWorkflowTagAssociation(*args, **kwds) - - return make_instance - - -@pytest.fixture -def user_role_association_factory(): - def make_instance(*args, **kwds): - return model.UserRoleAssociation(*args, **kwds) - - return make_instance - - -@pytest.fixture -def visualization_rating_association_factory(): - def make_instance(*args, **kwds): - return model.VisualizationRatingAssociation(*args, **kwds) - - return make_instance - - -@pytest.fixture -def visualization_revision_factory(visualization): - def make_instance(*args, **kwds): - if "visualization_id" not in kwds: - kwds["visualization_id"] = visualization.id - return model.VisualizationRevision(*args, **kwds) - - return make_instance - - -@pytest.fixture -def workflow_factory(): - def make_instance(*args, **kwds): - return 
model.Workflow(*args, **kwds) - - return make_instance - - -@pytest.fixture -def workflow_invocation_factory(workflow): - def make_instance(**kwds): - instance = model.WorkflowInvocation() - instance.workflow = kwds.get("workflow", workflow) - return instance - - return make_instance - - -@pytest.fixture -def workflow_invocation_to_subworkflow_invocation_association_factory(): - def make_instance(*args, **kwds): - return model.WorkflowInvocationToSubworkflowInvocationAssociation(*args, **kwds) - - return make_instance - - -@pytest.fixture -def workflow_step_connection_factory(): - def make_instance(*args, **kwds): - return model.WorkflowStepConnection(*args, **kwds) - - return make_instance - - -@pytest.fixture -def workflow_step_factory(workflow): - def make_instance(*args, **kwds): - instance = model.WorkflowStep() - workflow2 = kwds.get("workflow", workflow) # rename workflow not to confuse pytest - add_object_to_object_session(instance, workflow2) - instance.workflow = workflow2 - instance.subworkflow = kwds.get("subworkflow") - return instance - - return make_instance diff --git a/lib/galaxy/model/unittest_utils/mapping_testing_utils.py b/lib/galaxy/model/unittest_utils/mapping_testing_utils.py deleted file mode 100644 index 76b096820efb..000000000000 --- a/lib/galaxy/model/unittest_utils/mapping_testing_utils.py +++ /dev/null @@ -1,66 +0,0 @@ -from abc import ( - ABC, - abstractmethod, -) -from uuid import uuid4 - -import pytest -from sqlalchemy import UniqueConstraint - - -class AbstractBaseTest(ABC): - @pytest.fixture - def cls_(self): - """ - Return class under test. - Assumptions: if the class under test is Foo, then the class grouping - the tests should be a subclass of BaseTest, named TestFoo. 
- """ - prefix = len("Test") - class_name = self.__class__.__name__[prefix:] - return getattr(self.get_model(), class_name) - - @abstractmethod - def get_model(self): - pass - - -def has_unique_constraint(table, fields): - for constraint in table.constraints: - if isinstance(constraint, UniqueConstraint): - col_names = {c.name for c in constraint.columns} - if set(fields) == col_names: - return True - - -def has_index(table, fields): - for index in table.indexes: - col_names = {c.name for c in index.columns} - if set(fields) == col_names: - return True - - -def collection_consists_of_objects(collection, *objects): - """ - Returns True iff list(collection) == list(objects), where object equality is determined - by primary key equality: object1.id == object2.id. - """ - if len(collection) != len(objects): # False if lengths are different - return False - if not collection: # True if both are empty - return True - - # Sort, then compare each member by its 'id' attribute, which must be its primary key. 
- collection.sort(key=lambda item: item.id) - objects_l = list(objects) - objects_l.sort(key=lambda item: item.id) - - for item1, item2 in zip(collection, objects_l): - if item1.id is None or item2.id is None or item1.id != item2.id: - return False - return True - - -def get_unique_value(): - """Generate unique values to accommodate unique constraints.""" - return uuid4().hex diff --git a/lib/galaxy/model/unittest_utils/tsi_model_fixtures.py b/lib/galaxy/model/unittest_utils/tsi_model_fixtures.py deleted file mode 100644 index 96f01ab03704..000000000000 --- a/lib/galaxy/model/unittest_utils/tsi_model_fixtures.py +++ /dev/null @@ -1,68 +0,0 @@ -import pytest - -from galaxy.model import tool_shed_install as model -from galaxy.model.unittest_utils.model_testing_utils import ( - dbcleanup_wrapper, - initialize_model, -) - - -@pytest.fixture(scope="module") -def init_model(engine): - """Create model objects in the engine's database.""" - # Must use the same engine as the session fixture used by this module. - initialize_model(model.mapper_registry, engine) - - -# Fixtures yielding persisted instances of models, deleted from the database on test exit. 
- - -@pytest.fixture -def repository(session): - instance = model.ToolShedRepository() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def repository_repository_dependency_association(session): - instance = model.RepositoryRepositoryDependencyAssociation() - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def repository_dependency(session, repository): - instance = model.RepositoryDependency(repository.id) - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def tool_dependency(session, repository): - instance = model.ToolDependency() - instance.tool_shed_repository = repository - instance.status = "a" - yield from dbcleanup_wrapper(session, instance) - - -@pytest.fixture -def tool_version(session): - instance = model.ToolVersion() - yield from dbcleanup_wrapper(session, instance) - - -# Fixtures yielding factory functions. - - -@pytest.fixture -def tool_version_association_factory(): - def make_instance(*args, **kwds): - return model.ToolVersionAssociation(*args, **kwds) - - return make_instance - - -@pytest.fixture -def tool_version_factory(): - def make_instance(*args, **kwds): - return model.ToolVersion(*args, **kwds) - - return make_instance diff --git a/test/unit/data/model/test_mapping_testing_utils.py b/test/unit/data/model/test_mapping_testing_utils.py deleted file mode 100644 index 346ec2421403..000000000000 --- a/test/unit/data/model/test_mapping_testing_utils.py +++ /dev/null @@ -1,100 +0,0 @@ -""" -This module contains tests for the utility functions in the test_mapping module. 
-""" - -import pytest -from sqlalchemy import ( - Column, - Index, - Integer, - UniqueConstraint, -) -from sqlalchemy.orm import registry - -from galaxy.model import _HasTable -from galaxy.model.unittest_utils.mapping_testing_utils import ( - collection_consists_of_objects, - has_index, - has_unique_constraint, -) -from galaxy.model.unittest_utils.model_testing_utils import ( - get_stored_instance_by_id, - initialize_model, - persist, -) - - -def test_has_index(session): - assert has_index(Bar.__table__, ("field1",)) - assert not has_index(Foo.__table__, ("field1",)) - - -def test_has_unique_constraint(session): - assert has_unique_constraint(Bar.__table__, ("field2",)) - assert not has_unique_constraint(Foo.__table__, ("field1",)) - - -def test_collection_consists_of_objects(session): - # create objects - foo1 = Foo() - foo2 = Foo() - foo3 = Foo() - # store objects - persist(session, foo1) - persist(session, foo2) - persist(session, foo3) - - # retrieve objects from storage - stored_foo1 = get_stored_instance_by_id(session, Foo, foo1.id) - stored_foo2 = get_stored_instance_by_id(session, Foo, foo2.id) - stored_foo3 = get_stored_instance_by_id(session, Foo, foo3.id) - - # verify retrieved objects are not the same python objects as those we stored - assert stored_foo1 is not foo1 - assert stored_foo2 is not foo2 - assert stored_foo3 is not foo3 - - # trivial case - assert collection_consists_of_objects([stored_foo1, stored_foo2], foo1, foo2) - # empty collection and no objects - assert collection_consists_of_objects([]) - # ordering in collection does not matter - assert collection_consists_of_objects([stored_foo2, stored_foo1], foo1, foo2) - # contains wrong object - assert not collection_consists_of_objects([stored_foo1, stored_foo3], foo1, foo2) - # contains wrong number of objects - assert not collection_consists_of_objects([stored_foo1, stored_foo1, stored_foo2], foo1, foo2) - # if an object's primary key is not set, it cannot be equal to another object - 
foo1.id, stored_foo1.id = None, None # type:ignore[assignment] - assert not collection_consists_of_objects([stored_foo1], foo1) - - -# Test utilities - -mapper_registry = registry() - - -@mapper_registry.mapped -class Foo(_HasTable): - __tablename__ = "foo" - id = Column(Integer, primary_key=True) - field1 = Column(Integer) - - -@mapper_registry.mapped -class Bar(_HasTable): - __tablename__ = "bar" - id = Column(Integer, primary_key=True) - field1 = Column(Integer) - field2 = Column(Integer) - __table_args__ = ( - Index("ix", "field1"), - UniqueConstraint("field2"), - ) - - -@pytest.fixture(scope="module") -def init_model(engine): - """Create model objects in the engine's database.""" - # Must use the same engine as the session fixture used by this module. - initialize_model(mapper_registry, engine) From 2652ec6510d3f61c41a9ec123f817dcebbf24b37 Mon Sep 17 00:00:00 2001 From: John Davis Date: Wed, 1 May 2024 12:05:54 -0400 Subject: [PATCH 02/11] Add conftest.py for db tests --- test/unit/data/model/db/conftest.py | 53 +++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 test/unit/data/model/db/conftest.py diff --git a/test/unit/data/model/db/conftest.py b/test/unit/data/model/db/conftest.py new file mode 100644 index 000000000000..240268ef207f --- /dev/null +++ b/test/unit/data/model/db/conftest.py @@ -0,0 +1,53 @@ +from typing import ( + Generator, + TYPE_CHECKING, +) + +import pytest +from sqlalchemy import ( + create_engine, + text, +) +from sqlalchemy.orm import Session + +from galaxy import model as m + +if TYPE_CHECKING: + from sqlalchemy.engine import Engine + + +@pytest.fixture(scope="module") +def db_url() -> str: + """ + By default, use an in-memory database. + To overwrite, add this fixture with a new db url to a test module. 
+ """ + return "sqlite:///:memory:" + + +@pytest.fixture(scope="module") +def engine(db_url: str) -> "Engine": + return create_engine(db_url) + + +@pytest.fixture +def session(engine: "Engine") -> Session: + return Session(engine) + + +@pytest.fixture(autouse=True, scope="module") +def init_database(engine: "Engine") -> None: + """Create database objects.""" + m.mapper_registry.metadata.create_all(engine) + + +@pytest.fixture(autouse=True) +def clear_database(engine: "Engine") -> "Generator": + """Delete all rows from all tables. Called after each test.""" + yield + with engine.begin() as conn: + for table in m.mapper_registry.metadata.tables: + # Unless db is sqlite, disable foreign key constraints to delete out of order + if engine.name != "sqlite": + conn.execute(text(f"ALTER TABLE {table} DISABLE TRIGGER ALL")) + conn.execute(text(f"DELETE FROM {table}")) From 1773dca37af18ba8557ef4be00bd2ff259af404a Mon Sep 17 00:00:00 2001 From: John Davis Date: Wed, 1 May 2024 12:30:45 -0400 Subject: [PATCH 03/11] Add model fixtures --- test/unit/data/model/conftest.py | 261 +++++++++++++++++++++++++++++++ 1 file changed, 261 insertions(+) diff --git a/test/unit/data/model/conftest.py b/test/unit/data/model/conftest.py index 4d8728e9f197..034be09e51b8 100644 --- a/test/unit/data/model/conftest.py +++ b/test/unit/data/model/conftest.py @@ -1,4 +1,7 @@ +import contextlib import os +import random +import string import tempfile import uuid @@ -6,6 +9,8 @@ from sqlalchemy import create_engine from sqlalchemy.orm import Session +from galaxy import model as m + @pytest.fixture def database_name(): @@ -43,3 +48,259 @@ def session(init_model, engine): def tmp_directory(): with tempfile.TemporaryDirectory() as tmp_dir: yield tmp_dir + + +# model fixture factories + + +@pytest.fixture +def make_cleanup_event_history_association(session): + def f(**kwd): + model = m.CleanupEventHistoryAssociation(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def 
make_data_manager_history_association(session): + def f(**kwd): + model = m.DataManagerHistoryAssociation(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_default_history_permissions(session, make_history, make_role): + def f(**kwd): + if "history" not in kwd: + kwd["history"] = make_history() + if "action" not in kwd: + kwd["action"] = random_str() + if "role" not in kwd: + kwd["role"] = make_role() + model = m.DefaultHistoryPermissions(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_event(session): + def f(**kwd): + model = m.Event(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_galaxy_session(session): + def f(**kwd): + model = m.GalaxySession(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_galaxy_session_to_history_association(session, make_history, make_galaxy_session): + def f(**kwd): + if "galaxy_session" not in kwd: + kwd["galaxy_session"] = make_galaxy_session() + if "history" not in kwd: + kwd["history"] = make_history() + model = m.GalaxySessionToHistoryAssociation(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_history(session, make_user): + def f(**kwd): + if "user" not in kwd: + kwd["user"] = make_user() + model = m.History(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_history_annotation_association(session): + def f(**kwd): + model = m.HistoryAnnotationAssociation(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_history_dataset_association(session): + def f(**kwd): + model = m.HistoryDatasetAssociation(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_history_dataset_collection_association(session): + def f(**kwd): + model = m.HistoryDatasetCollectionAssociation(**kwd) + 
write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_history_rating_association(session, make_user, make_history): + def f(**kwd): + if "user" not in kwd: + kwd["user"] = make_user() + if "item" not in kwd: + kwd["item"] = make_history() + model = m.HistoryRatingAssociation(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_history_tag_association(session): + def f(**kwd): + model = m.HistoryTagAssociation(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_history_user_share_association(session): + def f(**kwd): + model = m.HistoryUserShareAssociation(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_job(session): + def f(**kwd): + model = m.Job(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_job_export_history_archive(session): + def f(**kwd): + model = m.JobExportHistoryArchive(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_job_import_history_archive(session): + def f(**kwd): + model = m.JobImportHistoryArchive(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_role(session): + def f(**kwd): + model = m.Role(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_workflow(session): + def f(**kwd): + model = m.Workflow(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_workflow_invocation(session, make_workflow): + def f(**kwd): + if "workflow" not in kwd: + kwd["workflow"] = make_workflow() + model = m.WorkflowInvocation(**kwd) + write_to_db(session, model) + return model + + return f + + +@pytest.fixture +def make_user(session): + def f(**kwd): + if "username" not in kwd: + kwd["username"] = random_str() + if "email" not in kwd: + kwd["email"] = random_email() + if 
"password" not in kwd: + kwd["password"] = random_str() + model = m.User(**kwd) + write_to_db(session, model) + return model + + return f + + +# utility functions + + +@contextlib.contextmanager +def transaction(session): + if not session.in_transaction(): + with session.begin(): + yield + else: + yield + + +def random_str() -> str: + alphabet = string.ascii_lowercase + string.digits + size = random.randint(5, 10) + return "".join(random.choices(alphabet, k=size)) + + +def random_email() -> str: + text = random_str() + return f"{text}@galaxy.testing" + + +def write_to_db(session, model) -> None: + with transaction(session): + session.add(model) + session.commit() From 7f83b903cb8a2cdb599c4893832f8e341c3f8ec9 Mon Sep 17 00:00:00 2001 From: John Davis Date: Wed, 1 May 2024 17:30:21 -0400 Subject: [PATCH 04/11] Add history pruner + test Add __init__.py to test.unit.data.model.db --- .../model/scripts/history_table_pruner.py | 191 ++++++++++++++++++ test/unit/data/model/db/__init__.py | 0 .../model/db/test_history_table_pruner.py | 167 +++++++++++++++ 3 files changed, 358 insertions(+) create mode 100644 lib/galaxy/model/scripts/history_table_pruner.py create mode 100644 test/unit/data/model/db/__init__.py create mode 100644 test/unit/data/model/db/test_history_table_pruner.py diff --git a/lib/galaxy/model/scripts/history_table_pruner.py b/lib/galaxy/model/scripts/history_table_pruner.py new file mode 100644 index 000000000000..329d3f8bb7de --- /dev/null +++ b/lib/galaxy/model/scripts/history_table_pruner.py @@ -0,0 +1,191 @@ +import datetime +import logging + +from sqlalchemy import text + +TMP_TABLE = "tmp_unused_history" + +ASSOC_TABLES = ( + "event", + "history_audit", + "history_tag_association", + "history_annotation_association", + "history_rating_association", + "history_user_share_association", + "default_history_permissions", + "data_manager_history_association", + "cleanup_event_history_association", + "galaxy_session_to_history", +) + +EXCLUDED_ASSOC_TABLES 
= ( + "job_import_history_archive", + "job_export_history_archive", + "workflow_invocation", + "history_dataset_collection_association", + "job", + "history_dataset_association", +) + +DEFAULT_BATCH_SIZE = 1000 + +logging.basicConfig() +log = logging.getLogger(__name__) + + +class HistoryTablePruner: + """Removes unused histories (user is null, hid == 1).""" + + def __init__(self, engine, batch_size=None, max_create_time=None): + self.engine = engine + self.batch_size = batch_size or DEFAULT_BATCH_SIZE + self.max_create_time = max_create_time or self._get_default_max_create_time() + self.min_id, self.max_id = self._get_min_max_ids() + + def run(self): + """ + Due to the very large size of some tables, we run operations in batches, using low/high history id as boundaries. + """ + if self.min_id is None: + logging.info("No histories exist") + return + + low = self.min_id + high = min(self.max_id, low + self.batch_size) + while low <= self.max_id: + self._run_batch(low, high) + low = high + high = high + self.batch_size + + def _get_default_max_create_time(self): + """By default, do not delete histories created less than a month ago.""" + today = datetime.date.today() + return today.replace(month=today.month - 1) + + def _run_batch(self, low, high): + self._mark_histories_as_deleted_and_purged(low, high) + histories = self._get_histories(low, high) + exclude = self._get_histories_to_exclude(low, high) + + # Calculate set of histories to delete. 
+ to_delete = set(histories) - exclude + if not to_delete: + logging.info(f"No histories to delete in the id range {low}-{high}") + return + + self._create_tmp_table() + try: + self._populate_tmp_table(to_delete) + self._delete_associations() + self._set_references_to_null() + self._delete_histories(low, high) + except Exception as e: + raise e + finally: + self._drop_tmp_table() + + def _get_min_max_ids(self): + stmt = text( + "SELECT min(id), max(id) FROM history WHERE user_id IS NULL AND hid_counter = 1 AND create_time < :create_time" + ) + params = {"create_time": self.max_create_time} + with self.engine.begin() as conn: + minmax = conn.execute(stmt, params).all() + return minmax[0][0], minmax[0][1] + + def _mark_histories_as_deleted_and_purged(self, low, high): + """Mark target histories as deleted and purged to prevent their further usage.""" + logging.info(f"Marking histories {low}-{high} as deleted and purged") + stmt = text( + """ + UPDATE history + SET deleted = TRUE, purged = TRUE + WHERE user_id IS NULL AND hid_counter = 1 AND create_time < :create_time AND id >= :low AND id < :high + """ + ) + params = self._get_stmt_params(low, high) + with self.engine.begin() as conn: + return conn.execute(stmt, params) + + def _get_histories(self, low, high): + """Return ids of histories to delete.""" + logging.info(f"Collecting history ids between {low}-{high}") + stmt = text( + "SELECT id FROM history WHERE user_id IS NULL AND hid_counter = 1 AND create_time < :create_time AND id >= :low AND id < :high" + ) + params = self._get_stmt_params(low, high) + with self.engine.begin() as conn: + return conn.scalars(stmt, params).all() + + def _get_histories_to_exclude(self, low, high): + """Retrieve histories that should NOT be deleted due to existence of associated records that should be preserved.""" + logging.info(f"Collecting ids of histories to exclude based on {len(EXCLUDED_ASSOC_TABLES)} associated tables:") + statements = [] + for table in EXCLUDED_ASSOC_TABLES: + 
statements.append((table, text(f"SELECT history_id FROM {table} WHERE history_id >= :low AND id < :high"))) + + params = self._get_stmt_params(low, high) + ids = [] + for table, stmt in statements: + with self.engine.begin() as conn: + logging.info(f"\tCollecting history_id from {table}") + ids += conn.scalars(stmt, params).all() + + excluded = set(ids) + if None in excluded: + excluded.remove(None) + return excluded + + def _create_tmp_table(self): + """Create temporary table to hold history ids.""" + stmt = text(f"CREATE TEMPORARY TABLE {TMP_TABLE} (id INT PRIMARY KEY)") + with self.engine.begin() as conn: + conn.execute(stmt) + + def _drop_tmp_table(self): + stmt = text(f"CREATE TEMPORARY TABLE {TMP_TABLE} (id INT PRIMARY KEY)") + stmt = text(f"DROP TABLE {TMP_TABLE}") + with self.engine.begin() as conn: + conn.execute(stmt) + + def _populate_tmp_table(self, to_delete): + """Load ids of histories to delete into temporary table.""" + assert to_delete + logging.info("Populating temporary table") + sql_values = ",".join([f"({id})" for id in to_delete]) + stmt = text(f"INSERT INTO {TMP_TABLE} VALUES {sql_values}") + with self.engine.begin() as conn: + conn.execute(stmt) + + def _delete_associations(self): + """Delete records associated with histories to be deleted.""" + logging.info("Deleting associated records from ...") + + for table in ASSOC_TABLES: + stmt = text(f"DELETE FROM {table} WHERE history_id IN (SELECT id FROM {TMP_TABLE})") + with self.engine.begin() as conn: + conn.execute(stmt) + + def _set_references_to_null(self): + """Set history_id to null in galaxy_session table for records referring to histories to be deleted.""" + logging.info("Set history_id to null in galaxy_session") + stmt = text( + f"UPDATE galaxy_session SET current_history_id = NULL WHERE current_history_id IN (SELECT id FROM {TMP_TABLE})" + ) + with self.engine.begin() as conn: + conn.execute(stmt) + + def _delete_histories(self, low, high): + """Last step: delete histories that are 
safe to delete.""" + logging.info(f"Delete histories in the id range {low} - {high}") + stmt = text(f"DELETE FROM history WHERE id IN (SELECT id FROM {TMP_TABLE})") + with self.engine.begin() as conn: + conn.execute(stmt) + + def _get_stmt_params(self, low, high): + params = { + "create_time": self.max_create_time, + "low": low, + "high": high, + } + return params diff --git a/test/unit/data/model/db/__init__.py b/test/unit/data/model/db/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/test/unit/data/model/db/test_history_table_pruner.py b/test/unit/data/model/db/test_history_table_pruner.py new file mode 100644 index 000000000000..376b0c52939c --- /dev/null +++ b/test/unit/data/model/db/test_history_table_pruner.py @@ -0,0 +1,167 @@ +import datetime + +import pytest +from sqlalchemy import ( + func, + select, + text, +) + +from galaxy import model as m +from galaxy.model.scripts.history_table_pruner import HistoryTablePruner + + +@pytest.fixture() +def setup_db( + db_url, + session, + make_user, + make_history, + make_event, + make_history_tag_association, + make_history_annotation_association, + make_history_rating_association, + make_history_user_share_association, + make_default_history_permissions, + make_data_manager_history_association, + make_cleanup_event_history_association, + make_galaxy_session_to_history_association, + make_job_import_history_archive, + make_job_export_history_archive, + make_workflow_invocation, + make_history_dataset_collection_association, + make_job, + make_history_dataset_association, + make_galaxy_session, +): + # 1. Create 100 histories; make them deletable: user = null, hid_counter = 1. + histories = [] + for id in range(100): + h = make_history(id=id) + h.user = None + h.hid_counter = 1 + histories.append(h) + + # 2. Set 10 histories as not deletable: hid_counter != 1. + for i in range(10): + histories[i].hid_counter = 42 + + # 3. Set next 10 histories as not deletable: user not null. 
+ u = make_user() + for i in range(10, 20): + histories[i].user = u + + # 4. For the next 6 histories create associations that cannot be deleted. + make_job_import_history_archive(history=histories[20]) + make_job_export_history_archive(history=histories[21]) + make_workflow_invocation(history=histories[22]) + make_history_dataset_collection_association(history=histories[23]) + make_history_dataset_association(history=histories[25]) + make_job().history = histories[24] + + # 5. For the next 10 histories create associations that can be deleted. + make_event(history=histories[26]) + make_history_tag_association(history=histories[27]) + make_history_annotation_association(history=histories[28]) + make_history_rating_association(item=histories[29]) + make_history_user_share_association(history=histories[30]) + make_default_history_permissions(history=histories[31]) + make_data_manager_history_association(history=histories[32]) + make_cleanup_event_history_association(history_id=histories[33].id) + make_galaxy_session_to_history_association(history=histories[34]) + # HistoryAudit is not instantiable, so created association manually. + stmt = text("insert into history_audit values(:history_id, :update_time)") + params = {"history_id": histories[35].id, "update_time": datetime.date.today()} + session.execute(stmt, params) + + # 6. Create a galaxy_session record referring to a history. + # This cannot be deleted, but the history reference can be set to null. + make_galaxy_session(current_history=histories[36]) + + session.commit() + + # TOTAL counts of loaded histories: + # histories that should NOT be deleted: 10 + 10 + 6 = 26 + # histories that SHOULD be deleted: 100 - 26 = 74 + + +def test_run(setup_db, session, db_url, engine): + + def verify_counts(model, expected): + assert session.scalar(select(func.count()).select_from(model)) == expected + + # 1. 
Verify history counts + stmt = select(m.History).order_by(m.History.id) + result = session.scalars(stmt).all() + assert len(result) == 100 + for i, h in enumerate(result): + if i < 10: # first 10 + assert h.hid_counter > 1 + assert h.user is None + elif i < 20: # next 10 + assert h.hid_counter == 1 + assert h.user is not None + else: # the rest + assert h.hid_counter == 1 + assert h.user is None + + # 2. Verify association counts + for model in [ + m.JobImportHistoryArchive, + m.JobExportHistoryArchive, + m.WorkflowInvocation, + m.HistoryDatasetCollectionAssociation, + m.Job, + m.HistoryDatasetAssociation, + m.Event, + m.HistoryTagAssociation, + m.HistoryAnnotationAssociation, + m.HistoryRatingAssociation, + m.HistoryUserShareAssociation, + m.DefaultHistoryPermissions, + m.DataManagerHistoryAssociation, + m.CleanupEventHistoryAssociation, + m.GalaxySessionToHistoryAssociation, + m.HistoryAudit, + ]: + verify_counts(model, 1) + verify_counts( + m.GalaxySession, 2 + ) # one extra session was automatically created for GalaxySessionToHistoryAssociation + + # 3. 
Run pruning script + today = datetime.date.today() + newdate = today.replace(year=today.year + 1) + HistoryTablePruner(engine, max_create_time=newdate).run() + + # 4 Verify new counts (for details on expected counts see comments in setup_db) + + # 4.1 Verify new history counts + verify_counts(m.History, 26) + + # 4.2 Verify new association counts: no change (these associations should NOT be deleted) + for model in [ + m.JobImportHistoryArchive, + m.JobExportHistoryArchive, + m.WorkflowInvocation, + m.HistoryDatasetCollectionAssociation, + m.Job, + m.HistoryDatasetAssociation, + ]: + verify_counts(model, 1) + verify_counts(m.GalaxySession, 2) + + # 4.3 Verify new association counts: deleted (these associations SHOULD be deleted) + for model in [ + m.Event, + m.HistoryTagAssociation, + m.HistoryAnnotationAssociation, + m.HistoryRatingAssociation, + m.HistoryUserShareAssociation, + m.DefaultHistoryPermissions, + m.DataManagerHistoryAssociation, + m.CleanupEventHistoryAssociation, + m.GalaxySessionToHistoryAssociation, + m.HistoryAudit, + ]: + verify_counts(model, 0) From 2d9f78f4d33667f0ec3806233450c13e929be85c Mon Sep 17 00:00:00 2001 From: John Davis Date: Wed, 1 May 2024 18:03:34 -0400 Subject: [PATCH 05/11] Add script --- lib/galaxy/model/scripts/__init__.py | 0 .../model/scripts/prune_history_table.py | 36 +++++++++++++++++++ packages/data/setup.cfg | 1 + 3 files changed, 37 insertions(+) create mode 100644 lib/galaxy/model/scripts/__init__.py create mode 100644 lib/galaxy/model/scripts/prune_history_table.py diff --git a/lib/galaxy/model/scripts/__init__.py b/lib/galaxy/model/scripts/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/lib/galaxy/model/scripts/prune_history_table.py b/lib/galaxy/model/scripts/prune_history_table.py new file mode 100644 index 000000000000..af5d11a0b0d8 --- /dev/null +++ b/lib/galaxy/model/scripts/prune_history_table.py @@ -0,0 +1,36 @@ +import argparse +import os +import sys + +from sqlalchemy import 
create_engine + +sys.path.insert( + 1, os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, os.pardir, os.pardir, "lib")) +) + +from galaxy.model.orm.scripts import get_config +from galaxy.model.scripts.history_table_pruner import HistoryTablePruner + +DESCRIPTION = """Remove unused histories from database. + +A history is considered unused if it doesn't have a user and its hid counter has not been incremented. +""" + + +def main(): + args = _get_parser().parse_args() + config = get_config(sys.argv, use_argparse=False, cwd=os.getcwd()) + engine = create_engine(config["db_url"]) + htp = HistoryTablePruner(engine=engine, batch_size=args.batch, max_create_time=args.created) + htp.run() + + +def _get_parser(): + parser = argparse.ArgumentParser(description=DESCRIPTION) + parser.add_argument("--batch", help="batch size") + parser.add_argument("--created", help="most recent create_time") + return parser + + +if __name__ == "__main__": + main() diff --git a/packages/data/setup.cfg b/packages/data/setup.cfg index d1404b141164..cd53e425aa10 100644 --- a/packages/data/setup.cfg +++ b/packages/data/setup.cfg @@ -71,6 +71,7 @@ console_scripts = galaxy-build-objects = galaxy.model.store.build_objects:main galaxy-load-objects = galaxy.model.store.load_objects:main galaxy-manage-db = galaxy.model.orm.scripts:manage_db + galaxy-prune-histories = galaxy.model.scripts:prune_history_table [options.packages.find] exclude = From 37e7d87a53e0f5a2b95057772e4909780faad332 Mon Sep 17 00:00:00 2001 From: John Davis Date: Wed, 8 May 2024 10:24:47 -0400 Subject: [PATCH 06/11] Check for empty batch earlier --- lib/galaxy/model/scripts/history_table_pruner.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/galaxy/model/scripts/history_table_pruner.py b/lib/galaxy/model/scripts/history_table_pruner.py index 329d3f8bb7de..217468dfdb3e 100644 --- a/lib/galaxy/model/scripts/history_table_pruner.py +++ 
b/lib/galaxy/model/scripts/history_table_pruner.py @@ -63,14 +63,19 @@ def _get_default_max_create_time(self): return today.replace(month=today.month - 1) def _run_batch(self, low, high): + empty_batch_msg = f"No histories to delete in the id range {low}-{high}" self._mark_histories_as_deleted_and_purged(low, high) histories = self._get_histories(low, high) + if not histories: + logging.info(empty_batch_msg) + return + exclude = self._get_histories_to_exclude(low, high) # Calculate set of histories to delete. to_delete = set(histories) - exclude if not to_delete: - logging.info(f"No histories to delete in the id range {low}-{high}") + logging.info(empty_batch_msg) return self._create_tmp_table() From 9e78069ab1f61873be0e4aa16bdbb539957b89ec Mon Sep 17 00:00:00 2001 From: John Davis Date: Mon, 13 May 2024 20:44:12 -0400 Subject: [PATCH 07/11] Convert command args to appropriate types --- lib/galaxy/model/scripts/prune_history_table.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/galaxy/model/scripts/prune_history_table.py b/lib/galaxy/model/scripts/prune_history_table.py index af5d11a0b0d8..1d326154c7a5 100644 --- a/lib/galaxy/model/scripts/prune_history_table.py +++ b/lib/galaxy/model/scripts/prune_history_table.py @@ -1,4 +1,5 @@ import argparse +import datetime import os import sys @@ -27,8 +28,10 @@ def main(): def _get_parser(): parser = argparse.ArgumentParser(description=DESCRIPTION) - parser.add_argument("--batch", help="batch size") - parser.add_argument("--created", help="most recent create_time") + parser.add_argument("--batch", type=int, help="batch size") + parser.add_argument( + "--created", type=datetime.datetime.fromisoformat, help="most recent created date/time in ISO format" + ) return parser From 594f702b9dc268aed66a6095f9c6162233bdff4e Mon Sep 17 00:00:00 2001 From: John Davis Date: Mon, 13 May 2024 20:59:53 -0400 Subject: [PATCH 08/11] Cleanup log statements; fix off-by-one errors --- 
.../model/scripts/history_table_pruner.py | 46 +++++++++++++------ 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/lib/galaxy/model/scripts/history_table_pruner.py b/lib/galaxy/model/scripts/history_table_pruner.py index 217468dfdb3e..c23813cbd7ac 100644 --- a/lib/galaxy/model/scripts/history_table_pruner.py +++ b/lib/galaxy/model/scripts/history_table_pruner.py @@ -1,5 +1,6 @@ import datetime import logging +import math from sqlalchemy import text @@ -41,21 +42,33 @@ def __init__(self, engine, batch_size=None, max_create_time=None): self.batch_size = batch_size or DEFAULT_BATCH_SIZE self.max_create_time = max_create_time or self._get_default_max_create_time() self.min_id, self.max_id = self._get_min_max_ids() + self.batches = self._get_batch_count() def run(self): """ Due to the very large size of some tables, we run operations in batches, using low/high history id as boundaries. """ + + def get_high(low): + return min(self.max_id + 1, low + self.batch_size) + if self.min_id is None: - logging.info("No histories exist") + log.info("No histories exist") return + log.info( + f"Total batches to run: {self.batches}, minimum history id: {self.min_id}, maximum history id: {self.max_id}" + ) + low = self.min_id - high = min(self.max_id, low + self.batch_size) + high = get_high(low) + batch_counter = 1 while low <= self.max_id: + log.info(f"Running batch {batch_counter} of {self.batches}: history id range {low}-{high-1}:") self._run_batch(low, high) low = high - high = high + self.batch_size + high = get_high(high) + batch_counter += 1 def _get_default_max_create_time(self): """By default, do not delete histories created less than a month ago.""" @@ -63,11 +76,11 @@ def _get_default_max_create_time(self): return today.replace(month=today.month - 1) def _run_batch(self, low, high): - empty_batch_msg = f"No histories to delete in the id range {low}-{high}" + empty_batch_msg = f" No histories to delete in id range {low}-{high-1}" 
self._mark_histories_as_deleted_and_purged(low, high) histories = self._get_histories(low, high) if not histories: - logging.info(empty_batch_msg) + log.info(empty_batch_msg) return exclude = self._get_histories_to_exclude(low, high) @@ -75,7 +88,7 @@ def _run_batch(self, low, high): # Calculate set of histories to delete. to_delete = set(histories) - exclude if not to_delete: - logging.info(empty_batch_msg) + log.info(empty_batch_msg) return self._create_tmp_table() @@ -96,11 +109,16 @@ def _get_min_max_ids(self): params = {"create_time": self.max_create_time} with self.engine.begin() as conn: minmax = conn.execute(stmt, params).all() + # breakpoint() return minmax[0][0], minmax[0][1] + def _get_batch_count(self): + """Calculate number of batches to run.""" + return math.ceil((self.max_id - self.min_id) / self.batch_size) + def _mark_histories_as_deleted_and_purged(self, low, high): """Mark target histories as deleted and purged to prevent their further usage.""" - logging.info(f"Marking histories {low}-{high} as deleted and purged") + log.info(" Marking histories as deleted and purged") stmt = text( """ UPDATE history @@ -114,7 +132,7 @@ def _mark_histories_as_deleted_and_purged(self, low, high): def _get_histories(self, low, high): """Return ids of histories to delete.""" - logging.info(f"Collecting history ids between {low}-{high}") + log.info(" Collecting history ids") stmt = text( "SELECT id FROM history WHERE user_id IS NULL AND hid_counter = 1 AND create_time < :create_time AND id >= :low AND id < :high" ) @@ -124,7 +142,7 @@ def _get_histories(self, low, high): def _get_histories_to_exclude(self, low, high): """Retrieve histories that should NOT be deleted due to existence of associated records that should be preserved.""" - logging.info(f"Collecting ids of histories to exclude based on {len(EXCLUDED_ASSOC_TABLES)} associated tables:") + log.info(f" Collecting ids of histories to exclude based on {len(EXCLUDED_ASSOC_TABLES)} associated tables:") statements 
= [] for table in EXCLUDED_ASSOC_TABLES: statements.append((table, text(f"SELECT history_id FROM {table} WHERE history_id >= :low AND id < :high"))) @@ -133,7 +151,7 @@ def _get_histories_to_exclude(self, low, high): ids = [] for table, stmt in statements: with self.engine.begin() as conn: - logging.info(f"\tCollecting history_id from {table}") + log.info(f" Collecting history_id from {table}") ids += conn.scalars(stmt, params).all() excluded = set(ids) @@ -156,7 +174,7 @@ def _drop_tmp_table(self): def _populate_tmp_table(self, to_delete): """Load ids of histories to delete into temporary table.""" assert to_delete - logging.info("Populating temporary table") + log.info(" Populating temporary table") sql_values = ",".join([f"({id})" for id in to_delete]) stmt = text(f"INSERT INTO {TMP_TABLE} VALUES {sql_values}") with self.engine.begin() as conn: @@ -164,16 +182,16 @@ def _populate_tmp_table(self, to_delete): def _delete_associations(self): """Delete records associated with histories to be deleted.""" - logging.info("Deleting associated records from ...") for table in ASSOC_TABLES: + log.info(f" Deleting associated records from {table}") stmt = text(f"DELETE FROM {table} WHERE history_id IN (SELECT id FROM {TMP_TABLE})") with self.engine.begin() as conn: conn.execute(stmt) def _set_references_to_null(self): """Set history_id to null in galaxy_session table for records referring to histories to be deleted.""" - logging.info("Set history_id to null in galaxy_session") + log.info(" Set history_id to null in galaxy_session") stmt = text( f"UPDATE galaxy_session SET current_history_id = NULL WHERE current_history_id IN (SELECT id FROM {TMP_TABLE})" ) @@ -182,7 +200,7 @@ def _set_references_to_null(self): def _delete_histories(self, low, high): """Last step: delete histories that are safe to delete.""" - logging.info(f"Delete histories in the id range {low} - {high}") + log.info(f" Deleting histories in id range {low}-{high-1}") stmt = text(f"DELETE FROM history WHERE 
id IN (SELECT id FROM {TMP_TABLE})") with self.engine.begin() as conn: conn.execute(stmt) From 54bb8a9a06967f6f434261ad225c83f7959062dd Mon Sep 17 00:00:00 2001 From: John Davis Date: Mon, 13 May 2024 10:06:50 -0400 Subject: [PATCH 09/11] Fix bug in admin/container docs section title markup --- doc/source/admin/container_resolvers.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/admin/container_resolvers.rst b/doc/source/admin/container_resolvers.rst index 918088f7e026..153c5053c605 100644 --- a/doc/source/admin/container_resolvers.rst +++ b/doc/source/admin/container_resolvers.rst @@ -379,7 +379,7 @@ Setting up Galaxy using docker / singularity on distributed compute resources (in particular in real user setups) requires careful planning. Other considerations -==================== +-------------------- Tools frequently use ``$TMP``, ``$TEMP``, or ``$TMPDIR`` (or simply use hardcoded ``/tmp``) for storing temporary data. In containerized environments ``/tmp`` From 77d8d61e9061db68d502bf6ff047cfd6ddf16ead Mon Sep 17 00:00:00 2001 From: John Davis Date: Mon, 13 May 2024 21:00:59 -0400 Subject: [PATCH 10/11] Add documentation --- doc/source/admin/useful_scripts.rst | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/doc/source/admin/useful_scripts.rst b/doc/source/admin/useful_scripts.rst index 933c846aae2b..1b3438588372 100644 --- a/doc/source/admin/useful_scripts.rst +++ b/doc/source/admin/useful_scripts.rst @@ -17,3 +17,22 @@ This script was developed to be as general as possible, allowing you to pipe the Find has an extremely expressive command line for selecting specific files that are of interest to you. These will then be recursively uploaded into Galaxy, maintaining the folder hierarchy, a useful feature when moving legacy data into Galaxy. 
For a complete description of the options of this script, you can run ``python $GALAXY_ROOT/scripts/api/library_upload_dir.py --help`` This tool will not overwrite or re-upload already uploaded datasets. As a result, one can imagine running this on a cron job to keep an "incoming sequencing data" directory synced with a data library. + +Deleting unused histories +------------------------- + +Galaxy accommodates anonymous usage by creating a default history. Often, such histories will remain unused, as a result of which the database may contain a considerable number of anonymous histories along with associated records, which serve no purpose. Deleting such records will declutter the database and free up space. However, given that a row in the history table may be referenced from multiple other tables, manually deleting such data may leave the database in an inconsistent state. Furthermore, whereas some types of data associated with such histories are clearly obsolete and can be safely deleted, others may require preservation for a variety of reasons. + +To safely delete unused histories and their associated records, please use the ``prune_history_table`` script. Due to the potentially very large size of some of the tables in the database, the script deletes records in batches. The default size is 1000, which means the script will delete up to 1000 histories, plus any associated records, in a single batch. The size of the batch is configurable. By default, an anonymous history should be at least a month old to be considered unused. This value is configurable as well. + +.. code-block:: console + + $ python $GALAXY_ROOT/lib/galaxy/model/scripts/prune_history_table.py + usage: prune_history_table.py [-h] [--batch BATCH] [--created CREATED] + + Remove unused histories from database. A history is considered unused if it doesn't have a user and its hid counter has not been incremented. 
+ + optional arguments: + -h, --help show this help message and exit + --batch BATCH batch size + --created CREATED most recent created date/time in ISO format From 3904681f40ec1b7679bd424ac1a5707bbea5289a Mon Sep 17 00:00:00 2001 From: John Davis Date: Tue, 14 May 2024 13:47:35 -0400 Subject: [PATCH 11/11] Provide an example of ISO format --- doc/source/admin/useful_scripts.rst | 2 +- lib/galaxy/model/scripts/prune_history_table.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/source/admin/useful_scripts.rst b/doc/source/admin/useful_scripts.rst index 1b3438588372..9fe85c291008 100644 --- a/doc/source/admin/useful_scripts.rst +++ b/doc/source/admin/useful_scripts.rst @@ -35,4 +35,4 @@ To safely delete unused histories and their associated records, please use the ` optional arguments: -h, --help show this help message and exit --batch BATCH batch size - --created CREATED most recent created date/time in ISO format + --created CREATED most recent created date/time in ISO format (for example, March 11, 1952 is represented as '1952-03-11') diff --git a/lib/galaxy/model/scripts/prune_history_table.py b/lib/galaxy/model/scripts/prune_history_table.py index 1d326154c7a5..d4c976b050d7 100644 --- a/lib/galaxy/model/scripts/prune_history_table.py +++ b/lib/galaxy/model/scripts/prune_history_table.py @@ -30,7 +30,9 @@ def _get_parser(): parser = argparse.ArgumentParser(description=DESCRIPTION) parser.add_argument("--batch", type=int, help="batch size") parser.add_argument( - "--created", type=datetime.datetime.fromisoformat, help="most recent created date/time in ISO format" + "--created", + type=datetime.datetime.fromisoformat, + help="most recent created date/time in ISO format (for example, March 11, 1952 is represented as '1952-03-11')", ) return parser