From 630941e913c80648c7f3cce86b417aa606b49483 Mon Sep 17 00:00:00 2001
From: Cristiano Singulani
Date: Wed, 5 Jun 2024 17:29:37 +0000
Subject: [PATCH 01/20] Starting integration with orchestration

---
 .devcontainer/docker-compose.yml              |   3 +
 .gitignore                                    |   8 +
 README.md                                     |  45 ++++-
 backend/core/admin.py                         |  18 +-
 backend/core/migrations/0029_pipeline.py      |  25 +++
 backend/core/migrations/0030_process.py       |  35 ++++
 .../0031_pipeline_product_types_accepted.py   |  18 ++
 ...032_process_upload_alter_process_inputs.py |  25 +++
 .../migrations/0033_alter_process_upload.py   |  19 +++
 .../0034_process_upload_product_type.py       |  20 +++
 ...ve_process_upload_product_type_and_more.py |  30 ++++
 backend/core/models/__init__.py               |   4 +-
 backend/core/models/pipeline.py               |  24 +++
 backend/core/models/process.py                |  64 +++++++
 backend/core/models/product.py                |   3 +-
 backend/core/pipeline_objects.py              |  48 ++++++
 backend/core/serializers/__init__.py          |   6 +-
 backend/core/serializers/pipeline.py          |   8 +
 backend/core/serializers/process.py           |  59 +++++++
 backend/core/utils.py                         |  93 ++++++++++
 backend/core/views/__init__.py                |  13 +-
 backend/core/views/create_product.py          | 155 +++++++++++++++++
 backend/core/views/pipeline.py                |  26 +++
 backend/core/views/process.py                 | 133 +++++++++++++++
 backend/core/views/product.py                 | 161 ++++--------------
 backend/core/views/registry_product.py        |   1 +
 backend/pzserver/settings.py                  |   9 +
 backend/pzserver/urls.py                      |  25 +--
 backend/requirements.txt                      |   1 +
 docker-compose-development.yml                |  31 ++++
 env_template                                  |  11 +-
 nginx_development.conf                        |  52 +++++-
 orchestration/.orchestration-env              |  43 +++++
 .../DatasetA/Norder=0/Dir=0/Npix=0.parquet    | Bin 0 -> 5074 bytes
 .../DatasetA/Norder=0/Dir=0/Npix=11.parquet   | Bin 0 -> 5074 bytes
 .../DatasetA/Norder=0/Dir=0/Npix=4.parquet    | Bin 0 -> 7618 bytes
 .../DatasetA/Norder=0/Dir=0/Npix=8.parquet    | Bin 0 -> 5802 bytes
 .../datasets/DatasetA/_common_metadata        | Bin 0 -> 3305 bytes
 orchestration/datasets/DatasetA/_metadata     | Bin 0 -> 6627 bytes
 .../datasets/DatasetA/catalog_info.json       |   8 +
 .../datasets/DatasetA/point_map.fits          | Bin 0 -> 1581120 bytes
 .../datasets/DatasetA/provenance_info.json    |  47 +++++
 .../DatasetB/Norder=0/Dir=0/Npix=4.parquet    | Bin 0 -> 7130 bytes
 .../DatasetB/Norder=0/Dir=0/Npix=8.parquet    | Bin 0 -> 5731 bytes
 .../datasets/DatasetB/_common_metadata        | Bin 0 -> 3294 bytes
 orchestration/datasets/DatasetB/_metadata     | Bin 0 -> 4947 bytes
 .../datasets/DatasetB/catalog_info.json       |   8 +
 .../datasets/DatasetB/point_map.fits          | Bin 0 -> 1581120 bytes
 .../datasets/DatasetB/provenance_info.json    |  47 +++++
 orchestration/datasets/README                 |   1 +
 .../pipelines/cross_lsdb_dev/VERSION          |   1 +
 .../pipelines/cross_lsdb_dev/config.py        |  51 ++++++
 .../pipelines/cross_lsdb_dev/environment.yml  |  14 ++
 .../pipelines/cross_lsdb_dev/install.sh       |  34 ++++
 .../cross_lsdb_dev/packages/__init__.py       |   0
 .../cross_lsdb_dev/packages/executor.py       |  47 +++++
 .../cross_lsdb_dev/packages/utils.py          |  46 +++++
 orchestration/pipelines/cross_lsdb_dev/run.sh |  40 +++++
 .../cross_lsdb_dev/scripts/run-crossmatch     |  60 +++++++
 orchestration/pipelines/load_pipelines.sh     |  12 ++
 orchestration/pipelines/pipelines.yaml        |   8 +
 .../pipelines/pipelines.yaml.template         |   8 +
 orchestration/rabbitmq/enabled_plugins        |   1 +
 pipeline.py                                   |  16 ++
 64 files changed, 1503 insertions(+), 162 deletions(-)
 create mode 100644 backend/core/migrations/0029_pipeline.py
 create mode 100644 backend/core/migrations/0030_process.py
 create mode 100644 backend/core/migrations/0031_pipeline_product_types_accepted.py
 create mode 100644 backend/core/migrations/0032_process_upload_alter_process_inputs.py
 create mode 100644 backend/core/migrations/0033_alter_process_upload.py
 create mode 100644 backend/core/migrations/0034_process_upload_product_type.py
 create mode 100644 backend/core/migrations/0035_remove_process_upload_product_type_and_more.py
 create mode 100644 backend/core/models/pipeline.py
 create mode 100644 backend/core/models/process.py
 create mode 100644 backend/core/pipeline_objects.py
 create mode 100644 backend/core/serializers/pipeline.py
 create mode 100644 backend/core/serializers/process.py
 create mode 100644 backend/core/utils.py
 create mode 100644 backend/core/views/create_product.py
 create mode 100644 backend/core/views/pipeline.py
 create mode 100644 backend/core/views/process.py
 create mode 100644 orchestration/.orchestration-env
 create mode 100644 orchestration/datasets/DatasetA/Norder=0/Dir=0/Npix=0.parquet
 create mode 100644 orchestration/datasets/DatasetA/Norder=0/Dir=0/Npix=11.parquet
 create mode 100644 orchestration/datasets/DatasetA/Norder=0/Dir=0/Npix=4.parquet
 create mode 100644 orchestration/datasets/DatasetA/Norder=0/Dir=0/Npix=8.parquet
 create mode 100644 orchestration/datasets/DatasetA/_common_metadata
 create mode 100644 orchestration/datasets/DatasetA/_metadata
 create mode 100644 orchestration/datasets/DatasetA/catalog_info.json
 create mode 100644 orchestration/datasets/DatasetA/point_map.fits
 create mode 100644 orchestration/datasets/DatasetA/provenance_info.json
 create mode 100644 orchestration/datasets/DatasetB/Norder=0/Dir=0/Npix=4.parquet
 create mode 100644 orchestration/datasets/DatasetB/Norder=0/Dir=0/Npix=8.parquet
 create mode 100644 orchestration/datasets/DatasetB/_common_metadata
 create mode 100644 orchestration/datasets/DatasetB/_metadata
 create mode 100644 orchestration/datasets/DatasetB/catalog_info.json
 create mode 100644 orchestration/datasets/DatasetB/point_map.fits
 create mode 100644 orchestration/datasets/DatasetB/provenance_info.json
 create mode 100644 orchestration/datasets/README
 create mode 100644 orchestration/pipelines/cross_lsdb_dev/VERSION
 create mode 100644 orchestration/pipelines/cross_lsdb_dev/config.py
 create mode 100644 orchestration/pipelines/cross_lsdb_dev/environment.yml
 create mode 100755 orchestration/pipelines/cross_lsdb_dev/install.sh
 create mode 100644 orchestration/pipelines/cross_lsdb_dev/packages/__init__.py
 create mode 100755 orchestration/pipelines/cross_lsdb_dev/packages/executor.py
 create mode 100755 orchestration/pipelines/cross_lsdb_dev/packages/utils.py
 create mode 100755 orchestration/pipelines/cross_lsdb_dev/run.sh
 create mode 100755 orchestration/pipelines/cross_lsdb_dev/scripts/run-crossmatch
 create mode 100755 orchestration/pipelines/load_pipelines.sh
 create mode 100644 orchestration/pipelines/pipelines.yaml
 create mode 100644 orchestration/pipelines/pipelines.yaml.template
 create mode 100644 orchestration/rabbitmq/enabled_plugins
 create mode 100644 pipeline.py

diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml
index d8ae63b..1e91e4d 100644
--- a/.devcontainer/docker-compose.yml
+++ b/.devcontainer/docker-compose.yml
@@ -9,6 +9,9 @@ services:
       - ..:/workspaces:cached
       - ./archive/log/backend:/archive/log
       - ./archive/data:/archive/data
+      - ./orchestration/pipelines:/pipelines
+      - ./orchestration/processes:/processes
+      - ./orchestration/datasets:/datasets
 
     # Overrides default command so things don't shut down after the process ends.
    command: sleep infinity
diff --git a/.gitignore b/.gitignore
index 0756aee..3ccd2ab 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,3 +18,11 @@ nginx.conf
 # Docker Compose
 docker-compose.yml
 .env.local
+
+# Orchestration
+orchestration/db
+orchestration/processes
+orchestration/logs
+orchestration/rabbitmq/*
+!orchestration/rabbitmq/enabled_plugins
+
diff --git a/README.md b/README.md
index f3c26d5..697fb4d 100644
--- a/README.md
+++ b/README.md
@@ -98,9 +98,51 @@
 In the development environment it is not necessary to change Nginx settings. But if a local change is needed, copy the `nginx_development.conf` file to `nginx.conf` and also change the `docker-compose.yml` file in the nginx service at the line `- ./nginx_development.conf:/etc/nginx/conf.d/default.conf:ro`. In this way, the nginx.conf file represents your local environment; if you make a modification that is necessary for the project, copy it to the template file, as the nginx.conf file is not part of the repository.
+
+### Orchestration setup
+
+The Pz Server uses [orchestration](https://github.com/linea-it/orchestration/) to run its pipelines, and for this you need to configure it. First, create the directories it expects:
+
+``` bash
+mkdir orchestration/db orchestration/logs orchestration/processes
+```
+
+The next step is to add a virtual host to your local machine. On Linux, this is done by adding the line `127.0.0.1 orchestration` to `/etc/hosts`. The file should look like this:
+
+``` bash
+127.0.0.1 localhost
+127.0.0.1 orchestration
+
+# The following lines are desirable for IPv6 capable hosts
+::1     ip6-localhost ip6-loopback
+fe00::0 ip6-localnet
+ff00::0 ip6-mcastprefix
+ff02::1 ip6-allnodes
+ff02::2 ip6-allrouters
+```
+
+Start the orchestration with the command:
+
+``` bash
+docker-compose up orchestration
+```
+
+Then follow the procedure to register an authentication app, described in this [link](https://github.com/linea-it/orchestration?tab=readme-ov-file#how-to-use-using-client-credential). Note that, when integrated with the Pz Server, the orchestration is served at a URL other than `http://localhost`: in this case, the admin page is [http://orchestration/admin/oauth2_provider/application/add/](http://orchestration/admin/oauth2_provider/application/add/).
+
+The `CLIENT ID` and `SECRET KEY` values generated by the previous procedure must also be set in the Pz Server's `.env`, which should look similar to this:
+
+``` bash
+# Client ID and Client Secret must be registered in Django Admin
+# after backend Setup, in the Django Oauth Applications interface
+ORC_CLIENT_ID=wD85gkYeqGEQvVWv5o3Cx6ppBlfDl2S88dek8Exp
+ORC_CLIENT_SECRET=eM2dhhxa2vovfaAXmMwqR1M8TdGhVmBjT7co5uaA9pI4aKPDZGxtBtDG5LHfhHvZUabbSP5aUDRpTLpUJAiGS0ScNuhktbuCwuSPiz0bmEftEROJ3ZzzKp2aDNO7Vx0k
+```
+
+This is enough to have the orchestration working with the image pinned in `orchestration/docker-compose.yml`. If you want to change the orchestration version, just change the image in `orchestration/docker-compose.yml`.
+
 Once this is done, the development environment setup process is complete.
 
-Finally, to start the whole application:
+Finally, to start the whole application:
 
 ``` bash
 docker-compose up
@@ -125,6 +167,7 @@ Go to Django ADMIN (for local installation, open a web browser and go to the URL
 
 The installation is done, you can now test the newly configured application.
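
As a quick smoke test of the orchestration integration, a short script can exercise the client-credentials flow with the values registered above. This is a sketch, not part of the patch: it assumes the `requests` package is available and that the orchestration exposes django-oauth-toolkit's default token endpoint under `/o/token/` (the `/o` location is proxied to the orchestration backend in `nginx_development.conf`).

``` python
# Sketch: verify the ORC_CLIENT_ID / ORC_CLIENT_SECRET pair issues a token.
import requests

ORC_URL = "http://orchestration"  # virtual host added to /etc/hosts above
CLIENT_ID = "wD85gkYeqGEQvVWv5o3Cx6ppBlfDl2S88dek8Exp"
CLIENT_SECRET = "eM2dhhxa2vovfaAXmMwqR1M8TdGhVmBjT7co5uaA9pI4aKPDZGxtBtDG5LHfhHvZUabbSP5aUDRpTLpUJAiGS0ScNuhktbuCwuSPiz0bmEftEROJ3ZzzKp2aDNO7Vx0k"

response = requests.post(
    f"{ORC_URL}/o/token/",
    data={"grant_type": "client_credentials"},
    auth=(CLIENT_ID, CLIENT_SECRET),
    timeout=30,
)
response.raise_for_status()
print(response.json()["access_token"])
```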
+
 ### Some example commands
 
 Turn on the background environment (if you already have the application running on the terminal, stop it with the `CTRL + C` keys and bring it up again, but in the background using the `-d` argument):
diff --git a/backend/core/admin.py b/backend/core/admin.py
index fe802c5..96e783a 100644
--- a/backend/core/admin.py
+++ b/backend/core/admin.py
@@ -1,11 +1,18 @@
-from core.models import (Product, ProductContent, ProductFile, ProductType,
-                         Profile, Release)
+from core.models import (Pipeline, Process, Product, ProductContent,
+                         ProductFile, ProductType, Profile, Release)
 from django import forms
 from django.contrib import admin
 from django.contrib.auth.admin import UserAdmin
 from django.contrib.auth.models import User
 
 
+@admin.register(Process)
+class ProcessAdmin(admin.ModelAdmin):
+    list_display = ("id", "pipeline", "status", "user", "created_at")
+    exclude = ["path"]
+    search_fields = ("pipeline__name", "status")
+
+
 @admin.register(ProductType)
 class ProductTypeAdmin(admin.ModelAdmin):
     list_display = ("id", "name", "display_name", "created_at")
@@ -20,6 +27,13 @@ class ReleaseAdmin(admin.ModelAdmin):
     search_fields = ("name", "display_name")
 
 
+@admin.register(Pipeline)
+class PipelineAdmin(admin.ModelAdmin):
+    list_display = ("id", "name", "display_name", "created_at")
+
+    search_fields = ("name", "display_name")
+
+
 class ProductAdminForm(forms.ModelForm):
     class Meta:
         model = Product
diff --git a/backend/core/migrations/0029_pipeline.py b/backend/core/migrations/0029_pipeline.py
new file mode 100644
index 0000000..d5ced5f
--- /dev/null
+++ b/backend/core/migrations/0029_pipeline.py
@@ -0,0 +1,25 @@
+# Generated by Django 5.0.6 on 2024-05-27 21:32
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0028_productfile_created_productfile_updated'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='Pipeline',
+            fields=[
+                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('name', models.CharField(max_length=255)),
+                ('display_name', models.CharField(max_length=255)),
+                ('version', models.CharField(max_length=55)),
+                ('description', models.TextField(blank=True, null=True)),
+                ('created_at', models.DateTimeField(auto_now_add=True)),
+                ('system_config', models.JSONField(blank=True, null=True)),
+            ],
+        ),
+    ]
diff --git a/backend/core/migrations/0030_process.py b/backend/core/migrations/0030_process.py
new file mode 100644
index 0000000..9f3217b
--- /dev/null
+++ b/backend/core/migrations/0030_process.py
@@ -0,0 +1,35 @@
+# Generated by Django 5.0.6 on 2024-05-28 15:39
+
+import django.db.models.deletion
+from django.conf import settings
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0029_pipeline'),
+        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='Process',
+            fields=[
+                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('pipeline_version', models.CharField(blank=True, default=None, max_length=255, null=True)),
+                ('used_config', models.JSONField(blank=True, null=True)),
+                ('created_at', models.DateTimeField(auto_now_add=True)),
+                ('started_at', models.DateTimeField(blank=True, null=True)),
+                ('ended_at', models.DateTimeField(blank=True, null=True)),
+                ('task_id', models.CharField(blank=True, default=None, max_length=255, null=True)),
+                ('status', models.CharField(default='Pending', max_length=55)),
+                ('path', models.FilePathField(blank=True, default=None, null=True, verbose_name='Path')),
+                ('comment', models.TextField(blank=True, null=True)),
+                ('inputs', models.ManyToManyField(related_name='processes', to='core.product')),
+                ('pipeline', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='processes', to='core.pipeline')),
+                ('release', models.ForeignKey(blank=True, default=None, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='processes', to='core.release')),
+                ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='processes', to=settings.AUTH_USER_MODEL)),
+            ],
+        ),
+    ]
diff --git a/backend/core/migrations/0031_pipeline_product_types_accepted.py b/backend/core/migrations/0031_pipeline_product_types_accepted.py
new file mode 100644
index 0000000..10b436b
--- /dev/null
+++ b/backend/core/migrations/0031_pipeline_product_types_accepted.py
@@ -0,0 +1,18 @@
+# Generated by Django 5.0.6 on 2024-05-29 15:12
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0030_process'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='pipeline',
+            name='product_types_accepted',
+            field=models.ManyToManyField(related_name='pipelines', to='core.producttype'),
+        ),
+    ]
diff --git a/backend/core/migrations/0032_process_upload_alter_process_inputs.py b/backend/core/migrations/0032_process_upload_alter_process_inputs.py
new file mode 100644
index 0000000..958e6d8
--- /dev/null
+++ b/backend/core/migrations/0032_process_upload_alter_process_inputs.py
@@ -0,0 +1,25 @@
+# Generated by Django 5.0.6 on 2024-05-29 17:45
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0031_pipeline_product_types_accepted'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='process',
+            name='upload',
+            field=models.ForeignKey(default=1, on_delete=django.db.models.deletion.CASCADE, related_name='upload', to='core.product'),
+            preserve_default=False,
+        ),
+        migrations.AlterField(
+            model_name='process',
+            name='inputs',
+            field=models.ManyToManyField(related_name='inputs', to='core.product'),
+        ),
+    ]
diff --git a/backend/core/migrations/0033_alter_process_upload.py b/backend/core/migrations/0033_alter_process_upload.py
new file mode 100644
index 0000000..ed99e64
--- /dev/null
+++ b/backend/core/migrations/0033_alter_process_upload.py
@@ -0,0 +1,19 @@
+# Generated by Django 5.0.6 on 2024-05-29 17:55
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0032_process_upload_alter_process_inputs'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='process',
+            name='upload',
+            field=models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, related_name='upload', to='core.product'),
+        ),
+    ]
diff --git a/backend/core/migrations/0034_process_upload_product_type.py b/backend/core/migrations/0034_process_upload_product_type.py
new file mode 100644
index 0000000..43af886
--- /dev/null
+++ b/backend/core/migrations/0034_process_upload_product_type.py
@@ -0,0 +1,20 @@
+# Generated by Django 5.0.6 on 2024-05-29 22:01
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0033_alter_process_upload'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='process',
+            name='upload_product_type',
+            field=models.ForeignKey(default=2, on_delete=django.db.models.deletion.CASCADE, related_name='upload_product_type', to='core.producttype'),
+            preserve_default=False,
+        ),
+    ]
diff --git a/backend/core/migrations/0035_remove_process_upload_product_type_and_more.py b/backend/core/migrations/0035_remove_process_upload_product_type_and_more.py
new file mode 100644
index 0000000..074ea3e
--- /dev/null
+++ b/backend/core/migrations/0035_remove_process_upload_product_type_and_more.py
@@ -0,0 +1,30 @@
+# Generated by Django 5.0.6 on 2024-05-31 14:53
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0034_process_upload_product_type'),
+    ]
+
+    operations = [
+        migrations.RemoveField(
+            model_name='process',
+            name='upload_product_type',
+        ),
+        migrations.AddField(
+            model_name='pipeline',
+            name='output_product_type',
+            field=models.ForeignKey(default=2, on_delete=django.db.models.deletion.CASCADE, related_name='output_product_type', to='core.producttype'),
+            preserve_default=False,
+        ),
+        migrations.AddField(
+            model_name='process',
+            name='display_name',
+            field=models.CharField(default='Test DN', max_length=255),
+            preserve_default=False,
+        ),
+    ]
diff --git a/backend/core/models/__init__.py b/backend/core/models/__init__.py
index 4a0bc9f..f4fcd78 100644
--- a/backend/core/models/__init__.py
+++ b/backend/core/models/__init__.py
@@ -1,6 +1,8 @@
 from core.models.release import Release
 from core.models.product_type import ProductType
-from core.models.product import Product
+from core.models.product import Product, ProductStatus
 from core.models.product_content import ProductContent
 from core.models.product_file import ProductFile
 from core.models.user_profile import Profile
+from core.models.pipeline import Pipeline
+from core.models.process import Process
diff --git a/backend/core/models/pipeline.py b/backend/core/models/pipeline.py
new file mode 100644
index 0000000..ffdf6d1
--- /dev/null
+++ b/backend/core/models/pipeline.py
@@ -0,0 +1,24 @@
+from core.models import ProductType
+from django.db import models
+
+
+class Pipeline(models.Model):
+
+    name = models.CharField(max_length=255)
+    display_name = models.CharField(max_length=255)
+    version = models.CharField(max_length=55)
+    description = models.TextField(null=True, blank=True)
+    created_at = models.DateTimeField(auto_now_add=True)
+    system_config = models.JSONField(null=True, blank=True)
+    product_types_accepted = models.ManyToManyField(
+        ProductType, related_name="pipelines"
+    )
+    output_product_type = models.ForeignKey(
+        ProductType,
+        on_delete=models.CASCADE,
+        related_name="output_product_type",
+    )
+
+    def __str__(self):
+        return f"{self.display_name}"
\ No newline at end of file
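
The `Pipeline` row only stores metadata; the pipeline code itself lives under `orchestration/pipelines/`. As a hedged illustration (not part of the patch), registering the `cross_lsdb` pipeline used elsewhere in this diff could look like the sketch below in a Django shell. The product type names come from the checks in `create_product.py`; the version and description are invented.

``` python
# Sketch: registering a pipeline row from `python manage.py shell`.
from core.models import Pipeline, ProductType

specz = ProductType.objects.get(name="specz_catalog")
training_set = ProductType.objects.get(name="training_set")

pipeline = Pipeline.objects.create(
    name="cross_lsdb",
    display_name="Cross LSDB",
    version="0.0.1",                   # hypothetical version string
    description="Crossmatch of two catalogs using LSDB.",
    output_product_type=training_set,  # type of the product it uploads
)
pipeline.product_types_accepted.set([specz])  # input types it accepts
```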
diff --git a/backend/core/models/process.py b/backend/core/models/process.py
new file mode 100644
index 0000000..6226ccc
--- /dev/null
+++ b/backend/core/models/process.py
@@ -0,0 +1,64 @@
+import pathlib
+import shutil
+
+from core.models import Pipeline, Product, ProductStatus, Release
+from django.conf import settings
+from django.contrib.auth.models import User
+from django.db import models
+
+
+class Process(models.Model):
+    display_name = models.CharField(max_length=255)
+    pipeline = models.ForeignKey(
+        Pipeline, on_delete=models.CASCADE, related_name="processes"
+    )
+    pipeline_version = models.CharField(
+        max_length=255, null=True, blank=True, default=None
+    )
+    used_config = models.JSONField(null=True, blank=True)
+    inputs = models.ManyToManyField(Product, related_name="inputs")
+    release = models.ForeignKey(
+        Release,
+        on_delete=models.CASCADE,
+        related_name="processes",
+        null=True,
+        blank=True,
+        default=None,
+    )
+    upload = models.OneToOneField(
+        Product,
+        on_delete=models.CASCADE,
+        related_name="upload",
+    )
+    user = models.ForeignKey(User, on_delete=models.CASCADE, related_name="processes")
+    created_at = models.DateTimeField(auto_now_add=True)
+    started_at = models.DateTimeField(null=True, blank=True)
+    ended_at = models.DateTimeField(null=True, blank=True)
+    task_id = models.CharField(max_length=255, null=True, blank=True, default=None)
+    status = models.IntegerField(
+        verbose_name="Status",
+        default=ProductStatus.REGISTERING,
+        choices=ProductStatus.choices,
+    )
+    path = models.FilePathField(
+        verbose_name="Path", null=True, blank=True, default=None
+    )
+    comment = models.TextField(null=True, blank=True)
+
+    def __str__(self):
+        return f"{self.pipeline}-{str(self.pk).zfill(8)}"
+
+    def can_delete(self, user) -> bool:
+        # mirrors Product.can_delete; ProcessViewSet.destroy relies on it
+        if self.user.id == user.id or user.profile.is_admin():
+            return True
+        return False
+
+    def delete(self, *args, **kwargs):
+        process_path = pathlib.Path(settings.PROCESSING_DIR, str(self.path))
+        if process_path.exists():
+            self.rmtree(process_path)
+
+        super().delete(*args, **kwargs)
+
+    @staticmethod
+    def rmtree(process_path):
+        try:
+            # WARN: not called by bulk deletes (e.g. from the Django admin)
+            shutil.rmtree(process_path)
+        except OSError as e:
+            raise OSError("Failed to remove directory: [ %s ] %s" % (process_path, e))
\ No newline at end of file
diff --git a/backend/core/models/product.py b/backend/core/models/product.py
index b576d10..f0db996 100644
--- a/backend/core/models/product.py
+++ b/backend/core/models/product.py
@@ -75,4 +75,5 @@ def can_delete(self, user) -> bool:
     def can_update(self, user) -> bool:
         if self.user.id == user.id or user.profile.is_admin():
             return True
-        return False
\ No newline at end of file
+        return False
+
\ No newline at end of file
diff --git a/backend/core/pipeline_objects.py b/backend/core/pipeline_objects.py
new file mode 100644
index 0000000..634c032
--- /dev/null
+++ b/backend/core/pipeline_objects.py
@@ -0,0 +1,48 @@
+from pathlib import Path
+
+from core.utils import get_pipeline, get_pipelines
+from pydantic import BaseModel, validator
+
+
+class Pipeline():
+    def __init__(self):
+        self.__raw_pipelines = get_pipelines()
+
+    def all(self):
+        pipelines = []
+        for pipename, data in self.__raw_pipelines.items():
+            data["name"] = pipename
+            pipelines.append(PipelineModel(**data))
+        return pipelines
+
+    def get(self, name):
+        data = self.__raw_pipelines.get(name, {})
+        data["name"] = name
+        return PipelineModel(**data)
+
+
+class PipelineModel(BaseModel):
+    name: str
+    path: str
+    executor: str
+    runner: str
+    executable: str
+    version: str
+    display_name: str | None = None
+    schema_config: str | None = None
+
+    @validator('path', pre=True)
+    def validate_path(cls, value):
+        assert Path(value).is_dir(), f"Folder '{value}' not found."
+        return value
+
+    @validator('schema_config', pre=True)
+    def validate_config(cls, value):
+        assert Path(value).is_file(), f"File '{value}' not found."
+        return value
+
+
+if __name__ == "__main__":
+    pipe_info = get_pipeline('cross_lsdb')
+    pipeline = PipelineModel(**pipe_info)
\ No newline at end of file
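
`PipelineModel` mirrors one entry of `orchestration/pipelines/pipelines.yaml`, whose contents are not shown in this section. The sketch below shows how such an entry would be validated; every key and value is an assumption based only on the model's fields, and the two validators require the referenced paths to actually exist (so this only runs where the `/pipelines` volume is mounted).

``` python
# Sketch: validating a hypothetical pipelines.yaml entry with PipelineModel.
import yaml

from core.pipeline_objects import PipelineModel

RAW = """
cross_lsdb:
  display_name: Cross LSDB
  path: /pipelines/cross_lsdb_dev
  executor: local
  runner: bash
  executable: run.sh
  version: 0.0.1
  schema_config: /pipelines/cross_lsdb_dev/config.py
"""

data = yaml.safe_load(RAW)["cross_lsdb"]
data["name"] = "cross_lsdb"       # Pipeline.get()/all() inject the key as "name"
pipeline = PipelineModel(**data)  # validate_path/validate_config run here
print(pipeline.executable)
```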
diff --git a/backend/core/serializers/__init__.py b/backend/core/serializers/__init__.py
index ec9acd3..78fb0f9 100644
--- a/backend/core/serializers/__init__.py
+++ b/backend/core/serializers/__init__.py
@@ -1,6 +1,8 @@
-from core.serializers.release import ReleaseSerializer
-from core.serializers.product_type import ProductTypeSerializer
+from core.serializers.pipeline import PipelineSerializer
+from core.serializers.process import ProcessSerializer
 from core.serializers.product import ProductSerializer
 from core.serializers.product_content import ProductContentSerializer
 from core.serializers.product_file import ProductFileSerializer
+from core.serializers.product_type import ProductTypeSerializer
+from core.serializers.release import ReleaseSerializer
 from core.serializers.user import UserSerializer
diff --git a/backend/core/serializers/pipeline.py b/backend/core/serializers/pipeline.py
new file mode 100644
index 0000000..2cdd4c7
--- /dev/null
+++ b/backend/core/serializers/pipeline.py
@@ -0,0 +1,8 @@
+from core.models import Pipeline
+from rest_framework import serializers
+
+
+class PipelineSerializer(serializers.ModelSerializer):
+    class Meta:
+        model = Pipeline
+        fields = "__all__"
diff --git a/backend/core/serializers/process.py b/backend/core/serializers/process.py
new file mode 100644
index 0000000..3ffd00e
--- /dev/null
+++ b/backend/core/serializers/process.py
@@ -0,0 +1,59 @@
+from core.models import Process, Product, Release
+from rest_framework import serializers
+
+
+class ProcessSerializer(serializers.ModelSerializer):
+
+    release = serializers.PrimaryKeyRelatedField(
+        queryset=Release.objects.all(), many=False, allow_null=True, required=False
+    )
+    # upload = serializers.PrimaryKeyRelatedField(
+    #     queryset=Product.objects.all(), many=False
+    # )
+    release_name = serializers.SerializerMethodField()
+    pipeline_name = serializers.SerializerMethodField()
+    pipeline_version = serializers.SerializerMethodField()
+    status = serializers.SerializerMethodField()
+    owned_by = serializers.SerializerMethodField()
+    is_owner = serializers.SerializerMethodField()
+    # can_delete = serializers.SerializerMethodField()
+    # can_update = serializers.SerializerMethodField()
+
+    class Meta:
+        model = Process
+        read_only_fields = ("pipeline_version", "is_owner", "upload", "status")
+        exclude = ("user", "path")
+
+    def get_pipeline_name(self, obj):
+        return obj.pipeline.name
+
+    def get_pipeline_version(self, obj):
+        return obj.pipeline.version
+
+    def get_status(self, obj):
+        return obj.upload.status
+
+    def get_release_name(self, obj):
+        try:
+            return obj.release.display_name
+        except AttributeError:
+            return None
+
+    def get_owned_by(self, obj):
+        return obj.user.username
+
+    def get_is_owner(self, obj):
+        current_user = self.context["request"].user
+        return obj.user.pk == current_user.pk
+
+    # def get_can_delete(self, obj):
+    #     current_user = self.context["request"].user
+    #     return obj.can_delete(current_user)
+
+    # def get_can_update(self, obj):
+    #     current_user = self.context["request"].user
+    #     return obj.can_update(current_user)
\ No newline at end of file
diff --git a/backend/core/utils.py b/backend/core/utils.py
new file mode 100644
index 0000000..85237bc
--- /dev/null
+++ b/backend/core/utils.py
@@ -0,0 +1,93 @@
+import importlib
+import importlib.util
+import json
+import logging
+import pathlib
+import sys
+
+import yaml
+from django.conf import settings
+from django.db.models import Q
+
+logger = logging.getLogger()
+
+
+def get_pipelines():
+    sys_pipes_file = pathlib.Path(settings.PIPELINES_DIR, 'pipelines.yaml')
+    with open(sys_pipes_file, encoding="utf-8") as _file:
+        return yaml.safe_load(_file)
+
+
+def get_pipeline(name):
+    system_pipelines = get_pipelines()
+    pipeline = system_pipelines.get(name, None)
+    assert pipeline, f"Pipeline {name} not found."
+    pipeline['name'] = name
+    return pipeline
+
+
+def load_config(schema_path, config={}):
+    mod = load_module_from_file(schema_path)
+    return mod.Config(**config)
+
+
+def import_module(module):
+    return importlib.import_module(module)
+
+
+def load_module_from_file(module):
+    spec = importlib.util.spec_from_file_location(f"{module}.mod", module)
+    assert spec, f"No module named '{module}'."
+    mod = importlib.util.module_from_spec(spec)
+    assert mod, f"Failed to import python module: {module}"
+    sys.modules[f"{module}.mod"] = mod
+    spec.loader.exec_module(mod)
+    return mod
+
+
+def load_executor(executor):
+    assert validate_executor(executor), f"No executor named '{executor}'."
+    mod = import_module(f"core.executors.{executor}")
+    return getattr(mod, f"Executor{executor.capitalize()}")
+
+
+def validate_executor(executor):
+    try:
+        import_module(f"core.executors.{executor}")
+    except ModuleNotFoundError:
+        return False
+    return True
+
+
+def validate_json(data):
+    try:
+        json.loads(data)
+    except ValueError:
+        return False
+    return True
+
+
+def validate_config(config):
+    if not config:
+        return True
+    return validate_json(config) and isinstance(json.loads(config), dict)
+
+
+def get_returncode(process_dir):
+    try:
+        with open(f"{process_dir}/return.code", encoding="utf-8") as _file:
+            content = _file.readline()
+            return int(content.replace('\n', ''))
+    except Exception as err:
+        logger.error(f"Error when reading return code: {err}")
+        return -1
+
+
+def format_query_to_char(key, value, fields):
+    condition = Q.OR if key.endswith("__or") else Q.AND
+    values = value.split(",")
+    query = Q()
+
+    for value in values:
+        subfilter = Q()
+        for field in fields:
+            subfilter.add(Q(**{f"{field}__icontains": value}), Q.OR)
+
+        query.add(subfilter, condition)
+
+    return query
\ No newline at end of file
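
`format_query_to_char` is the helper the filter classes below delegate to: keys ending in `__or` combine the comma-separated values with OR, anything else with AND, and each value is matched with `icontains` across all the given fields. A usage sketch (not part of the patch; the field names are taken from `ProductFilter`):

``` python
# Sketch: what format_query_to_char builds for a filter like uploaded_by__or.
from core.models import Product
from core.utils import format_query_to_char

query = format_query_to_char(
    "uploaded_by__or",                      # "__or" suffix -> values are OR'ed
    "alice,bob",                            # comma-separated search values
    ["user__username", "user__first_name"],
)
# Roughly: (username~alice | first_name~alice) OR (username~bob | first_name~bob)
print(Product.objects.filter(query).query)
```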
diff --git a/backend/core/views/__init__.py b/backend/core/views/__init__.py
index e63b8e6..bac1068 100644
--- a/backend/core/views/__init__.py
+++ b/backend/core/views/__init__.py
@@ -1,10 +1,9 @@
-from core.views.release import ReleaseViewSet
-from core.views.product_type import ProductTypeViewSet
+from core.views.pipeline import PipelineViewSet
+from core.views.process import ProcessViewSet
 from core.views.product import ProductViewSet
 from core.views.product_content import ProductContentViewSet
 from core.views.product_file import ProductFileViewSet
-from core.views.user import LoggedUserView
-from core.views.user import GetToken
-from core.views.user import CsrfToOauth
-from core.views.user import Logout
-from core.views.user import UserViewSet
+from core.views.product_type import ProductTypeViewSet
+from core.views.release import ReleaseViewSet
+from core.views.user import (CsrfToOauth, GetToken, LoggedUserView, Logout,
+                             UserViewSet)
diff --git a/backend/core/views/create_product.py b/backend/core/views/create_product.py
new file mode 100644
index 0000000..9d7f53f
--- /dev/null
+++ b/backend/core/views/create_product.py
@@ -0,0 +1,155 @@
+import logging
+import pathlib
+
+from core.models import Product
+from core.serializers import ProductSerializer
+from django.conf import settings
+
+
+class CreateProduct:
+
+    def __init__(self, data, user):
+        self.__log = logging.getLogger("create_product")
+        self.__log.debug(f"Creating product: {data}")
+
+        serializer = ProductSerializer(data=data)
+        serializer.is_valid(raise_exception=True)
+
+        self.__data = self.__perform_create(serializer, user)
+        self.__check_official_product(user)
+
+    def save(self):
+        can_save = self.check_product_types()
+
+        if not can_save.get("success"):
+            return can_save.get("message")
+
+        self.__set_internal_name()
+        self.__create_product_path()
+
+        self.__log.debug(f"Product ID {self.__data.pk} created")
+
+    def __check_official_product(self, user):
+        """Checks if the product is official and if the user has permission
+        to save an official product.
+
+        Args:
+            user (User): User object
+
+        Raises:
+            ValueError: if the user does not have permission
+
+        Returns:
+            bool
+        """
+
+        is_official = self.__data.official_product
+
+        if is_official:
+            if user.profile.is_admin() is False:
+                self.__delete()
+                raise ValueError(
+                    "Not allowed. Only users with admin permissions "
+                    "can create official products."
+                )
+
+        return True
+
+    @property
+    def data(self):
+        return self.__data
+
+    def get(self):
+        """Returns Product object
+
+        Returns:
+            Product object
+        """
+        return Product.objects.get(pk=self.__data.pk)
+
+    def __set_internal_name(self):
+        """Sets the internal name based on the primary key and display name"""
+
+        # change spaces to "_", convert to lowercase, remove trailing spaces.
+        name = self.__data.display_name.replace(" ", "_").lower().strip().strip("\n")
+
+        # strip any non-alphanumeric character except "_"
+        name = "".join(e for e in name if e.isalnum() or e == "_")
+        self.__data.internal_name = f"{self.__data.pk}_{name}"
+        self.__data.save()
+
+    def __create_product_path(self):
+        """Create product path"""
+
+        # Create product path
+        relative_path = f"{self.__data.product_type.name}/{self.__data.internal_name}"
+        path = pathlib.Path(settings.MEDIA_ROOT, relative_path)
+        path.mkdir(parents=True, exist_ok=True)
+
+        self.__data.path = relative_path
+        self.__data.save()
+
+    def check_product_types(self):
+        """Checks product types by applying a certain business rule.
+
+        Returns:
+            dict: {'message': {'entity': list(str)}, 'success': bool}
+        """
+
+        if not self.__data:
+            return {"message": {"product": ["No data."]}, "success": False}
+
+        # Release is not allowed in Spec-z Catalog
+        if (
+            self.__data.release
+            and self.__data.product_type.name == "specz_catalog"
+        ):
+            self.__delete()
+            return {
+                "message": {"release": [
+                    "Release must be null on Spec-z Catalogs products."
+                ]},
+                "success": False,
+            }
+
+        # Pz code is only allowed in Validation Results and Photo-z Tables
+        if self.__data.pz_code and self.__data.product_type.name in (
+            "training_set",
+            "specz_catalog",
+        ):
+            dn = self.__data.product_type.display_name
+            pzc = self.__data.pz_code
+            self.__delete()
+            return {
+                "message": {"pz_code": [
+                    f"Pz Code must be null on {dn} products. '{pzc}'"
+                ]},
+                "success": False,
+            }
+
+        return {"message": {"product_type": ["Success!"]}, "success": True}
+
+    def __perform_create(self, serializer, user):
+        """Add user"""
+
+        uploaded_by = user
+        return serializer.save(user=uploaded_by)
+
+    def __delete(self):
+        """Delete product"""
+
+        if self.__data:
+            self.__data.path = f"{settings.MEDIA_ROOT}/{self.__data.path}"
+            self.__data.delete()
+            self.__data = None
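
`CreateProduct` is used both by `ProductViewSet.create` and by `ProcessViewSet.create_initial_upload` further below. A sketch of the intended call sequence, runnable from a Django shell; the display name and product type pk are placeholders:

``` python
# Sketch: the CreateProduct flow driven by the views.
from core.views.create_product import CreateProduct
from django.contrib.auth.models import User

user = User.objects.first()            # stand-in for request.user

product = CreateProduct(
    {
        "display_name": "Crossmatch output",
        "product_type": 2,             # hypothetical ProductType pk
        "official_product": False,
    },
    user,
)

check = product.check_product_types()  # {"message": {...}, "success": bool}
if not check.get("success"):
    raise ValueError(check.get("message"))

product.save()                         # sets internal_name and creates the path
upload = product.data                  # the persisted core.models.Product
```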
diff --git a/backend/core/views/pipeline.py b/backend/core/views/pipeline.py
new file mode 100644
index 0000000..814c3aa
--- /dev/null
+++ b/backend/core/views/pipeline.py
@@ -0,0 +1,26 @@
+from core import models
+from core.serializers import PipelineSerializer
+from rest_framework import viewsets
+from rest_framework.decorators import action
+from rest_framework.response import Response
+
+
+class PipelineViewSet(viewsets.ReadOnlyModelViewSet):
+    queryset = models.Pipeline.objects.all()
+    serializer_class = PipelineSerializer
+    filterset_fields = [
+        "id",
+        "name",
+    ]
+    search_fields = [
+        "display_name",
+        "description",
+    ]
+    ordering = ["-created_at"]
+
+    @action(methods=["GET"], detail=True)
+    def api_schema(self, request):
+        meta = self.metadata_class()
+        data = meta.determine_metadata(request, self)
+        return Response(data)
\ No newline at end of file
diff --git a/backend/core/views/process.py b/backend/core/views/process.py
new file mode 100644
index 0000000..c0e515e
--- /dev/null
+++ b/backend/core/views/process.py
@@ -0,0 +1,133 @@
+from core.models import Pipeline, Process
+from core.serializers import ProcessSerializer
+from core.utils import format_query_to_char
+from core.views.create_product import CreateProduct
+from django_filters import rest_framework as filters
+from rest_framework import exceptions, status, viewsets
+from rest_framework.decorators import action
+from rest_framework.response import Response
+
+
+class ProcessFilter(filters.FilterSet):
+    release__isnull = filters.BooleanFilter(
+        field_name="release", lookup_expr="isnull")
+    pipeline__or = filters.CharFilter(method="filter_pipeline")
+    pipeline = filters.CharFilter(method="filter_pipeline")
+    release_name__or = filters.CharFilter(method="filter_release")
+    release_name = filters.CharFilter(method="filter_release")
+
+    class Meta:
+        model = Process
+        fields = [
+            "pipeline",
+            "status",
+            "release",
+            "user",
+        ]
+
+    def filter_user(self, queryset, name, value):
+        query = format_query_to_char(
+            name, value,
+            ["user__username", "user__first_name", "user__last_name"]
+        )
+
+        return queryset.filter(query)
+
+    def filter_pipeline(self, queryset, name, value):
+        query = format_query_to_char(
+            name, value,
+            ["pipeline__display_name", "pipeline__name"]
+        )
+
+        return queryset.filter(query)
+
+    def filter_release(self, queryset, name, value):
+        query = format_query_to_char(
+            name, value, ["release__display_name"])
+        return queryset.filter(query)
+
+
+class ProcessViewSet(viewsets.ModelViewSet):
+    queryset = Process.objects.all()
+    serializer_class = ProcessSerializer
+    search_fields = [
+        "pipeline__name",
+        "pipeline__display_name",
+        "user__username",
+        "user__first_name",
+        "user__last_name",
+    ]
+    filterset_class = ProcessFilter
+    ordering_fields = [
+        "id",
+        "created_at",
+    ]
+    ordering = ["-created_at"]
+
+    def create(self, request):
+        serializer = self.get_serializer(data=request.data)
+        serializer.is_valid(raise_exception=True)
+
+        try:
+            instance = self.perform_create(serializer)
print("INSTANCE: ", instance) + print("INSTANCE type: ", type(instance)) + + process = Process.objects.get(pk=instance.pk) + process.save() + + data = self.get_serializer(instance=process).data + return Response(data, status=status.HTTP_201_CREATED) + + except Exception as e: + content = {"error": str(e)} + return Response(content, status=status.HTTP_500_INTERNAL_SERVER_ERROR) + + def perform_create(self, serializer): + """Add user and upload""" + + owned_by = self.request.user + upload = self.create_initial_upload(serializer, owned_by) + return serializer.save(user=owned_by, upload=upload) + + def create_initial_upload(self, serializer, user): + """_summary_""" + data = serializer.initial_data + pipeline = Pipeline.objects.get(pk=data.get('pipeline')) + upload_data = { + "display_name": data.get("display_name"), + "release": data.get("release", None), + "pz_code": data.get("pz_code", None), + "official_product": data.get("official_product", False), + "description": data.get("description", None), + "product_type": pipeline.output_product_type.pk, + } + product = CreateProduct(upload_data, user) + check_prodtype = product.check_product_types() + + if not check_prodtype.get("success"): + raise ValueError(check_prodtype.get("message")) + + product.save() + return product.data + + @action(methods=["GET"], detail=True) + def api_schema(self, request): + meta = self.metadata_class() + data = meta.determine_metadata(request, self) + return Response(data) + + def destroy(self, request, pk=None, *args, **kwargs): + """Product can only be deleted by the OWNER or if the user + has an admin profile. + """ + + instance = self.get_object() + if instance.can_delete(self.request.user): + return super(ProcessViewSet, self).destroy(request, pk, *args, **kwargs) + else: + raise exceptions.PermissionDenied() \ No newline at end of file diff --git a/backend/core/views/product.py b/backend/core/views/product.py index 7d7cc4e..1aa6c28 100644 --- a/backend/core/views/product.py +++ b/backend/core/views/product.py @@ -4,30 +4,27 @@ import secrets import tempfile import zipfile -from json import dumps, loads +from json import loads from pathlib import Path -import pandas as pd from core.models import Product -from core.pagination import CustomPageNumberPagination from core.product_handle import FileHandle, NotTableError -from core.serializers import ProductContentSerializer, ProductSerializer +from core.serializers import ProductSerializer +from core.utils import format_query_to_char +from core.views.create_product import CreateProduct from core.views.registry_product import RegistryProduct from django.conf import settings -from django.contrib.auth.models import User from django.core.paginator import Paginator from django.db.models import Q -from django.http import FileResponse, JsonResponse +from django.http import FileResponse from django_filters import rest_framework as filters from rest_framework import exceptions, status, viewsets from rest_framework.decorators import action -from rest_framework.pagination import PageNumberPagination from rest_framework.response import Response class ProductFilter(filters.FilterSet): - release__isnull = filters.BooleanFilter( - field_name="release", lookup_expr="isnull") + release__isnull = filters.BooleanFilter(field_name="release", lookup_expr="isnull") uploaded_by__or = filters.CharFilter(method="filter_user") uploaded_by = filters.CharFilter(method="filter_user") product_type_name__or = filters.CharFilter(method="filter_type_name") @@ -50,44 +47,26 @@ class Meta: ] def 
diff --git a/backend/core/views/product.py b/backend/core/views/product.py
index 7d7cc4e..1aa6c28 100644
--- a/backend/core/views/product.py
+++ b/backend/core/views/product.py
@@ -4,30 +4,27 @@
 import secrets
 import tempfile
 import zipfile
-from json import dumps, loads
+from json import loads
 from pathlib import Path
 
-import pandas as pd
 from core.models import Product
-from core.pagination import CustomPageNumberPagination
 from core.product_handle import FileHandle, NotTableError
-from core.serializers import ProductContentSerializer, ProductSerializer
+from core.serializers import ProductSerializer
+from core.utils import format_query_to_char
+from core.views.create_product import CreateProduct
 from core.views.registry_product import RegistryProduct
 from django.conf import settings
-from django.contrib.auth.models import User
 from django.core.paginator import Paginator
 from django.db.models import Q
-from django.http import FileResponse, JsonResponse
+from django.http import FileResponse
 from django_filters import rest_framework as filters
 from rest_framework import exceptions, status, viewsets
 from rest_framework.decorators import action
-from rest_framework.pagination import PageNumberPagination
 from rest_framework.response import Response
 
 
 class ProductFilter(filters.FilterSet):
-    release__isnull = filters.BooleanFilter(
-        field_name="release", lookup_expr="isnull")
+    release__isnull = filters.BooleanFilter(field_name="release", lookup_expr="isnull")
     uploaded_by__or = filters.CharFilter(method="filter_user")
     uploaded_by = filters.CharFilter(method="filter_user")
     product_type_name__or = filters.CharFilter(method="filter_type_name")
@@ -50,44 +47,26 @@ class Meta:
         ]
 
     def filter_user(self, queryset, name, value):
-        query = self.format_query_to_char(
-            name, value, ["user__username",
-                          "user__first_name", "user__last_name"]
+        query = format_query_to_char(
+            name, value, ["user__username", "user__first_name", "user__last_name"]
         )
 
         return queryset.filter(query)
 
     def filter_name(self, queryset, name, value):
-        query = self.format_query_to_char(name, value, ["display_name"])
+        query = format_query_to_char(name, value, ["display_name"])
         return queryset.filter(query)
 
     def filter_type_name(self, queryset, name, value):
-        query = self.format_query_to_char(
-            name, value, ["product_type__display_name"])
+        query = format_query_to_char(name, value, ["product_type__display_name"])
         return queryset.filter(query)
 
     def filter_release(self, queryset, name, value):
-        query = self.format_query_to_char(
-            name, value, ["release__display_name"])
+        query = format_query_to_char(name, value, ["release__display_name"])
         return queryset.filter(query)
 
-    @staticmethod
-    def format_query_to_char(key, value, fields):
-        condition = Q.OR if key.endswith("__or") else Q.AND
-        values = value.split(",")
-        query = Q()
-
-        for value in values:
-            subfilter = Q()
-            for field in fields:
-                subfilter.add(Q(**{f"{field}__icontains": value}), Q.OR)
-
-            query.add(subfilter, condition)
-
-        return query
-
 
 class ProductViewSet(viewsets.ModelViewSet):
     queryset = Product.objects.all()
@@ -108,90 +87,23 @@ class ProductViewSet(viewsets.ModelViewSet):
     ordering = ["-created_at"]
 
     def create(self, request):
-        serializer = self.get_serializer(data=request.data)
-        serializer.is_valid(raise_exception=True)
-        instance = self.perform_create(serializer)
-
         try:
-            product = Product.objects.get(pk=instance.pk)
+            product = CreateProduct(request.data, request.user)
+            check_prodtype = product.check_product_types()
 
-            # Verifica se o produto é oficial,
-            # Apenas user que fazem parte do Group=Admin podem criar produtos oficiais.
-            if product.official_product is True:
-                if request.user.profile.is_admin() is False:
-                    return Response(
-                        {
-                            "error": "Not allowed. Only users with admin permissions can create official products."
-                        },
-                        status=status.HTTP_403_FORBIDDEN,
-                    )
-
-            # Cria um internal name
-            name = self.get_internal_name(product.display_name)
-            product.internal_name = f"{product.pk}_{name}"
-
-            # Cria um path para o produto
-            relative_path = f"{product.product_type.name}/{product.internal_name}"
-            # TODO: Talves mover a criação do path do produto para a parte do upload dos arquivos.
-            path = pathlib.Path(settings.MEDIA_ROOT, relative_path)
-            path.mkdir(parents=True, exist_ok=True)
-
-            product.path = relative_path
-
-            # Verificar campos relacionados ao Produt Type.
-
-            # Release is not allowed in Spec-z Catalog
-            if (
-                product.release
-                and product.product_type.name == "specz_catalog"
-            ):
+            if not check_prodtype.get("success"):
                 return Response(
-                    {"release": [
-                        "Release must be null on Spec-z Catalogs products."]},
-                    status=status.HTTP_400_BAD_REQUEST,
-                )
-
-            # Pzcode is only allowed in Validations Results and Photo-z Table
-            if product.pz_code and product.product_type.name in (
-                "training_set",
-                "specz_catalog",
-            ):
-                return Response(
-                    {
-                        "pz_code": [
-                            f"Pz Code must be null on {product.product_type.display_name} products. '{product.pz_code}'"
-                        ]
-                    },
-                    status=status.HTTP_400_BAD_REQUEST,
+                    check_prodtype.get("message"), status=status.HTTP_400_BAD_REQUEST
                 )
 
             product.save()
-
-            data = self.get_serializer(instance=product).data
+            data = self.get_serializer(instance=product.data).data
             return Response(data, status=status.HTTP_201_CREATED)
 
         except Exception as e:
             content = {"error": str(e)}
             return Response(content, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
 
-    def perform_create(self, serializer):
-        """Create user and add internal_name"""
-
-        uploaded_by = self.request.user
-        return serializer.save(user=uploaded_by)
-
-    def get_internal_name(self, display_name):
-        """
-        Creates an internal name without special characters or spaces.
-        The internal name can be used for paths, urls and tablenames.
-        """
-
-        # change spaces to "_", convert to lowercase, remove trailing spaces.
-        name = display_name.replace(" ", "_").lower().strip().strip("\n")
-
-        # strip any non-alphanumeric character except "_"
-        return "".join(e for e in name if e.isalnum() or e == "_")
-
     @action(methods=["GET"], detail=True)
     def download(self, request, **kwargs):
         """Download product"""
@@ -201,7 +113,8 @@ def download(self, request, **kwargs):
 
         with tempfile.TemporaryDirectory() as tmpdirname:
             # Creates a zip file in the tmp directory with the product files
             zip_file = self.zip_product(
-                product.internal_name, product.path, tmpdirname)
+                product.internal_name, product.path, tmpdirname
+            )
 
             # Opens the file and streams it in bytes to the browser
             mimetype, _ = mimetypes.guess_type(zip_file)
 
             file_handle = open(zip_file, "rb")
             response = FileResponse(file_handle, content_type=mimetype)
             response["Content-Length"] = size
-            response["Content-Disposition"] = "attachment; filename={}".format(
-                name)
+            response["Content-Disposition"] = "attachment; filename={}".format(name)
             return response
         except Exception as e:
             content = {"error": str(e)}
@@ -238,8 +150,7 @@ def download_main_file(self, request, **kwargs):
 
             response = FileResponse(file_handle, content_type=mimetype)
             response["Content-Length"] = size
-            response["Content-Disposition"] = "attachment; filename={}".format(
-                name)
+            response["Content-Disposition"] = "attachment; filename={}".format(name)
             return response
         except Exception as e:
             content = {"error": str(e)}
@@ -247,8 +158,8 @@ def read_data(self, request, **kwargs):
-        page = int(request.GET.get('page', 1))
-        page_size = int(request.GET.get('page_size', 100))
+        page = int(request.GET.get("page", 1))
+        page_size = int(request.GET.get("page_size", 100))
 
         product = self.get_object()
         product_file = product.files.get(role=0)
 
         try:
             df = FileHandle(main_file_path).to_df()
-            records = loads(df.to_json(orient='records'))
+            records = loads(df.to_json(orient="records"))
             paginator = Paginator(records, page_size)
             records = paginator.get_page(page)
 
-            return Response({
-                'count': df.shape[0],
-                'columns': df.columns,
-                'results': records.object_list})
+            return Response(
+                {
+                    "count": df.shape[0],
+                    "columns": df.columns,
+                    "results": records.object_list,
+                }
+            )
 
         except NotTableError as e:
-            content = {
-                "message": "Table preview not available for this product type."}
+            content = {"message": "Table preview not available for this product type."}
             return Response(content, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
         except Exception as e:
             content = {"message": str(e)}
@@ -341,8 +254,7 @@ def pending_publication(self, request, **kwargs):
 
         try:
             # Looks for products created by the user that have not been published yet
-            product = Product.objects.filter(
-                status=0, user_id=request.user.id).first()
+            product = Product.objects.filter(status=0, user_id=request.user.id).first()
 
             if product:
                 # Returns the product
@@ -358,8 +270,7 @@ def pending_publication(self, request, **kwargs):
     def zip_product(self, internal_name, path, tmpdir):
         product_path = pathlib.Path(settings.MEDIA_ROOT, path)
 
-        thash = ''.join(secrets.choice(secrets.token_hex(16))
-                        for i in range(5))
+        thash = "".join(secrets.choice(secrets.token_hex(16)) for i in range(5))
         zip_name = f"{internal_name}_{thash}.zip"
         zip_path = pathlib.Path(tmpdir, zip_name)
 
@@ -379,9 +290,11 @@ def zip_product(self, internal_name, path, tmpdir):
         return zip_path
 
     def destroy(self, request, pk=None, *args, **kwargs):
-        """Produto só pode ser excluido pelo DONO ou se o usuario tiver profile de admin.
+        """Product can only be deleted by the OWNER or if the user has an
+        admin profile.
         """
-        # Regra do admin atualizada na issue: #192 - https://github.com/linea-it/pzserver_app/issues/192
+        # Admin rule updated in issue:
+        # 192 - https://github.com/linea-it/pzserver_app/issues/192
         instance = self.get_object()
         if instance.can_delete(self.request.user):
             return super(ProductViewSet, self).destroy(request, pk, *args, **kwargs)
diff --git a/backend/core/views/registry_product.py b/backend/core/views/registry_product.py
index bb84aed..8752d80 100644
--- a/backend/core/views/registry_product.py
+++ b/backend/core/views/registry_product.py
@@ -15,6 +15,7 @@ def __init__(self, product_id):
         self.log = self.get_log()
 
         self.log.info("----------------------------")
+        self.log.info("Product ID: [%s]" % product_id)
 
         self.product = Product.objects.get(pk=product_id)
 
diff --git a/backend/pzserver/settings.py b/backend/pzserver/settings.py
index 2e4ae38..f5613b0 100644
--- a/backend/pzserver/settings.py
+++ b/backend/pzserver/settings.py
@@ -299,3 +299,12 @@
         },
     },
 }
+
+# Directory that will contain the pipelines' processing outputs.
+PROCESSING_DIR = os.getenv("PROCESSING_DIR", "/processes")
+
+# Directory that contains the pipelines' source code.
+PIPELINES_DIR = os.getenv("PIPELINES_DIR", "/pipelines")
+
+# Directory that contains the datasets.
+DATASETS_DIR = os.getenv("DATASETS_DIR", "/datasets")
\ No newline at end of file
diff --git a/backend/pzserver/urls.py b/backend/pzserver/urls.py
index 9bbc18f..768a7ec 100644
--- a/backend/pzserver/urls.py
+++ b/backend/pzserver/urls.py
@@ -14,31 +14,22 @@
 2. Add a URL to urlpatterns:  path('blog/', include('blog.urls'))
 """
 # from core.api import viewsets as products_viewsets
-from core.views import (
-    CsrfToOauth,
-    GetToken,
-    LoggedUserView,
-    Logout,
-    ProductContentViewSet,
-    ProductFileViewSet,
-    ProductTypeViewSet,
-    ProductViewSet,
-    ReleaseViewSet,
-    UserViewSet,
-)
+from core.views import (CsrfToOauth, GetToken, LoggedUserView, Logout,
+                        PipelineViewSet, ProcessViewSet, ProductContentViewSet,
+                        ProductFileViewSet, ProductTypeViewSet, ProductViewSet,
+                        ReleaseViewSet, UserViewSet)
 from django.contrib import admin
 from django.urls import include, path
-from drf_spectacular.views import (
-    SpectacularAPIView,
-    SpectacularRedocView,
-    SpectacularSwaggerView,
-)
+from drf_spectacular.views import (SpectacularAPIView, SpectacularRedocView,
+                                   SpectacularSwaggerView)
 from rest_framework import routers
 
 route = routers.DefaultRouter()
 route.register(r"users", UserViewSet, basename="users")
 route.register(r"releases", ReleaseViewSet, basename="releases")
+route.register(r"pipelines", PipelineViewSet, basename="pipelines")
+route.register(r"processes", ProcessViewSet, basename="processes")
 route.register(r"product-types", ProductTypeViewSet, basename="product_types")
 route.register(r"products", ProductViewSet, basename="products")
 route.register(r"product-contents", ProductContentViewSet, basename="product_contents")
diff --git a/backend/requirements.txt b/backend/requirements.txt
index b785cb4..8b598c1 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -27,3 +27,4 @@ social-auth-core==4.5.4
 tables-io==0.9.6
 tables==3.9.2
 uWSGI==2.0.25.1
+pydantic==2.7.1
diff --git a/docker-compose-development.yml b/docker-compose-development.yml
index 3696fdd..770ad80 100644
--- a/docker-compose-development.yml
+++ b/docker-compose-development.yml
@@ -40,6 +40,34 @@ services:
     stdin_open: true
     command: yarn dev
 
+  orchestration:
+    extends:
+      file: ./orchestration/docker-compose.yml
+      service: orchestration
+
+  celery_local_worker:
+    extends:
+      file: ./orchestration/docker-compose.yml
+      service: celery_local_worker
+
+  celery_flower:
+    extends:
+      file: ./orchestration/docker-compose.yml
+      service: celery_flower
+
+  rabbitmq:
+    image: "rabbitmq:3.12.12-management"
+    hostname: "rabbitmq"
+    env_file:
+      - ./orchestration/.orchestration-env
+    ports:
+      - "15672:15672"
+      - "5672:5672"
+    volumes:
+      - "./orchestration/rabbitmq/enabled_plugins:/etc/rabbitmq/enabled_plugins"
+      - "./orchestration/rabbitmq/data/:/var/lib/rabbitmq/"
+      - "./orchestration/rabbitmq/log/:/var/log/rabbitmq/"
+
   nginx:
     image: nginx:1.21.6-alpine
     ports:
@@ -51,3 +79,6 @@ services:
     depends_on:
       - backend
       - frontend
+      - orchestration
+      - rabbitmq
+      - celery_flower
\ No newline at end of file
diff --git a/env_template b/env_template
index 37e913f..1f01b94 100644
--- a/env_template
+++ b/env_template
@@ -47,4 +47,13 @@ DJANGO_CSRF_TRUSTED_ORIGINS=http://localhost http://127.0.0.1
 
 # Shibboleth / Satosa Auth
 # URL for login using Shibboleth
-# AUTH_SHIB_URL=
\ No newline at end of file
+# AUTH_SHIB_URL=
+
+# Directory that will contain the pipelines' processing outputs.
+PROCESSING_DIR=/processes
+
+# Directory that contains the pipelines' source code.
+PIPELINES_DIR=/pipelines
+
+# Directory that contains the datasets.
+DATASETS_DIR=/datasets
\ No newline at end of file
diff --git a/nginx_development.conf b/nginx_development.conf
index 49d0f9c..1e5f1e6 100644
--- a/nginx_development.conf
+++ b/nginx_development.conf
@@ -2,13 +2,17 @@ upstream pzapi {
     server backend:8000;
 }
 
+upstream orchestapi {
+    server orchestration:8000;
+}
+
 upstream pzfrontend {
     server frontend:3000;
 }
 
 server {
-    listen 8080;
+    server_name localhost;
 
     client_max_body_size 200M;
@@ -25,7 +29,7 @@ server {
     fastcgi_read_timeout 120s;
 
     # access_log /var/log/nginx/host.access.log main;
-    
+
     # Proxy pass to frontend development server with live reload
     # Based on this article: https://nathanfriend.io/2018/05/14/live-reloading-an-angular-2-app-behind-nginx.html
     location / {
@@ -36,7 +40,7 @@ server {
 
         proxy_set_header X-Forwarded-For $remote_addr;
         proxy_set_header X-Real-IP $remote_addr;
-        proxy_set_header Host $host; 
+        proxy_set_header Host $host;
 
         # live reload
         proxy_http_version 1.1;
@@ -50,7 +54,7 @@ server {
         proxy_http_version 1.1;
         proxy_set_header Upgrade $http_upgrade;
         proxy_set_header Connection "upgrade";
-    } 
+    }
 
     location /api {
         include uwsgi_params;
@@ -77,5 +81,41 @@ server {
         alias /var/www/coverage/;
         try_files $uri $uri/ /index.html;
         autoindex off;
-    }
-}
\ No newline at end of file
+    }
+
+    # Rabbitmq Management
+    location /rabbitmq/ {
+        proxy_pass http://rabbitmq:15672/;
+        rewrite ^/rabbitmq/(.*)$ /$1 break;
+    }
+}
+
+server {
+    listen 8080;
+    server_name orchestration;
+
+    location /api {
+        include uwsgi_params;
+        uwsgi_pass orchestapi;
+    }
+
+    location /admin {
+        include uwsgi_params;
+        uwsgi_pass orchestapi;
+    }
+
+    location /o {
+        include uwsgi_params;
+        uwsgi_pass orchestapi;
+    }
+
+    location /django_static {
+        include uwsgi_params;
+        uwsgi_pass orchestapi;
+    }
+
+    # Celery Flower
+    location /flower {
+        proxy_pass http://celery_flower:5555;
+    }
+}
diff --git a/orchestration/.orchestration-env b/orchestration/.orchestration-env
new file mode 100644
index 0000000..963cb22
--- /dev/null
+++ b/orchestration/.orchestration-env
@@ -0,0 +1,43 @@
+# Backend/Django
+# SECURITY WARNING: don't run with debug turned on in production!
+DEBUG=1
+LOGGING_LEVEL="DEBUG"
+AUTORELOAD=1
+
+# CORS
+DJANGO_ALLOWED_HOSTS="orchestration localhost 127.0.0.1 [::1]"
+ALLOWED_HOSTS="orchestration localhost 127.0.0.1 [::1]"
+DJANGO_CSRF_TRUSTED_ORIGINS="http://orchestration http://localhost http://127.0.0.1"
+
+# AMQP
+RABBITMQ_HOST="rabbitmq"
+# RABBITMQ_HOST="host-gateway"
+RABBITMQ_PORT="5672"
+RABBITMQ_ERLANG_COOKIE="SWQOKODSQALRPCLNMEQG"
+RABBITMQ_DEFAULT_USER="orcadmin"
+RABBITMQ_DEFAULT_PASS="adminorc"
+RABBITMQ_DEFAULT_VHOST="/"
+
+# Database
+# DB_ENGINE=django.db.backends.postgresql
+# DB_USER=orchadmin
+# DB_PASSWORD=adminorch
+# DB_DATABASE=orchestration
+# DB_HOST=database
+# DB_PORT=5432
+
+# SECURITY WARNING: keep the secret key used in production secret!
+SECRET_KEY=YvbMFaR6cJUB5x9PBK6KciWljqbavSfA7K9sqZD-cpM
+
+DB_DIR="/db"
+
+LOG_DIR="/logs"
+
+# Directory that will contain the pipelines' processing outputs.
+PROCESSING_DIR=/processes
+
+# Directory that contains the pipelines' source code.
+PIPELINES_DIR=/pipelines
+
+# Directory that contains the datasets.
+DATASETS_DIR=/datasets
diff --git a/orchestration/datasets/DatasetA/Norder=0/Dir=0/Npix=0.parquet b/orchestration/datasets/DatasetA/Norder=0/Dir=0/Npix=0.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..90a0e15b3bebd761c8139db6657db07dc3ff352d
GIT binary patch
literal 5074
[base85-encoded binary data elided]

diff --git a/orchestration/datasets/DatasetA/Norder=0/Dir=0/Npix=11.parquet b/orchestration/datasets/DatasetA/Norder=0/Dir=0/Npix=11.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..3e434eb7be30c9dce4d90de714e5f5d861119b56
GIT binary patch
literal 5074
[base85-encoded binary data elided]

diff --git a/orchestration/datasets/DatasetA/Norder=0/Dir=0/Npix=8.parquet b/orchestration/datasets/DatasetA/Norder=0/Dir=0/Npix=8.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..65207d6ab56ab7ad564a8869479104b063a83829
GIT binary patch
literal 5802
[base85-encoded binary data elided]

diff --git a/orchestration/datasets/DatasetA/_common_metadata b/orchestration/datasets/DatasetA/_common_metadata
new file mode 100644
index 0000000000000000000000000000000000000000..787a029039ad634578bbfda9b77dcfcfe18001e5
GIT binary patch
literal 3305
[base85-encoded binary data elided]

diff --git a/orchestration/datasets/DatasetA/_metadata b/orchestration/datasets/DatasetA/_metadata
new file mode 100644
index 0000000000000000000000000000000000000000..04bdc4b181ee8445eddd4962f74b309f257d923b
GIT binary patch
literal 6627
[base85-encoded binary data elided]

[binary patches for the remaining DatasetA files and the DatasetB partition and metadata files elided]
diff --git a/orchestration/datasets/DatasetB/_metadata b/orchestration/datasets/DatasetB/_metadata
new file mode 100644
index 0000000000000000000000000000000000000000..9036c270383717f43228ff0ecfbffe7390b5b351
GIT binary patch
literal 4947
[base85-encoded binary data elided]

diff --git a/orchestration/datasets/DatasetB/catalog_info.json b/orchestration/datasets/DatasetB/catalog_info.json
new file mode 100644
index 0000000..56f1340
--- /dev/null
+++ b/orchestration/datasets/DatasetB/catalog_info.json
@@ -0,0 +1,8 @@
+{
+    "catalog_name": "DatasetB",
+    "catalog_type": "object",
+    "total_rows": 80,
+    "epoch": "J2000",
+    "ra_column": "ra",
+    "dec_column": "dec"
+}
diff --git a/orchestration/datasets/DatasetB/point_map.fits b/orchestration/datasets/DatasetB/point_map.fits
new file mode 100644
index 0000000000000000000000000000000000000000..0acdd0bcf478a6eccd450f723133bfc6ea346a69
GIT binary patch
literal 1581120
[base85-encoded binary data elided]
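(The example catalogs above are the inputs consumed by the cross_lsdb_dev pipeline added below. A minimal sketch of reading and crossmatching them by hand, assuming the lsdb==0.1.0 API pinned in the pipeline's environment.yml and the /datasets mount defined earlier; these are the same calls the run-crossmatch script makes.)

    # Sketch only: paths assume the DATASETS_DIR=/datasets layout above.
    import lsdb

    photo = lsdb.read_hipscat("/datasets/DatasetA")
    specz = lsdb.read_hipscat("/datasets/DatasetB")

    # crossmatch() is lazy; compute() materializes the matched pairs.
    data = specz.crossmatch(photo).compute()
    print(data.count())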
diff --git a/orchestration/datasets/DatasetB/provenance_info.json b/orchestration/datasets/DatasetB/provenance_info.json
new file mode 100644
index 0000000..6f01c9d
--- /dev/null
+++ b/orchestration/datasets/DatasetB/provenance_info.json
@@ -0,0 +1,47 @@
+{
+    "catalog_name": "DatasetB",
+    "catalog_type": "object",
+    "total_rows": 80,
+    "epoch": "J2000",
+    "ra_column": "ra",
+    "dec_column": "dec",
+    "version": "0.2.1",
+    "generation_date": "2024.02.20",
+    "tool_args": {
+        "tool_name": "hipscat_import",
+        "version": "0.2.1",
+        "runtime_args": {
+            "catalog_name": "DatasetB",
+            "output_path": "../data-sample/hipscat/",
+            "output_artifact_name": "DatasetB",
+            "tmp_dir": "",
+            "overwrite": true,
+            "dask_tmp": "",
+            "dask_n_workers": 4,
+            "dask_threads_per_worker": 1,
+            "catalog_path": "../data-sample/hipscat/DatasetB",
+            "tmp_path": "../data-sample/hipscat/DatasetB/intermediate",
+            "epoch": "J2000",
+            "catalog_type": "object",
+            "input_path": "../data-sample/raw/B",
+            "input_paths": [
+                "file:///home/singulani/projects/slurm_lsdb/import/../data-sample/raw/B/datasetB.parquet"
+            ],
+            "input_format": "parquet",
+            "input_file_list": [],
+            "ra_column": "ra",
+            "dec_column": "dec",
+            "use_hipscat_index": false,
+            "sort_columns": "z",
+            "constant_healpix_order": -1,
+            "highest_healpix_order": 7,
+            "pixel_threshold": 1000000,
+            "mapping_healpix_order": 7,
+            "debug_stats_only": false,
+            "file_reader_info": {
+                "input_reader_type": "ParquetReader",
+                "chunksize": 500000
+            }
+        }
+    }
+}
diff --git a/orchestration/datasets/README b/orchestration/datasets/README
new file mode 100644
index 0000000..a225db4
--- /dev/null
+++ b/orchestration/datasets/README
@@ -0,0 +1 @@
+# Directory that will contain the example datasets.
\ No newline at end of file
diff --git a/orchestration/pipelines/cross_lsdb_dev/VERSION b/orchestration/pipelines/cross_lsdb_dev/VERSION
new file mode 100644
index 0000000..8a9ecc2
--- /dev/null
+++ b/orchestration/pipelines/cross_lsdb_dev/VERSION
@@ -0,0 +1 @@
+0.0.1
\ No newline at end of file
diff --git a/orchestration/pipelines/cross_lsdb_dev/config.py b/orchestration/pipelines/cross_lsdb_dev/config.py
new file mode 100644
index 0000000..4961cd0
--- /dev/null
+++ b/orchestration/pipelines/cross_lsdb_dev/config.py
@@ -0,0 +1,51 @@
+from pydantic import BaseModel
+import os
+
+DATASETS_DIR = os.getenv("DATASETS_DIR", "/datasets")
+
+
+class Instance(BaseModel):
+    processes: int = 1
+    memory: str = "123GiB"
+    queue: str = "cpu"
+    job_extra_directives: list[str] = ["--propagate", "--time=2:00:00"]
+
+
+class Adapt(BaseModel):
+    maximum_jobs: int = 10
+
+
+class LIneASlurm(BaseModel):
+    instance: Instance = Instance()
+    adapt: Adapt = Adapt()
+
+
+class Local(BaseModel):
+    n_workers: int = 2
+    threads_per_worker: int = 2
+    memory_limit: str = "1GiB"
+
+
+class Inputs(BaseModel):
+    photo: str = f"{DATASETS_DIR}/DatasetA"
+    specz: str = f"{DATASETS_DIR}/DatasetB"
+
+
+class Executor(BaseModel):
+    local: Local = Local()
+    linea_slurm: LIneASlurm = LIneASlurm()
+
+
+class Config(BaseModel):
+    output_dir: str = "./output"
+    executor: Executor = Executor()
+    inputs: Inputs = Inputs()
+
+
+if __name__ == "__main__":
+    import yaml
+
+    cfg = Config()
+
+    with open('config.yml', 'w') as outfile:
+        yaml.dump(cfg.model_dump(), outfile)
diff --git a/orchestration/pipelines/cross_lsdb_dev/environment.yml b/orchestration/pipelines/cross_lsdb_dev/environment.yml
new file mode 100644
index 0000000..673503b
--- /dev/null
+++ b/orchestration/pipelines/cross_lsdb_dev/environment.yml
@@ -0,0 +1,14 @@
+name: pipe_cross_lsdb_dev
+channels:
+  - defaults
+dependencies:
+  - python=3.10
+  - pip:
+    - PyYaml
+    - dask==2024.1.0
+    - distributed==2024.1.0
+    - dask-jobqueue==0.8.2
+    - hipscat==0.2.1
+    - hipscat-import==0.2.1
+    - lsdb==0.1.0
+
diff --git a/orchestration/pipelines/cross_lsdb_dev/install.sh b/orchestration/pipelines/cross_lsdb_dev/install.sh
new file mode 100755
index 0000000..11d0265
--- /dev/null
+++ b/orchestration/pipelines/cross_lsdb_dev/install.sh
@@ -0,0 +1,34 @@
+#!/bin/bash --login
+
+source `dirname $CONDA_EXE`/activate || { echo "Failed to activate Conda environment"; exit 1; }
+
+if [ ! -d "$PIPELINES_DIR" ]; then
+    echo "Error: PIPELINES_DIR not defined."
+    exit 1
+fi
+
+PIPE_BASE="$PIPELINES_DIR/cross_lsdb_dev"
+HASENV=`conda env list | grep 'pipe_cross_lsdb_dev '`
+
+if [ -z "$HASENV" ]; then
+    echo "Creating virtual environment..."
+    conda env create -f $PIPE_BASE/environment.yml
+    echo "Virtual environment created and packages installed."
+else
+    if [ "$CONDA_FORCE_UPDATE" == "yes" ]; then
+        echo "Virtual environment already exists. Updating..."
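+        # Note: --prune removes packages that are no longer listed in environment.yml.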
+        conda env update --file $PIPE_BASE/environment.yml --prune
+    fi
+fi
+
+conda activate pipe_cross_lsdb_dev
+
+export PATH=$PATH:"$PIPE_BASE/scripts/"
+
+if [ -z "$PYTHONPATH" ]; then
+    export PYTHONPATH="$PIPE_BASE/packages/"
+else
+    export PYTHONPATH=$PYTHONPATH:"$PIPE_BASE/packages/"
+fi
+
+echo "Conda Environment: $CONDA_DEFAULT_ENV"
diff --git a/orchestration/pipelines/cross_lsdb_dev/packages/__init__.py b/orchestration/pipelines/cross_lsdb_dev/packages/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/orchestration/pipelines/cross_lsdb_dev/packages/executor.py b/orchestration/pipelines/cross_lsdb_dev/packages/executor.py
new file mode 100755
index 0000000..fcb6746
--- /dev/null
+++ b/orchestration/pipelines/cross_lsdb_dev/packages/executor.py
@@ -0,0 +1,47 @@
+"""Dask cluster setup helpers for the cross_lsdb_dev pipeline."""
+
+from dask.distributed import LocalCluster
+from dask_jobqueue import SLURMCluster
+from utils import load_yml
+import logging
+from typing import Union
+
+
+def get_executor_config(
+    executor_key: str, config_file: str
+) -> Union[LocalCluster, SLURMCluster]:
+    """Returns the cluster on which the pipeline will be run.
+
+    Args:
+        executor_key (str): executor key
+        config_file (str): config path
+
+    Returns:
+        Union[LocalCluster, SLURMCluster]: Executor object
+    """
+
+    logger = logging.getLogger()
+    logger.info("Getting executor config: %s", executor_key)
+
+    configs = load_yml(config_file)
+
+    try:
+        config = configs["executor"][executor_key]
+    except KeyError:
+        logger.warning("Executor key '%s' not found. Using minimal local config.", executor_key)
+        executor_key = "minimal"
+
+    match executor_key:
+        case "local":
+            cluster = LocalCluster(**config)
+        case "linea_slurm":
+            icfg = config["instance"]
+            cluster = SLURMCluster(**icfg)
+            cluster.adapt(**config["adapt"])
+        case _:
+            cluster = LocalCluster(
+                n_workers=1,
+                threads_per_worker=1,
+            )
+
+    return cluster
diff --git a/orchestration/pipelines/cross_lsdb_dev/packages/utils.py b/orchestration/pipelines/cross_lsdb_dev/packages/utils.py
new file mode 100755
index 0000000..2ce2487
--- /dev/null
+++ b/orchestration/pipelines/cross_lsdb_dev/packages/utils.py
@@ -0,0 +1,46 @@
+"""Shared logging and YAML helpers for the pipeline scripts."""
+
+import yaml
+import logging
+import os
+import pathlib
+from typing import Any
+
+
+def setup_logger(name="pipeline-logger"):
+    """
+    Configures the logger for recording events and messages.
+
+    Returns:
+        logging.Logger: Configured logger instance.
+    """
+
+    logger = logging.getLogger(name)
+    logger.setLevel(logging.DEBUG)
+
+    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
+
+    logdir = os.getenv("LOG_DIR", ".")
+
+    file_handler = logging.FileHandler(pathlib.Path(logdir, f"{name}.log"))
+    file_handler.setLevel(logging.DEBUG)
+    file_handler.setFormatter(formatter)
+
+    logger.addHandler(file_handler)
+
+    return logger
+
+
+def load_yml(filepath: str) -> Any:
+    """Load yaml file
+
+    Args:
+        filepath (str): filepath
+
+    Returns:
+        Any: yaml file content
+    """
+    with open(filepath, encoding="utf-8") as _file:
+        content = yaml.safe_load(_file)
+
+    return content
diff --git a/orchestration/pipelines/cross_lsdb_dev/run.sh b/orchestration/pipelines/cross_lsdb_dev/run.sh
new file mode 100755
index 0000000..093ce76
--- /dev/null
+++ b/orchestration/pipelines/cross_lsdb_dev/run.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Check if the argument was given
+if [ $# -eq 0 ]; then
+    echo "Error: No arguments provided."
+    exit 1
+fi
+
+ARGS=$@
+shift $#
+
+if [ -z "$DASK_EXECUTOR_KEY" ]; then
+    export DASK_EXECUTOR_KEY=local
+fi
+
+if [ !
-d "$PIPELINES_DIR" ]; then + echo "Error: PIPELINES_DIR not defined." + exit 1 +fi + +INSTALL_PIPE="$PIPELINES_DIR/cross_lsdb_dev/install.sh" + +if [ ! -f "$INSTALL_PIPE" ]; then + echo "Error: Installation script not found." + exit 1 +fi + +# Installing pipeline +echo "Installing pipeline..." +. "$INSTALL_PIPE" + +set -xe + +# Run the Python code with the given argument +# run-crossmatch $ARGS || { echo "Failed to run-crossmatch"; exit 1; } +run-crossmatch $ARGS + +echo $? >> return.code + +echo "Done." \ No newline at end of file diff --git a/orchestration/pipelines/cross_lsdb_dev/scripts/run-crossmatch b/orchestration/pipelines/cross_lsdb_dev/scripts/run-crossmatch new file mode 100755 index 0000000..831ec92 --- /dev/null +++ b/orchestration/pipelines/cross_lsdb_dev/scripts/run-crossmatch @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 + +import argparse +import time +import os +from pathlib import Path +from dask.distributed import Client +import lsdb + +from utils import setup_logger, load_yml +from executor import get_executor_config + + +def run(config_file): + """Run lsdb crossmatch + + Args: + config_file (str): lsdb parameters + """ + + logger = setup_logger(name="cross-lsdb") + + start_time_full = time.time() + + # Loading configurations + pipe_config = load_yml(config_file) + param = pipe_config.get("inputs") + logger.info("Parameters: %s", param) + + executor_key = os.getenv("DASK_EXECUTOR_KEY", "local") + cluster = get_executor_config(executor_key, config_file) + + with Client(cluster): + phot_dp0 = lsdb.read_hipscat(param.get("photo")) + spec_dp0 = lsdb.read_hipscat(param.get("specz")) + + cross = spec_dp0.crossmatch(phot_dp0) + data = cross.compute() + + os.makedirs(pipe_config.get("output_dir"), exist_ok=True) + outputfile = Path(pipe_config.get("output_dir"), "cross-output.parquet") + data.to_parquet(outputfile) + + logger.info("--> Object Count: \n%s", str(data.count())) + + cluster.close() + + logger.info("Time elapsed: %s", str(time.time() - start_time_full)) + + +if __name__ == "__main__": + # Create the parser and add arguments + parser = argparse.ArgumentParser() + parser.add_argument(dest="config_path", help="yaml config path") + + args = parser.parse_args() + config_path = args.config_path + + # Run pipeline + run(config_path) diff --git a/orchestration/pipelines/load_pipelines.sh b/orchestration/pipelines/load_pipelines.sh new file mode 100755 index 0000000..1749f37 --- /dev/null +++ b/orchestration/pipelines/load_pipelines.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +cat << EOF > ${PIPELINES_DIR}/pipelines.yaml +cross_lsdb_dev: + display_name: 'LSDB Crossmatch (dev)' + path: '${PIPELINES_DIR}/cross_lsdb_dev' + executor: 'local' # only to orchestration + runner: 'bash' + executable: 'run.sh' + schema_config: '${PIPELINES_DIR}/cross_lsdb_dev/config.py' + version: '0.0.1' +EOF \ No newline at end of file diff --git a/orchestration/pipelines/pipelines.yaml b/orchestration/pipelines/pipelines.yaml new file mode 100644 index 0000000..18c911c --- /dev/null +++ b/orchestration/pipelines/pipelines.yaml @@ -0,0 +1,8 @@ +cross_lsdb_dev: + display_name: 'LSDB Crossmatch (dev)' + path: '/pipelines/cross_lsdb_dev' + executor: 'local' # only to orchestration + runner: 'bash' + executable: 'run.sh' + schema_config: '/pipelines/cross_lsdb_dev/config.py' + version: '0.0.1' diff --git a/orchestration/pipelines/pipelines.yaml.template b/orchestration/pipelines/pipelines.yaml.template new file mode 100644 index 0000000..18c911c --- /dev/null +++ b/orchestration/pipelines/pipelines.yaml.template @@ 
-0,0 +1,8 @@ +cross_lsdb_dev: + display_name: 'LSDB Crossmatch (dev)' + path: '/pipelines/cross_lsdb_dev' + executor: 'local' # only to orchestration + runner: 'bash' + executable: 'run.sh' + schema_config: '/pipelines/cross_lsdb_dev/config.py' + version: '0.0.1' diff --git a/orchestration/rabbitmq/enabled_plugins b/orchestration/rabbitmq/enabled_plugins new file mode 100644 index 0000000..2843682 --- /dev/null +++ b/orchestration/rabbitmq/enabled_plugins @@ -0,0 +1 @@ +[rabbitmq_management, rabbitmq_management_visualiser]. \ No newline at end of file diff --git a/pipeline.py b/pipeline.py new file mode 100644 index 0000000..40b3872 --- /dev/null +++ b/pipeline.py @@ -0,0 +1,16 @@ +from core.models import ProductType +from django.db import models + + +class Pipeline(models.Model): + + name = models.CharField(max_length=255, unique=True) + display_name = models.CharField(max_length=255, null=True, blank=True) + description = models.TextField(null=True, blank=True) + created_at = models.DateTimeField(auto_now_add=True) + use_release = models.BooleanField(default=False) + product_types = models.ManyToManyField(ProductType, related_name="pipelines") + default_config = models.JSONField(null=True, blank=True) + + def __str__(self): + return f"{self.name}" \ No newline at end of file From cdb4c5ffc6622542c047eb6fb681787da008e923 Mon Sep 17 00:00:00 2001 From: Cristiano Singulani Date: Mon, 8 Jul 2024 21:26:58 +0000 Subject: [PATCH 02/20] Integration with orchestration completed. --- .devcontainer/devcontainer.json | 2 +- .devcontainer/docker-compose.yml | 11 +- .gitignore | 4 + README.md | 4 +- backend/Dockerfile | 1 + backend/core/_typing.py | 2 + backend/core/maestro.py | 428 ++++++++++++++++++ ...ocess_orchestration_process_id_and_more.py | 24 + ..._alter_process_orchestration_process_id.py | 18 + ...8_alter_process_path_alter_release_name.py | 22 + backend/core/models/process.py | 13 +- backend/core/models/release.py | 2 +- backend/core/product_steps.py | 279 ++++++++++++ backend/core/serializers/process.py | 17 +- backend/core/tasks.py | 111 +++++ backend/core/utils.py | 10 +- backend/core/views/__init__.py | 43 ++ backend/core/views/create_product.py | 155 ------- backend/core/views/pipeline.py | 5 +- backend/core/views/process.py | 89 +++- backend/core/views/product.py | 11 +- backend/core/views/registry_product.py | 115 ----- backend/pz-beat.sh | 15 + backend/pzserver/__init__.py | 3 + backend/pzserver/celery.py | 30 ++ backend/pzserver/settings.py | 59 ++- backend/pzserver/urls.py | 11 +- backend/requirements.txt | 3 + backend/start.sh | 1 - backend/worker.sh | 19 + docker-compose-development-orch.yml | 114 +++++ docker-compose-development.yml | 2 +- docker-compose-production.yml | 3 +- env_template | 24 +- nginx_development-orch.conf | 92 ++++ orchestration/.orchestration-env | 6 +- .../pipelines/cross_lsdb_dev/config.py | 4 +- .../pipelines/cross_lsdb_dev/config.yml | 19 + .../cross_lsdb_dev/packages/utils.py | 15 +- .../cross_lsdb_dev/scripts/run-crossmatch | 32 +- 40 files changed, 1479 insertions(+), 339 deletions(-) create mode 100644 backend/core/maestro.py create mode 100644 backend/core/migrations/0036_process_orchestration_process_id_and_more.py create mode 100644 backend/core/migrations/0037_alter_process_orchestration_process_id.py create mode 100644 backend/core/migrations/0038_alter_process_path_alter_release_name.py create mode 100644 backend/core/product_steps.py create mode 100644 backend/core/tasks.py delete mode 100644 backend/core/views/create_product.py delete mode 
100644 backend/core/views/registry_product.py create mode 100755 backend/pz-beat.sh create mode 100644 backend/pzserver/celery.py create mode 100755 backend/worker.sh create mode 100644 docker-compose-development-orch.yml create mode 100644 nginx_development-orch.conf create mode 100644 orchestration/pipelines/cross_lsdb_dev/config.yml diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 1f9cd4b..0fe018e 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,7 +1,7 @@ // For format details, see https://aka.ms/devcontainer.json. For config options, see the // README at: https://github.com/devcontainers/templates/tree/main/src/postgres { - "name": "Python 3 & PostgreSQL", + "name": "Pz Server", "dockerComposeFile": [ "../docker-compose.yml", "docker-compose.yml" diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml index 1e91e4d..296c83c 100644 --- a/.devcontainer/docker-compose.yml +++ b/.devcontainer/docker-compose.yml @@ -5,13 +5,14 @@ services: build: context: . dockerfile: .devcontainer/Dockerfile + # dockerfile: backend/Dockerfile volumes: - ..:/workspaces:cached - - ./archive/log/backend:/archive/log - - ./archive/data:/archive/data - - ./orchestration/pipelines:/pipelines - - ./orchestration/processes:/processes - - ./orchestration/datasets:/datasets + - ./archive/log/backend:/archive/log:cached + - ./archive/data:/archive/data:cached + - ./orchestration/pipelines:/pipelines:cached + - ./orchestration/processes:/processes:cached + - ./orchestration/datasets:/datasets:cached # Overrides default command so things don't shut down after the process ends. command: sleep infinity diff --git a/.gitignore b/.gitignore index 3ccd2ab..1447fac 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,7 @@ orchestration/logs orchestration/rabbitmq/* !orchestration/rabbitmq/enabled_plugins +saml2 + +*.pyc +__pycache__ \ No newline at end of file diff --git a/README.md b/README.md index 697fb4d..bf32cf2 100644 --- a/README.md +++ b/README.md @@ -134,8 +134,8 @@ Another important detail is that the `CLIENT ID` and `SECRET KEY` value from the ``` bash # Client ID and Client Secret must be registered in Django Admin # after backend Setup, in the Django Oauth Applications interface -ORC_CLIENT_ID=wD85gkYeqGEQvVWv5o3Cx6ppBlfDl2S88dek8Exp -ORC_CLIENT_SECRET=eM2dhhxa2vovfaAXmMwqR1M8TdGhVmBjT7co5uaA9pI4aKPDZGxtBtDG5LHfhHvZUabbSP5aUDRpTLpUJAiGS0ScNuhktbuCwuSPiz0bmEftEROJ3ZzzKp2aDNO7Vx0k +ORCHEST_CLIENT_ID=wD85gkYeqGEQvVWv5o3Cx6ppBlfDl2S88dek8Exp +ORCHEST_CLIENT_SECRET=eM2dhhxa2vovfaAXmMwqR1M8TdGhVmBjT7co5uaA9pI4aKPDZGxtBtDG5LHfhHvZUabbSP5aUDRpTLpUJAiGS0ScNuhktbuCwuSPiz0bmEftEROJ3ZzzKp2aDNO7Vx0k ``` This is enough to have orchestration working with an image pinned to `orchestration/docker-compose.yml`. 
If you want to change the orchestration version, just change the image in `orchestration/docker-compose.yml` diff --git a/backend/Dockerfile b/backend/Dockerfile index 6c24b5c..8368457 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -15,6 +15,7 @@ RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ build-essential \ libpcre3 \ libpcre3-dev \ + iputils-ping \ && apt-get clean \ && apt-get autoclean \ && apt-get autoremove --purge -y \ diff --git a/backend/core/_typing.py b/backend/core/_typing.py index 7520a79..eb68665 100644 --- a/backend/core/_typing.py +++ b/backend/core/_typing.py @@ -3,3 +3,5 @@ FilePath = Union[str, "PathLike[str]"] Column = Union[str, int] +OpInt = Union[int, None] +OpStr = Union[str, None] diff --git a/backend/core/maestro.py b/backend/core/maestro.py new file mode 100644 index 0000000..0abb477 --- /dev/null +++ b/backend/core/maestro.py @@ -0,0 +1,428 @@ +""" +Classes to communicate with the Orchestration app +""" + +import base64 +import json +import os +from typing import Any +from urllib.parse import urljoin + +import requests + + +class Maestro: + + def __init__(self, url): + self.url = url + self.api = MaestroApi(self.url) + + def get_processes(self) -> list: + """Returns list of processes + + Returns: + list: processes list + """ + + url = f"{self.url}/api/processes/" + _response = self.api.get_request(url) + return self.__handle_action(_response) # type: ignore + + def start(self, pipeline, config=None) -> dict[str, Any]: # type: ignore + """Start process in Orchestration app. + + Args: + pipeline (str): pipeline name. + config (dict, optional): pipeline config. Defaults to None. + + Returns: + dict: process info + """ + + url = f"{self.url}/api/processes/" + + if config: + config = json.dumps(config) + + payload = json.dumps( + { + "pipeline": pipeline, + "used_config": config, + } + ) + + _response = self.api.post_request(url, payload=payload) + return self.__handle_action(_response) # type: ignore + + def status(self, orchest_process_id) -> dict: + """Get process status in Orchestration app. + + Args: + orchest_process_id (int): orchestration process ID + + Returns: + dict: process status + """ + + url = f"{self.url}/api/processes/{orchest_process_id}/status/" + _response = self.api.get_request(url) + return self.__handle_action(_response) # type: ignore + + def stop(self, orchest_process_id) -> dict: + """Stop process in Orchestration app. + + Args: + orchest_process_id (int): orchestration process ID + + Returns: + dict: process info + """ + + url = f"{self.url}/api/processes/{orchest_process_id}/stop/" + _response = self.api.get_request(url) + return self.__handle_action(_response) # type: ignore + + def pipelines(self) -> dict: + """Gets pipelines in Orchestration app. + + Returns: + dict: pipelines + """ + + url = f"{self.url}/api/pipelines/" + _response = self.api.get_request(url) + return self.__handle_action(_response) # type: ignore + + def sysinfo(self) -> dict: + """Gets Orchestration app information. 
+ + Returns: + dict: Orchestration app information + """ + + url = f"{self.url}/api/sysinfo/" + _response = self.api.get_request(url) + return self.__handle_action(_response) # type: ignore + + def __handle_action(self, _response) -> dict: + """Handle actions related to orchestration processing control + + Args: + _response (request.Response): Response object + + Returns: + dict + """ + + if "success" in _response and _response.get("success") is False: + raise requests.exceptions.RequestException(_response) + + return _response.get("data") + + +class MaestroApi: + """Responsible for managing all requests to the Orchestration app.""" + + def __init__(self, url): + """Initializes communication with the Orchestration app. + + Args: + url (str): orchestration url. + """ + + self.__url = url + self.__token = self.get_token() + + @staticmethod + def __get_credential() -> bytes: + """Returns a credential to obtain a valid token in the orchestration + app. + + Returns: + credential + """ + + client_id = os.getenv("ORCHEST_CLIENT_ID") + client_secret = os.getenv("ORCHEST_CLIENT_SECRET") + + raw_credential = "{0}:{1}".format(client_id, client_secret) + return base64.b64encode(raw_credential.encode("utf-8")) + + def __check_response(self, api_response) -> dict: + """Checks for possible HTTP errors in the response. + + Args: + api_response (request.Response): Response object + + Returns: + dict: response content. + """ + status_code = api_response.status_code + + data = { + "status_code": status_code, + "message": str(), + "data": str(), + "response_object": api_response, + } + + content_type = api_response.headers.get("content-type", "") + + if 200 <= status_code < 300: + data.update({"success": True, "message": "Request completed"}) + if status_code != 204 and content_type.strip().startswith( + "application/json" + ): + data.update({"data": api_response.json()}) + else: + if content_type.strip().startswith("application/json"): + content = api_response.json() + detail = content.get("detail", content) + message = content.get("error", detail) + else: + message = api_response.text + data.update({"success": False, "message": message}) + + return data + + def __send_request( + self, + prerequest, + stream=False, + timeout=None, + verify=True, + cert=None, + proxies=None, + ) -> dict: + """Sends PreparedRequest object. + + Args: + prerequest (requests.PreparedRequest): PreparedRequest object + stream (optional): Whether to stream the request content. + timeout (float or tuple) (optional): How long to wait for the + server to send data before giving up, as a float, or a + (connect timeout, read timeout) tuple. + verify (optional): Either a boolean, in which case it controls + whether we verify the servers TLS certificate, or a string, + in which case it must be a path to a CA bundle to use + cert (optional): Any user-provided SSL certificate to be trusted. + proxies (optional): The proxies dictionary to apply to the request. 
+ + Returns: + dict: response content + + Example: { + "status_code": int, + "message": str, + "data": str, + "success": bool, + "response_object": request.Response + } + """ + + data = { + "success": False, + "message": "", + "response_object": None, + } + + try: + api_session = requests.Session() + api_response = api_session.send( + prerequest, + stream=stream, + timeout=timeout, + verify=verify, + cert=cert, + proxies=proxies, + ) + data.update(self.__check_response(api_response)) + except requests.exceptions.HTTPError as errh: + message = f"Http Error: {errh}" + data.update( + { + "success": False, + "message": message, + } + ) + except requests.exceptions.ConnectionError as errc: + message = f"Connection Error: {errc}" + data.update( + { + "success": False, + "message": message, + } + ) + except requests.exceptions.Timeout as errt: + message = f"Timeout Error: {errt}" + data.update( + { + "success": False, + "message": message, + } + ) + except requests.exceptions.RequestException as err: + message = f"Request Error: {err}" + data.update( + { + "success": False, + "message": message, + } + ) + + return data + + def get_default_headers(self): + """Gets default header to Orchestration app.""" + + token_type = self.__token.get("token_type") + token = self.__token.get("access_token") + authorization = f"{token_type} {token}" + + headers = requests.utils.default_headers() + headers.update( + { + "Accept": "application/json", + "Content-Type": "application/json", + "Authorization": authorization, + } + ) + + return headers + + def get_request(self, url, params=None, headers=None) -> dict: + """Get a record from the API. + + Args: + url (str): url to get + params (dict, optional): params to get. Defaults to None. + headers (dict, optional): dictionary of headers to send. + Defaults to None. + + Returns: + dict: data of the request. + """ + + # if not headers or not isinstance(headers, dict): + if not headers: + headers = self.get_default_headers() + + _response = requests.Request("GET", url, params=params, headers=headers) + return self.__send_request(_response.prepare()) + + def post_request(self, url, payload, headers=None) -> dict: + """Posts a record to the API. + + Args: + url (str): url to post. + payload (str): payload to post. + headers (dict, optional): dictionary of headers to send. + Defaults to None. + + Returns: + dict: data of the request. + """ + + # if not headers or not isinstance(headers, dict): + if not headers: + headers = self.get_default_headers() + + req = requests.Request( + "POST", + url, + data=payload, + headers=headers, + ) + return self.__send_request(req.prepare()) + + def options_request(self, url, headers=None) -> dict: + """Returns the options and settings for a given endpoint. + + Args: + url (str): url to get + params (dict, optional): params to get. Defaults to None. + headers (dict, optional): dictionary of headers to send. + Defaults to None. + + Returns: + dict: data of the request. + """ + + if not headers or not isinstance(headers, dict): + headers = self.get_default_headers() + + req = requests.Request( + "OPTIONS", + url, + headers=headers, + ) + return self.__send_request(req.prepare()) + + def delete_request(self, url, headers=None) -> dict: + """Remove a record from the API. + + Args: + url (str): url to delete with the record id. + headers (dict, optional): dictionary of headers to send. + Defaults to None. + + Returns: + dict: status and message of the request. 
+ """ + + req = requests.Request( + "DELETE", + url, + headers=headers, + ) + _response = self.__send_request(req.prepare()) + + if _response.get("status_code") == 400: + return { + "success": False, + "message": "The server failed to perform the operation.", + "status_code": 400, + } + + return _response + + def get_token(self) -> dict: + """Gets access token in Orchestration app + + Returns: + dict: _description_ + """ + + credential = self.__get_credential() + token_url = urljoin(self.__url, "o/token/") + token_auth = f"Basic {credential.decode()}" + payload = "grant_type=client_credentials" + + headers = requests.utils.default_headers() + headers.update( + { + "Cache-Control": "no-cache", + "Content-Type": "application/x-www-form-urlencoded", + "Authorization": token_auth, + } + ) + + _response = self.post_request(token_url, payload, headers=headers) + + if "success" in _response and _response.get("success") is False: + raise requests.exceptions.RequestException(_response.get("message")) + + return _response.get("data") # type: ignore + + def token_is_valid(self): + """ + Checks if the token is valid, otherwise stops class initialization. + """ + # TODO + return True + + +if __name__ == "__main__": + maestro = Maestro("http://orchestrator") + processes = maestro.get_processes() + print(processes) diff --git a/backend/core/migrations/0036_process_orchestration_process_id_and_more.py b/backend/core/migrations/0036_process_orchestration_process_id_and_more.py new file mode 100644 index 0000000..aeb180c --- /dev/null +++ b/backend/core/migrations/0036_process_orchestration_process_id_and_more.py @@ -0,0 +1,24 @@ +# Generated by Django 5.0.6 on 2024-06-25 20:45 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0035_remove_process_upload_product_type_and_more'), + ] + + operations = [ + migrations.AddField( + model_name='process', + name='orchestration_process_id', + field=models.IntegerField(default=1000), + preserve_default=False, + ), + migrations.AlterField( + model_name='process', + name='status', + field=models.CharField(default='Pending', max_length=255), + ), + ] diff --git a/backend/core/migrations/0037_alter_process_orchestration_process_id.py b/backend/core/migrations/0037_alter_process_orchestration_process_id.py new file mode 100644 index 0000000..b19b73f --- /dev/null +++ b/backend/core/migrations/0037_alter_process_orchestration_process_id.py @@ -0,0 +1,18 @@ +# Generated by Django 5.0.6 on 2024-06-25 20:51 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0036_process_orchestration_process_id_and_more'), + ] + + operations = [ + migrations.AlterField( + model_name='process', + name='orchestration_process_id', + field=models.IntegerField(blank=True, default=None, null=True), + ), + ] diff --git a/backend/core/migrations/0038_alter_process_path_alter_release_name.py b/backend/core/migrations/0038_alter_process_path_alter_release_name.py new file mode 100644 index 0000000..c11ad17 --- /dev/null +++ b/backend/core/migrations/0038_alter_process_path_alter_release_name.py @@ -0,0 +1,22 @@ +# Generated by Django 5.0.6 on 2024-07-05 19:49 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("core", "0037_alter_process_orchestration_process_id"), + ] + + operations = [ + migrations.AlterField( + model_name="process", + name="path", + field=models.CharField(blank=True, default=None, 
diff --git a/backend/core/migrations/0036_process_orchestration_process_id_and_more.py b/backend/core/migrations/0036_process_orchestration_process_id_and_more.py
new file mode 100644
index 0000000..aeb180c
--- /dev/null
+++ b/backend/core/migrations/0036_process_orchestration_process_id_and_more.py
@@ -0,0 +1,24 @@
+# Generated by Django 5.0.6 on 2024-06-25 20:45
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0035_remove_process_upload_product_type_and_more'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='process',
+            name='orchestration_process_id',
+            field=models.IntegerField(default=1000),
+            preserve_default=False,
+        ),
+        migrations.AlterField(
+            model_name='process',
+            name='status',
+            field=models.CharField(default='Pending', max_length=255),
+        ),
+    ]
diff --git a/backend/core/migrations/0037_alter_process_orchestration_process_id.py b/backend/core/migrations/0037_alter_process_orchestration_process_id.py
new file mode 100644
index 0000000..b19b73f
--- /dev/null
+++ b/backend/core/migrations/0037_alter_process_orchestration_process_id.py
@@ -0,0 +1,18 @@
+# Generated by Django 5.0.6 on 2024-06-25 20:51
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0036_process_orchestration_process_id_and_more'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='process',
+            name='orchestration_process_id',
+            field=models.IntegerField(blank=True, default=None, null=True),
+        ),
+    ]
diff --git a/backend/core/migrations/0038_alter_process_path_alter_release_name.py b/backend/core/migrations/0038_alter_process_path_alter_release_name.py
new file mode 100644
index 0000000..c11ad17
--- /dev/null
+++ b/backend/core/migrations/0038_alter_process_path_alter_release_name.py
@@ -0,0 +1,22 @@
+# Generated by Django 5.0.6 on 2024-07-05 19:49
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("core", "0037_alter_process_orchestration_process_id"),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name="process",
+            name="path",
+            field=models.CharField(blank=True, default=None, max_length=255, null=True),
+        ),
+        migrations.AlterField(
+            model_name="release",
+            name="name",
+            field=models.CharField(max_length=255, unique=True),
+        ),
+    ]
diff --git a/backend/core/models/process.py b/backend/core/models/process.py
index 6226ccc..a206943 100644
--- a/backend/core/models/process.py
+++ b/backend/core/models/process.py
@@ -1,7 +1,7 @@
 import pathlib
 import shutil
 
-from core.models import Pipeline, Product, ProductStatus, Release
+from core.models import Pipeline, Product, Release
 from django.conf import settings
 from django.contrib.auth.models import User
 from django.db import models
@@ -35,14 +35,9 @@ class Process(models.Model):
     started_at = models.DateTimeField(null=True, blank=True)
     ended_at = models.DateTimeField(null=True, blank=True)
     task_id = models.CharField(max_length=255, null=True, blank=True, default=None)
-    status = models.IntegerField(
-        verbose_name="Status",
-        default=ProductStatus.REGISTERING,
-        choices=ProductStatus.choices,
-    )
-    path = models.FilePathField(
-        verbose_name="Path", null=True, blank=True, default=None
-    )
+    orchestration_process_id = models.IntegerField(null=True, blank=True, default=None)
+    status = models.CharField(max_length=255, default="Pending")
+    path = models.CharField(max_length=255, null=True, blank=True, default=None)
     comment = models.TextField(null=True, blank=True)
 
     def __str__(self):
diff --git a/backend/core/models/release.py b/backend/core/models/release.py
index 92f01e3..a025874 100644
--- a/backend/core/models/release.py
+++ b/backend/core/models/release.py
@@ -2,7 +2,7 @@
 
 class Release(models.Model):
-    name = models.CharField(max_length=255)
+    name = models.CharField(max_length=255, unique=True)
     display_name = models.CharField(max_length=255)
     description = models.TextField(null=True, blank=True)
     created_at = models.DateTimeField(auto_now_add=True)
diff --git a/backend/core/product_steps.py b/backend/core/product_steps.py
new file mode 100644
index 0000000..81dfffe
--- /dev/null
+++ b/backend/core/product_steps.py
@@ -0,0 +1,280 @@
+import logging
+import pathlib
+
+from core.models import Product, ProductContent, ProductFile
+from core.product_handle import NotTableError, ProductHandle
+from core.serializers import ProductSerializer
+from django.conf import settings
+from rest_framework.reverse import reverse
+
+
+class CreateProduct:
+
+    def __init__(self, data, user):
+        """Create a product with initial information.
+
+        Args:
+            data (django.http.request.QueryDict): Initial information coming
+                from an http request
+            user (django.contrib.auth.models.User): User
+        """
+
+        self.__log = logging.getLogger("products")
+        self.__log.debug(f"Creating product: {data}")
+
+        serializer = ProductSerializer(data=data)
+        serializer.is_valid(raise_exception=True)
+
+        self.__data = self.__perform_create(serializer, user)
+        self.__check_official_product(user)
+
+    def save(self):
+        can_save = self.check_product_types()
+
+        if not can_save.get("success"):
+            return can_save.get("message")
+
+        self.__set_internal_name()
+        self.__create_product_path()
+
+        self.__log.debug(f"Product ID {self.__data.pk} created")
+
+    def __check_official_product(self, user):
+        """Checks if the product is official and if the user has permission
+        to save an official product.
+
+        Args:
+            user (User): User object
+
+        Raises:
+            ValueError: if the user has no permission
+
+        Returns:
+            bool
+        """
+
+        is_official = self.__data.official_product
+
+        if is_official:
+            if user.profile.is_admin() is False:
+                self.__delete()
+                raise ValueError(
+                    "Not allowed. Only users with admin permissions "
+                    "can create official products."
+                )
+
+        return True
+
+    @property
+    def data(self):
+        return self.__data
+
+    def get(self):
+        """Returns Product object
+
+        Returns:
+            Product object
+        """
+        return Product.objects.get(pk=self.__data.pk)
+
+    def __set_internal_name(self):
+        """Sets the internal name based on the primary key and display name"""
+
+        # change spaces to "_", convert to lowercase, remove trailing spaces.
+        name = self.__data.display_name.replace(" ", "_").lower().strip().strip("\n")
+
+        # strip any non-alphanumeric character except "_"
+        name = "".join(e for e in name if e.isalnum() or e == "_")
+        self.__data.internal_name = f"{self.__data.pk}_{name}"
+        self.__data.save()
+
+    def __create_product_path(self):
+        """Create product path"""
+
+        # Create product path
+        relative_path = f"{self.__data.product_type.name}/{self.__data.internal_name}"
+        path = pathlib.Path(settings.MEDIA_ROOT, relative_path)
+        path.mkdir(parents=True, exist_ok=True)
+
+        self.__data.path = relative_path
+        self.__data.save()
+
+    def check_product_types(self):
+        """Checks product types by applying a certain business rule.
+
+        Returns:
+            dict: {'message': {'entity': list(str)}, 'success': bool}
+        """
+
+        if not self.__data:
+            return {"message": {"product": ["No data."]}, "success": False,}
+
+        # Release is not allowed in Spec-z Catalog
+        if (
+            self.__data.release
+            and self.__data.product_type.name == "specz_catalog"
+        ):
+            self.__delete()
+            return {
+                "message": {"release": [
+                    "Release must be null on Spec-z Catalogs products."
+                ]}, "success": False,
+            }
+
+        # Pz code is only allowed in Validation Results and Photo-z Table products
+        if self.__data.pz_code and self.__data.product_type.name in (
+            "training_set",
+            "specz_catalog",
+        ):
+            dn = self.__data.product_type.display_name
+            pzc = self.__data.pz_code
+            self.__delete()
+            return {
+                "message": {"pz_code": [
+                    f"Pz Code must be null on {dn} products. '{pzc}'"
+                ]}, "success": False,
+            }
+
+        return {"message": {"product_type": ["Success!"]}, "success": True,}
+
+    def __perform_create(self, serializer, user) -> Product:
+        """Add user"""
+
+        uploaded_by = user
+        return serializer.save(user=uploaded_by)
+
+    def __delete(self):
+        """Delete product"""
+
+        if self.__data:
+            self.__data.path = f"{settings.MEDIA_ROOT}/{self.__data.path}"
+            self.__data.delete()
+            self.__data = None
+
+
+class RegistryProduct:
+    log = None
+
+    def __init__(self, product_id):
+        self.log = self.get_log()
+        self.main_file = None
+
+        self.log.info("----------------------------")
+        self.log.info("Product ID: [%s]" % product_id)
+        self.product = Product.objects.get(pk=product_id)
+        self.log.info("Internal Name: [%s]" % self.product.internal_name)
+
+    def get_log(self):
+        if not self.log:
+            # Get an instance of a logger
+            self.log = logging.getLogger("products")
+        return self.log
+
+    def registry(self):
+        try:
+            # Update the internal name
+            if self.product.internal_name is None:
+                self.product.internal_name = (
+                    f"{self.product.pk}_{self.product.internal_name}"
+                )
+                self.product.save()
+                self.log.info(
+                    "Internal Name Updated to: [%s]" % self.product.internal_name
+                )
+
+            # Retrieve the main file information
+            # through the ProductFile table
+            mf = self.product.files.get(role=0)
+            self.main_file = pathlib.Path(mf.file.path)
+            self.log.info("Main File: [%s]" % self.main_file)
+
+            product_columns = list()
+            try:
+                # Read the main file and convert it to a pandas.DataFrame
+                df_product = ProductHandle().df_from_file(self.main_file, nrows=5)
+                # List of columns in the file.
+                product_columns = df_product.columns.tolist()
+            except NotTableError:
+                # Happens with compressed files (.zip etc).
+                pass
+
+            # Check whether the product type is specz_catalog.
+            # For these products, access to the table columns is mandatory;
+            # for all other products it is optional.
+            if self.product.product_type.name == "specz_catalog":
+                if len(product_columns) == 0:
+                    raise Exception(
+                        "It was not possible to identify the product columns. For Spec-z Catalogs this is mandatory. Please check the file format."
+                    )
+
+            # Register the product columns in the database.
+            # Products may exist without any column records;
+            # this rule is handled by the frontend.
+            self.create_product_contents(product_columns)
+
+            # Save the changes made to the Product model
+            self.product.save()
+
+        except Exception as e:
+            self.log.error(e)
+            raise Exception(e)
+
+    def create_product_contents(self, columns):
+        """Registers the columns in the Product Contents table.
+
+        Args:
+            columns (list): column names in file order
+        """
+        try:
+            cached_ucds = dict()
+
+            # Remove all existing columns, if any
+            for col in self.product.contents.all():
+                # If the column has a UCD value, it is kept when the column
+                # is recreated with the same name
+                cached_ucds[col.column_name] = {"ucd": col.ucd, "alias": col.alias}
+                col.delete()
+
+            for idx, column_name in enumerate(columns):
+                ucd = None
+                alias = None
+                if column_name in cached_ucds:
+                    ucd = cached_ucds[column_name]["ucd"]
+                    alias = cached_ucds[column_name]["alias"]
+
+                ProductContent.objects.create(
+                    product=self.product,
+                    column_name=column_name,
+                    order=idx,
+                    ucd=ucd,
+                    alias=alias,
+                )
+
+            self.log.info(f"{len(columns)} product contents have been registered")
+
+        except Exception as e:
+            message = f"Failed to register product content. {e}"
+            self.log.error(message)
+            raise Exception(message)
+
+    def create_product_file(self, filepath, role=0):
+        """Create product file
+
+        Args:
+            filepath (str): product file path
+            role (int, optional): file role. Defaults to 0 (main file).
+        """
+
+        _file = pathlib.Path(filepath)
+
+        fileobj = ProductFile(
+            name=_file.name,
+            role=role,
+            product=self.product,
+            size=_file.stat().st_size,
+            file=str(_file),
+            product_id=self.product.pk,
+            extension=_file.suffix
+        )
+
+        return fileobj.save()
\ No newline at end of file
diff --git a/backend/core/serializers/process.py b/backend/core/serializers/process.py
index 3ffd00e..56bc962 100644
--- a/backend/core/serializers/process.py
+++ b/backend/core/serializers/process.py
@@ -1,4 +1,4 @@
-from core.models import Process, Product, Release
+from core.models import Process, Release
 from rest_framework import serializers
 
@@ -7,13 +7,9 @@ class ProcessSerializer(serializers.ModelSerializer):
     release = serializers.PrimaryKeyRelatedField(
         queryset=Release.objects.all(), many=False, allow_null=True, required=False
     )
-    # upload = serializers.PrimaryKeyRelatedField(
-    #     queryset=Product.objects.all(), many=False
-    # )
     release_name = serializers.SerializerMethodField()
     pipeline_name = serializers.SerializerMethodField()
     pipeline_version = serializers.SerializerMethodField()
-    status = serializers.SerializerMethodField()
     owned_by = serializers.SerializerMethodField()
     is_owner = serializers.SerializerMethodField()
     # can_delete = serializers.SerializerMethodField()
@@ -21,8 +17,11 @@ class ProcessSerializer(serializers.ModelSerializer):
 
     class Meta:
         model = Process
-        read_only_fields = ("pipeline_version", "is_owner", "upload", "status")
-        exclude = ("user", "path")
+        read_only_fields = (
+            "pipeline_version", "is_owner", "upload", "status",
+            "orchestration_process_id", "started_at", "ended_at", "path"
+        )
+        exclude = ("user", "task_id")
 
     def get_pipeline_name(self, obj):
         return obj.pipeline.name
@@ -30,10 +29,6 @@ def get_pipeline_name(self, obj):
     def get_pipeline_version(self, obj):
         return obj.pipeline.version
 
-    def get_status(self, obj):
-        return obj.upload.status
-        # return "REG"
-
     def get_release_name(self, obj):
         try:
             return obj.release.display_name
diff --git a/backend/core/tasks.py b/backend/core/tasks.py
new file mode 100644
index 0000000..bd7983f
--- /dev/null
+++ b/backend/core/tasks.py
@@ -0,0 +1,111 @@
+import logging
+import pathlib
+import shutil
+
+from celery import shared_task
+from core.maestro import Maestro
+from core.models import Process
+from core.models.product_file import FileRoles
+from core.product_steps import RegistryProduct
+from core.utils import load_yaml
+from django.conf import settings
+from django.utils import dateparse, timezone
+
+logger = logging.getLogger('beat')
+maestro = Maestro(settings.ORCHEST_URL)
+
+
+@shared_task()
+def check_processes_finish():
+    logger.info("Checking running processes...")
+
+    procs_updated = []
+    active_statuses = ['Pending', 'Running']
+    procs_running = Process.objects.filter(status__in=active_statuses)
+
+    for proc in procs_running:
+        logger.info(f"Consulting the {str(proc)} process status.")
+        proc_orches_id = proc.orchestration_process_id  # type: ignore
+
+        if not proc_orches_id:
+            message = f"Process {str(proc.pk)} without Orchestration ID."
+            logger.error(message)
+            proc.status = "Failed"
+            proc = update_dates(proc, {})
+            proc.save()
+            continue
+
+        proc_orchest = maestro.status(proc_orches_id)
+        proc_orchest_status = proc_orchest.get('status')  # type: ignore
+
+        logger.info(f"-> Process orchestration ID: {proc_orches_id}")
+        logger.info(f"-> Status: {proc_orchest_status}")
+
+        # Record the start time once the orchestration first reports Running
+        if proc_orchest_status == 'Running' and not proc.started_at:
+            started_at = proc_orchest.get('started_at', str(proc.created_at))
+            proc.started_at = dateparse.parse_datetime(started_at)
+            proc.save()
+
+        if proc_orchest_status not in active_statuses:
+            proc.status = proc_orchest_status
+            proc = update_dates(proc, proc_orchest)
+            proc.save()
+            logger.info(f"-> Process {str(proc)} updated.")
+            procs_updated.append(proc_orches_id)
+
+    return procs_updated
+
+def update_dates(process, data):
+    started_at = data.get('started_at', str(process.created_at))
+    ended_at = data.get('ended_at', str(timezone.now()))
+    process.started_at = dateparse.parse_datetime(started_at)
+    process.ended_at = dateparse.parse_datetime(ended_at)
+    return process
+
+
+def register_outputs(process_id):
+    """Registers the outputs declared in process.yml as product files.
+
+    Args:
+        process_id (int): process ID
+    """
+
+    file_roles = dict(FileRoles.choices)
+    file_roles = {str(v).lower(): k for k, v in file_roles.items()}
+
+    process = Process.objects.get(pk=process_id)
+    process_dir = pathlib.Path(settings.PROCESSING_DIR, process.path)
+    process_file = process_dir.joinpath("process.yml")
+
+    reg_product = RegistryProduct(process.upload.pk)
+
+    process_file_dict = load_yaml(process_file)
+    outputs = process_file_dict.get('outputs', None)
+
+    try:
+        for output in outputs:
+            filepath = output.get('path')
+            rolename = output.get('role')
+            role_id = file_roles.get(rolename, file_roles.get('description'))
+            upload_path = copy_upload(filepath, process.upload.path)
+            reg_product.create_product_file(upload_path, role_id)
+            process.upload.save()
+
+        reg_product.registry()
+        process.upload.status = 1  # Published status
+    except Exception as error:
+        process.upload.status = 9  # Failed status
+        logger.error("--> Failed to upload register <--")
+        logger.error(error)
+
+    process.upload.save()
+
+def copy_upload(filepath, upload_dir):
+    filepath = pathlib.Path(filepath)
+    new_filepath = pathlib.Path(settings.MEDIA_ROOT, upload_dir, filepath.name)
+    shutil.copyfile(str(filepath), str(new_filepath))
+    return str(new_filepath)
+
+
+if __name__ == "__main__":
+    register_outputs(5)
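The task above depends on the `outputs` section that each pipeline writes into its `process.yml` (see the `register_outputs` helper added to `run-crossmatch` later in this patch). A minimal sketch of that structure, with a hypothetical output path:

```python
# Hypothetical process.yml content consumed by core.tasks.register_outputs;
# the pipeline writes one entry per output file, and "role" is matched
# against the lowercased FileRoles labels (defaulting to "description").
import yaml

process_yml = """
outputs:
  - path: /processes/42/cross-output.parquet
    role: main
"""

for output in yaml.safe_load(process_yml)["outputs"]:
    print(output["path"], output["role"])
```
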
diff --git a/backend/core/utils.py b/backend/core/utils.py
index 85237bc..dce0a14 100644
--- a/backend/core/utils.py
+++ b/backend/core/utils.py
@@ -12,10 +12,14 @@
 logger = logging.getLogger()
 
+def load_yaml(filepath, encoding="utf-8"):
+    with open(filepath, encoding=encoding) as _file:
+        return yaml.safe_load(_file)
+
+
 def get_pipelines():
     sys_pipes_file = pathlib.Path(settings.PIPELINES_DIR, 'pipelines.yaml')
-    with open(sys_pipes_file, encoding="utf-8") as _file:
-        return yaml.safe_load(_file)
+    return load_yaml(sys_pipes_file)
 
 def get_pipeline(name):
@@ -78,7 +82,7 @@ def get_returncode(process_dir):
     return -1
 
-def format_query_to_char(key, value, fields):
+def format_query_to_char(key, value, fields) -> Q:
     condition = Q.OR if key.endswith("__or") else Q.AND
     values = value.split(",")
     query = Q()
diff --git a/backend/core/views/__init__.py b/backend/core/views/__init__.py
index bac1068..a12f595 100644
--- a/backend/core/views/__init__.py
+++ b/backend/core/views/__init__.py
@@ -1,3 +1,4 @@
+from core.maestro import Maestro
 from core.views.pipeline import PipelineViewSet
from core.views.process import ProcessViewSet from core.views.product import ProductViewSet @@ -7,3 +8,45 @@ from core.views.release import ReleaseViewSet from core.views.user import (CsrfToOauth, GetToken, LoggedUserView, Logout, UserViewSet) +from django.conf import settings +from rest_framework import status +from rest_framework.response import Response +from rest_framework.views import APIView + + +class OrchestrationInfoView(APIView): + + http_method_names = ["get",] + + def get(self, request): + try: + maestro = Maestro(url=settings.ORCHEST_URL) + data = maestro.sysinfo() + processing_dir = data.get("processing_dir", "") + if processing_dir != settings.PROCESSING_DIR: + raise ValueError(( + f"PROCESSING_DIR ({settings.PROCESSING_DIR}) is " + f"different in the orchestration: {processing_dir}" + )) + code_status = status.HTTP_200_OK + except Exception as err: + data = {"error": str(err)} + code_status = status.HTTP_500_INTERNAL_SERVER_ERROR + + return Response(data, status=code_status) + + +class OrchestrationPipelinesView(APIView): + + http_method_names = ["get",] + + def get(self, request): + try: + maestro = Maestro(url=settings.ORCHEST_URL) + data = maestro.pipelines() + code_status = status.HTTP_200_OK + except Exception as err: + data = {"error": str(err)} + code_status = status.HTTP_500_INTERNAL_SERVER_ERROR + + return Response(data, status=code_status) \ No newline at end of file diff --git a/backend/core/views/create_product.py b/backend/core/views/create_product.py deleted file mode 100644 index 9d7f53f..0000000 --- a/backend/core/views/create_product.py +++ /dev/null @@ -1,155 +0,0 @@ -import logging -import pathlib - -from core.models import Product -from core.serializers import ProductSerializer -from django.conf import settings - - -class CreateProduct: - - def __init__(self, data, user): - self.__log = logging.getLogger("create_product") - self.__log.debug(f"Creating product: {data}") - - serializer = ProductSerializer(data=data) - serializer.is_valid(raise_exception=True) - - self.__data = self.__perform_create(serializer, user) - self.__check_official_product(user) - - def save(self): - can_save = self.check_product_types() - - if not can_save.get("success"): - return can_save.get("message") - - self.__set_internal_name() - self.__create_product_path() - - self.__log.debug(f"Product ID {self.__data.pk} created") - - def __check_official_product(self, user): - """Checks if the product is official and if the user has permission - to save an official product. - - Args: - user (User): User object - - Raises: - ValueError: if the user no has permission - - Returns: - bool - """ - - is_official = self.__data.official_product - - if is_official: - if user.profile.is_admin() is False: - self.__delete() - raise ValueError( - "Not allowed. Only users with admin permissions " - "can create official products." - ) - - return True - - @property - def data(self): - return self.__data - - def get(self): - """Returns Product object - - Returns: - Product object - """ - return Product.objects.get(pk=self.__data.pk) - - def __set_internal_name(self): - """Sets the internal name based on the primary key and display name""" - - # change spaces to "_", convert to lowercase, remove trailing spaces. 
- name = self.__data.display_name.replace(" ", "_").lower().strip().strip("\n") - - # strip any non-alphanumeric character except "_" - name = "".join(e for e in name if e.isalnum() or e == "_") - self.__data.internal_name = f"{self.__data.pk}_{name}" - self.__data.save() - - def __create_product_path(self): - """Create product path""" - - # Create product path - relative_path = f"{self.__data.product_type.name}/{self.__data.internal_name}" - path = pathlib.Path(settings.MEDIA_ROOT, relative_path) - path.mkdir(parents=True, exist_ok=True) - - self.__data.path = relative_path - self.__data.save() - - def check_product_types(self): - """Checks product types by applying a certain business rule. - - Returns: - dict: {'message': {'entity':list(str)}, 'status': bool} - """ - - if not self.__data: - return {"message": {"product": ["No data."]}, "success": False,} - - # Release is not allowed in Spec-z Catalog - if ( - self.__data.release - and self.__data.product_type.name == "specz_catalog" - ): - self.__delete() - return { - "message": {"release": [ - "Release must be null on Spec-z Catalogs products." - ]}, "success": False, - } - - # Pzcode is only allowed in Validations Results and Photo-z Table - if self.__data.pz_code and self.__data.product_type.name in ( - "training_set", - "specz_catalog", - ): - dn = self.__data.product_type.display_name - pzc = self.__data.pz_code - self.__delete() - return { - "message": {"pz_code": [ - f"Pz Code must be null on {dn} products. '{pzc}'" - ]}, "success": False, - } - - return {"message": {"product_type": ["Success!"]}, "success": True,} - - def __perform_create(self, serializer, user): - """Add user""" - - uploaded_by = user - return serializer.save(user=uploaded_by) - - def __delete(self): - """Delete product""" - - if self.__data: - self.__data.path = f"{settings.MEDIA_ROOT}/{self.__data.path}" - self.__data.delete() - self.__data = None - - - - - - - - - - - - - diff --git a/backend/core/views/pipeline.py b/backend/core/views/pipeline.py index 814c3aa..b462d85 100644 --- a/backend/core/views/pipeline.py +++ b/backend/core/views/pipeline.py @@ -1,9 +1,13 @@ +import logging + from core import models from core.serializers import PipelineSerializer from rest_framework import viewsets from rest_framework.decorators import action from rest_framework.response import Response +logger = logging.getLogger("django") + class PipelineViewSet(viewsets.ReadOnlyModelViewSet): queryset = models.Pipeline.objects.all() @@ -23,4 +27,3 @@ def api_schema(self, request): meta = self.metadata_class() data = meta.determine_metadata(request, self) return Response(data) - \ No newline at end of file diff --git a/backend/core/views/process.py b/backend/core/views/process.py index c0e515e..c5e2133 100644 --- a/backend/core/views/process.py +++ b/backend/core/views/process.py @@ -1,12 +1,20 @@ +import json +import logging +import pathlib + +from core.maestro import Maestro from core.models import Pipeline, Process +from core.product_steps import CreateProduct from core.serializers import ProcessSerializer from core.utils import format_query_to_char -from core.views.create_product import CreateProduct +from django.conf import settings from django_filters import rest_framework as filters from rest_framework import exceptions, status, viewsets from rest_framework.decorators import action from rest_framework.response import Response +logger = logging.getLogger("django") + class ProcessFilter(filters.FilterSet): release__isnull = filters.BooleanFilter( @@ -75,22 +83,69 @@ def 
create(self, request):
             instance = self.perform_create(serializer)
             print("INSTANCE: ", instance)
-            print("INSTANCE type: ", type(instance))
             process = Process.objects.get(pk=instance.pk)
             process.save()
+        except Exception as e:
+            content = {"error": str(e)}
+            return Response(content, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
+
+        try:
+            maestro = Maestro(url=settings.ORCHEST_URL)
+
+            release_path = None
+            if process.release:
+                release_path = str(pathlib.Path(
+                    settings.DATASETS_DIR, process.release.name
+                ))
+
+            used_config = {}
+            if process.used_config:
+                used_config = process.used_config
+
+            _inputs = process.inputs.all()
+            print("INPUTS: ", _inputs)
+
+            inputfiles = []
+
+            for _input in _inputs:
+                print("INPUT: ", _input)
+                main_file = _input.files.get(role=0)
+                filepath = pathlib.Path(settings.MEDIA_ROOT, _input.path, main_file.name)
+                print("FILEPATH: ", filepath)
+                inputfiles.append(str(filepath))
+
+            used_config['inputfiles'] = inputfiles
+            used_config['inputs'] = {'release': release_path}
+            print("USED CONFIG: ", used_config)
+
+            orchestration_process = maestro.start(
+                pipeline=process.pipeline.name,
+                config=used_config
+            )
+
+            print("ORCHESTRATION PROCESS: ", orchestration_process)
+
+            process.orchestration_process_id = orchestration_process.get('id')
+            process.used_config = json.loads(
+                orchestration_process.get('used_config', None)
+            )
+            process.path = orchestration_process.get('path_str')
+            process.save()
 
             data = self.get_serializer(instance=process).data
             return Response(data, status=status.HTTP_201_CREATED)
-        except Exception as e:
-            content = {"error": str(e)}
+        except Exception as e:
+            content = {"error": f"Orchestration API failure: {str(e)}"}
             return Response(content, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
 
     def perform_create(self, serializer):
         """Add user and upload"""
 
         owned_by = self.request.user
+
+        # TODO: test the release path
+
         upload = self.create_initial_upload(serializer, owned_by)
         return serializer.save(user=owned_by, upload=upload)
 
@@ -104,7 +159,7 @@ def create_initial_upload(self, serializer, user):
             "pz_code": data.get("pz_code", None),
             "official_product": data.get("official_product", False),
             "description": data.get("description", None),
-            "product_type": pipeline.output_product_type.pk,
+            "product_type": pipeline.output_product_type.pk,  # type: ignore
         }
         product = CreateProduct(upload_data, user)
         check_prodtype = product.check_product_types()
@@ -120,6 +175,30 @@ def api_schema(self, request):
         meta = self.metadata_class()
         data = meta.determine_metadata(request, self)
         return Response(data)
+
+    @action(methods=["GET"], detail=True)
+    def stop(self, request):
+        try:
+            instance = self.get_object()
+            _id = instance.pk
+            process = Process.objects.get(pk=_id)
+            orchestration_process_id = process.orchestration_process_id
+
+            if not orchestration_process_id:
+                raise ValueError(f"Process[{_id}]: orchestration process not found.")
+
+            maestro = Maestro(url=settings.ORCHEST_URL)
+            orcdata = maestro.stop(orchestration_process_id)
+            process.status = orcdata.get("status", "Stopping*")
+            process.save()
+            data = self.get_serializer(instance=process).data
+            code_status = status.HTTP_200_OK
+        except Exception as err:
+            data = {"error": str(err)}
+            code_status = status.HTTP_500_INTERNAL_SERVER_ERROR
+
+        logger.info("Process[%s]: %s", str(process), data)
+        return Response(data, status=code_status)
 
     def destroy(self, request, pk=None, *args, **kwargs):
         """Product can only be deleted by the OWNER or if the user
diff --git a/backend/core/views/product.py b/backend/core/views/product.py
index
1aa6c28..03860f1 100644 --- a/backend/core/views/product.py +++ b/backend/core/views/product.py @@ -9,13 +9,11 @@ from core.models import Product from core.product_handle import FileHandle, NotTableError +from core.product_steps import CreateProduct, RegistryProduct from core.serializers import ProductSerializer from core.utils import format_query_to_char -from core.views.create_product import CreateProduct -from core.views.registry_product import RegistryProduct from django.conf import settings from django.core.paginator import Paginator -from django.db.models import Q from django.http import FileResponse from django_filters import rest_framework as filters from rest_framework import exceptions, status, viewsets @@ -87,6 +85,13 @@ class ProductViewSet(viewsets.ModelViewSet): ordering = ["-created_at"] def create(self, request): + + print('PRODUCT -> ', request.data) + print('PRODUCT (type) -> ', type(request.data)) + + print("USER -> ", request.user) + print("USER (type) -> ", type(request.user)) + try: product = CreateProduct(request.data, request.user) check_prodtype = product.check_product_types() diff --git a/backend/core/views/registry_product.py b/backend/core/views/registry_product.py deleted file mode 100644 index 8752d80..0000000 --- a/backend/core/views/registry_product.py +++ /dev/null @@ -1,115 +0,0 @@ -import logging -import shutil -from pathlib import Path - -from core.models import Product, ProductContent -from core.product_handle import NotTableError, ProductHandle - - -class RegistryProduct: - log = None - product = None - main_file = None - - def __init__(self, product_id): - self.log = self.get_log() - - self.log.info("----------------------------") - - self.log.info("Product ID: [%s]" % product_id) - - self.product = Product.objects.get(pk=product_id) - - self.log.info("Internal Name: [%s]" % self.product.internal_name) - - def get_log(self): - if not self.log: - # Get an instance of a logger - self.log = logging.getLogger("registry_product") - return self.log - - def registry(self): - try: - # Alterar o Internal name - if self.product.internal_name is None: - self.product.internal_name = ( - f"{self.product.pk}_{self.product.internal_name}" - ) - self.product.save() - self.log.info( - "Internal Name Updated to: [%s]" % self.product.internal_name - ) - - # Recupera informação do arquivo principal - # pela tabela productFile - mf = self.product.files.get(role=0) - self.main_file = Path(mf.file.path) - self.log.info("Main File: [%s]" % self.main_file) - - product_columns = list() - try: - # Le o arquivo principal e converte para pandas.Dataframe - df_product = ProductHandle().df_from_file(self.main_file, nrows=5) - # Lista de Colunas no arquivo. - product_columns = df_product.columns.tolist() - except NotTableError: - # Acontece com arquivos comprimidos .zip etc. - pass - - # Verifica se o product type é specz_catalog - # Para esses produtos é mandatório ter acesso as colunas da tabela - # Para os demais produtos é opicional. - if self.product.product_type.name == "specz_catalog": - if len(product_columns) == 0: - raise Exception( - "It was not possible to identify the product columns. for Spec-z Catalogs this is mandatory. Please check the file format." - ) - - # Registra as colunas do produto no banco de dados. - # é possivel ter produtos sem nenhum registro de coluna - # Essa regra será tratada no frontend. 
- self.create_product_contents(product_columns) - - # Salva as alterações feitas no model product - self.product.save() - - except Exception as e: - self.log.error(e) - raise Exception(e) - - def create_product_contents(self, columns): - """Registrar as colunas na tabela Product Contents - - Args: - columns (_type_): _description_ - """ - try: - cached_ucds = dict() - - # Remove todas as colunas caso exista - for col in self.product.contents.all(): - # Caso a coluna tenha valor de UCD esse sera mantido ao recriar a coluna com mesmo nome - cached_ucds[col.column_name] = {"ucd": col.ucd, "alias": col.alias} - col.delete() - - for idx, column_name in enumerate(columns): - ucd = None - alias = None - if column_name in cached_ucds: - ucd = cached_ucds[column_name]["ucd"] - alias = cached_ucds[column_name]["alias"] - - ProductContent.objects.create( - product=self.product, - column_name=column_name, - order=idx, - ucd=ucd, - alias=alias, - ) - - self.log.info(f"{len(columns)} product contents have been registered") - - except Exception as e: - message = f"Failed to register product content. {e}" - self.log.error(message) - raise Exception(message) diff --git a/backend/pz-beat.sh b/backend/pz-beat.sh new file mode 100755 index 0000000..521b500 --- /dev/null +++ b/backend/pz-beat.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +set -o errexit +set -o pipefail +set -o nounset + +echo "Starting Celery Beat" + +rm -rf /tmp/celerybeat.pid + +celery -A pzserver beat \ + -l "${LOGGING_LEVEL}" \ + -s /tmp/celerybeat-schedule \ + --pidfile="/tmp/celerybeat.pid" \ + --logfile="/archive/log/celerybeat.log" \ No newline at end of file diff --git a/backend/pzserver/__init__.py b/backend/pzserver/__init__.py index e69de29..fb989c4 100644 --- a/backend/pzserver/__init__.py +++ b/backend/pzserver/__init__.py @@ -0,0 +1,3 @@ +from .celery import app as celery_app + +__all__ = ('celery_app',) diff --git a/backend/pzserver/celery.py b/backend/pzserver/celery.py new file mode 100644 index 0000000..4a547ca --- /dev/null +++ b/backend/pzserver/celery.py @@ -0,0 +1,30 @@ +import os + +from celery import Celery + +# Set the default Django settings module for the 'celery' program. +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'pzserver.settings') + +app = Celery('orchestration') + +# Using a string here means the worker doesn't have to serialize +# the configuration object to child processes. +# - namespace='CELERY' means all celery-related configuration keys +# should have a `CELERY_` prefix. +app.config_from_object('django.conf:settings', namespace='CELERY') + +# https://docs.celeryq.dev/en/stable/userguide/periodic-tasks.html +app.conf.beat_schedule = { + "check-finish": { + "task": "core.tasks.check_processes_finish", + "schedule": 60.0, + }, +} +app.conf.timezone = "UTC" + +# Load task modules from all registered Django apps. 
+app.autodiscover_tasks() + +@app.task(bind=True, ignore_result=True) +def debug_task(self): + print(f'Request: {self.request!r}') \ No newline at end of file diff --git a/backend/pzserver/settings.py b/backend/pzserver/settings.py index f5613b0..9922491 100644 --- a/backend/pzserver/settings.py +++ b/backend/pzserver/settings.py @@ -40,6 +40,8 @@ # "django.contrib.sites", # Third-party "corsheaders", + "django_celery_beat", + "django_celery_results", "django_filters", "rest_framework", "drf_spectacular", @@ -102,6 +104,27 @@ } } + +# rabbitmq +AMQP_HOST = os.getenv("RABBITMQ_HOST", "rabbitmq") +AMQP_PORT = os.getenv("RABBITMQ_PORT","5672") +AMQP_USER = os.getenv("RABBITMQ_DEFAULT_USER", "orcadmin") +AMQP_PASS = os.getenv("RABBITMQ_DEFAULT_PASS", "adminorc") +AMQP_VHOST = os.getenv("RABBITMQ_DEFAULT_VHOST", "/") + + +# Celery Configuration Options +CELERY_BROKER_URL = ( + f"amqp://{AMQP_USER}:{AMQP_PASS}@{AMQP_HOST}:{AMQP_PORT}{AMQP_VHOST}" +) +CELERY_CACHE_BACKEND = "django-cache" +CELERY_RESULT_BACKEND = "django-db" +CELERY_RESULT_EXTENDED = True +CELERY_TIMEZONE = "UTC" +CELERY_BROKER_CONNECTION_RETRY_ON_STARTUP = True +CELERY_TASK_TRACK_STARTED = True +CELERY_TASK_TIME_LIMIT = 30 * 60 + # Password validation # https://docs.djangoproject.com/en/4.0/ref/settings/#auth-password-validators @@ -155,6 +178,13 @@ # https://docs.djangoproject.com/en/4.1/ref/settings/#csrf-cookie-name CSRF_COOKIE_NAME = "pzserver.csrftoken" +# Orchestration +ORCHEST_URL = os.getenv("ORCHEST_URL", None) + +if ORCHEST_URL: + ORCHEST_CLIENT_ID = os.getenv("ORCHEST_CLIENT_ID") + ORCHEST_CLIENT_SECRET = os.getenv("ORCHEST_CLIENT_SECRET") + REST_FRAMEWORK = { "DEFAULT_SCHEMA_CLASS": "drf_spectacular.openapi.AutoSchema", "DEFAULT_AUTHENTICATION_CLASSES": ( @@ -254,18 +284,26 @@ "backupCount": 5, "formatter": "standard", }, - "shibboleth": { + "beat": { "level": LOGGING_LEVEL, "class": "logging.handlers.RotatingFileHandler", - "filename": os.path.join(LOG_DIR, "shibboleth.log"), + "filename": os.path.join(LOG_DIR, "celerybeat.log"), "maxBytes": 1024 * 1024 * 5, # 5 MB "backupCount": 5, "formatter": "standard", }, - "registry_product": { + # "shibboleth": { + # "level": LOGGING_LEVEL, + # "class": "logging.handlers.RotatingFileHandler", + # "filename": os.path.join(LOG_DIR, "shibboleth.log"), + # "maxBytes": 1024 * 1024 * 5, # 5 MB + # "backupCount": 5, + # "formatter": "standard", + # }, + "products": { "level": LOGGING_LEVEL, "class": "logging.handlers.RotatingFileHandler", - "filename": os.path.join(LOG_DIR, "registry_product.log"), + "filename": os.path.join(LOG_DIR, "products.log"), "maxBytes": 1024 * 1024 * 5, # 5 MB "backupCount": 5, "formatter": "standard", @@ -287,13 +325,18 @@ "level": LOGGING_LEVEL, "propagate": True, }, - "shibboleth": { - "handlers": ["shibboleth"], + "beat": { + "handlers": ["beat"], "level": LOGGING_LEVEL, "propagate": True, }, - "registry_product": { - "handlers": ["registry_product"], + # "shibboleth": { + # "handlers": ["shibboleth"], + # "level": LOGGING_LEVEL, + # "propagate": True, + # }, + "products": { + "handlers": ["products"], "level": LOGGING_LEVEL, "propagate": True, }, diff --git a/backend/pzserver/urls.py b/backend/pzserver/urls.py index 768a7ec..9701e4e 100644 --- a/backend/pzserver/urls.py +++ b/backend/pzserver/urls.py @@ -15,9 +15,11 @@ """ # from core.api import viewsets as products_viewsets from core.views import (CsrfToOauth, GetToken, LoggedUserView, Logout, + OrchestrationInfoView, OrchestrationPipelinesView, PipelineViewSet, ProcessViewSet, ProductContentViewSet, 
ProductFileViewSet, ProductTypeViewSet, ProductViewSet, ReleaseViewSet, UserViewSet) +from django.conf import settings from django.contrib import admin from django.urls import include, path from drf_spectacular.views import (SpectacularAPIView, SpectacularRedocView, @@ -28,13 +30,14 @@ route.register(r"users", UserViewSet, basename="users") route.register(r"releases", ReleaseViewSet, basename="releases") -route.register(r"pipelines", PipelineViewSet, basename="pipelines") -route.register(r"processes", ProcessViewSet, basename="processes") route.register(r"product-types", ProductTypeViewSet, basename="product_types") route.register(r"products", ProductViewSet, basename="products") route.register(r"product-contents", ProductContentViewSet, basename="product_contents") route.register(r"product-files", ProductFileViewSet, basename="product_files") +if settings.ORCHEST_URL: + route.register(r"pipelines", PipelineViewSet, basename="pipelines") + route.register(r"processes", ProcessViewSet, basename="processes") from rest_framework.authtoken import views @@ -58,3 +61,7 @@ ), path("api/redoc/", SpectacularRedocView.as_view(url_name="schema"), name="redoc"), ] + +if settings.ORCHEST_URL: + urlpatterns.append(path("api/sysinfo/", OrchestrationInfoView.as_view())) + urlpatterns.append(path("api/orch_pipelines/", OrchestrationPipelinesView.as_view())) diff --git a/backend/requirements.txt b/backend/requirements.txt index 8b598c1..33b7c66 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,6 +1,9 @@ astropy==6.1.0 coverage==7.5.1 +celery Django==5.0.6 +django-celery-results +django-celery-beat django-cors-headers==4.3.1 django-filebrowser-no-grappelli==4.0.2 django-filter==24.2 diff --git a/backend/start.sh b/backend/start.sh index b0e1ee6..5cefecc 100644 --- a/backend/start.sh +++ b/backend/start.sh @@ -4,7 +4,6 @@ python manage.py migrate --noinput python manage.py collectstatic --noinput --clear # Para produção é necessário usar o uWSGI! -# uWSGI para servir o app e ter compatibilidade com Shibboleth # https://uwsgi-docs.readthedocs.io/en/latest/WSGIquickstart.html uwsgi \ --socket 0.0.0.0:8000 \ diff --git a/backend/worker.sh b/backend/worker.sh new file mode 100755 index 0000000..10de117 --- /dev/null +++ b/backend/worker.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +set -o errexit +set -o pipefail +set -o nounset + +host=$(hostname) + +sleep 5 + +echo "Starting Celery Worker" + +rm -rf /tmp/local-*.pid + +celery -A pzserver worker \ + -l "${LOGGING_LEVEL}" \ + --pidfile="/tmp/local-%n.pid" \ + --logfile="/archive/log/${host}%I.log" \ + --concurrency=2 \ No newline at end of file diff --git a/docker-compose-development-orch.yml b/docker-compose-development-orch.yml new file mode 100644 index 0000000..cb4a124 --- /dev/null +++ b/docker-compose-development-orch.yml @@ -0,0 +1,114 @@ +name: "pz" + +services: + database: + image: postgres:13.6-alpine + env_file: + - .env + expose: + # Deixa a porta do banco de dados acessivel + # para ferramentas externas como o dbeaver por exemplo. 
+ - 5432 + networks: + - default + volumes: + - ./pg_data:/var/lib/postgresql/data + + backend: &pzbackend + build: + context: ./backend + args: + - "USERID=${USERID:-1000}" + - "GROUPID=${GROUPID:-1000}" + - "USERNAME=${USERNAME:-pzapp}" + command: /start.sh + user: "1000:1000" + env_file: + - .env + networks: + - default + - orchestration-network + volumes: + - ./backend:/app + - ./archive/log/backend:/archive/log + - ./archive/data:/archive/data + - ./orchestration/pipelines:/pipelines + - ./orchestration/processes:/processes + - ./orchestration/datasets:/datasets + extra_hosts: + - "host.docker.internal:host-gateway" + depends_on: + - database + - orchestration-api + + pz-beat: + <<: *pzbackend + hostname: "pzbeat" + command: /app/pz-beat.sh + depends_on: + - backend + + pz-worker: + <<: *pzbackend + hostname: "pzworker" + command: /app/worker.sh + depends_on: + - backend + + frontend: + image: node:lts + user: "1000:1000" + working_dir: /app + volumes: + - ./frontend:/app + - ./.env.local:/app/.env.local + stdin_open: true + command: yarn dev + networks: + - default + + orchestration-api: + extends: + file: ./orchestration/docker-compose.yml + service: orchestration-api + + orchestration-local-worker: + extends: + file: ./orchestration/docker-compose.yml + service: orchestration-local-worker + + orchestration-flower: + extends: + file: ./orchestration/docker-compose.yml + service: orchestration-flower + + rabbitmq: + extends: + file: ./orchestration/docker-compose.yml + service: rabbitmq + + orchestrator: + extends: + file: ./orchestration/docker-compose.yml + service: orchestrator + + nginx: + image: nginx:1.21.6-alpine + ports: + - 80:8080 + volumes: + - ./nginx_development-orch.conf:/etc/nginx/conf.d/default.conf:ro + - ./archive/data:/var/www/data + - ./backend/htmlcov:/var/www/coverage + networks: + - default + depends_on: + - backend + - frontend + - rabbitmq + - orchestration-flower + +networks: + orchestration-network: + external: true + driver: bridge diff --git a/docker-compose-development.yml b/docker-compose-development.yml index 770ad80..89df488 100644 --- a/docker-compose-development.yml +++ b/docker-compose-development.yml @@ -1,4 +1,4 @@ -version: "3.9" +name: "pz" services: database: diff --git a/docker-compose-production.yml b/docker-compose-production.yml index f9da851..dfda181 100644 --- a/docker-compose-production.yml +++ b/docker-compose-production.yml @@ -1,7 +1,6 @@ -version: "3.9" +name: "pz" services: - backend: # ATENÇÃO: Substitua a imagem pela versão desejada image: linea/pzserver:backend_ diff --git a/env_template b/env_template index 1f01b94..1e94226 100644 --- a/env_template +++ b/env_template @@ -1,4 +1,4 @@ -# Postgres +# Postgres POSTGRES_USER=pzadmin POSTGRES_PASSWORD=adminpz POSTGRES_DB=pzdev @@ -20,6 +20,14 @@ AUTORELOAD=1 # SECURITY WARNING: keep the secret key used in production secret! 
SECRET_KEY=Z-cVO6l8catnpijVIKXcwwaKeGJTzJ2hiItn3lw2a4M +# AMQP +RABBITMQ_HOST="rabbitmq" +RABBITMQ_PORT="5672" +RABBITMQ_ERLANG_COOKIE="SWQOKODSQALRPCLNMEQG" +RABBITMQ_DEFAULT_USER="orcadmin" +RABBITMQ_DEFAULT_PASS="adminorc" +RABBITMQ_DEFAULT_VHOST="/" + # Database DB_ENGINE=django.db.backends.postgresql DB_USER=pzadmin @@ -38,12 +46,20 @@ DJANGO_CSRF_TRUSTED_ORIGINS=http://localhost http://127.0.0.1 # DJANGO_OAUTH_CLIENT_ID= # DJANGO_OAUTH_CLIENT_SECRET= +# Configure the ORCHEST_URL variable with an active instance of an orchestration app +# ORCHEST_URL= + +# Client ID and Client Secret must be registered in Django Admin +# after backend Setup, in the Django Oauth Applications interface +# ORCHEST_CLIENT_ID= +# ORCHEST_CLIENT_SECRET= + # Github OAuth # To enable Github OAuth authentication, # uncomment and fill in the following variables. -#GITHUB_CLIENT_ID= -#GITHUB_CLIENT_SECRET= -#GITHUB_ORG_NAME=linea-it +# GITHUB_CLIENT_ID= +# GITHUB_CLIENT_SECRET= +# GITHUB_ORG_NAME=linea-it # Shibboleth / Satosa Auth # Url para login utilizando Shibboleth diff --git a/nginx_development-orch.conf b/nginx_development-orch.conf new file mode 100644 index 0000000..b131819 --- /dev/null +++ b/nginx_development-orch.conf @@ -0,0 +1,92 @@ +upstream pzapi { + server backend:8000; +} + +upstream pzfrontend { + server frontend:3000; +} + +server { + listen 8080; + server_name localhost; + + client_max_body_size 200M; + + charset utf-8; + + gzip on; + gzip_comp_level 9; + gzip_min_length 128; + gzip_types text/plain application/javascript application/x-javascript text/javascript text/xml text/css application/json application/vnd.ms-fontobject application/x-font-ttf font/opentype image/svg+xml image/x-icon; + + proxy_send_timeout 120s; + proxy_read_timeout 120s; + fastcgi_send_timeout 120s; + fastcgi_read_timeout 120s; + + # access_log /var/log/nginx/host.access.log main; + + # Proxy pass to frontend development server with live relead + # Based in this article: https://nathanfriend.io/2018/05/14/live-reloading-an-angular-2-app-behind-nginx.html + location / { + proxy_pass http://pzfrontend; + + # proxy_pass_request_headers on; + # proxy_set_header HTTP_AUTHORIZATION $http_authorization; + + proxy_set_header X-Forwarded-For $remote_addr; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header Host $host; + + # live reload + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection 'upgrade'; + proxy_cache_bypass $http_upgrade; + } + + location /ws { + proxy_pass http://frontend; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + } + + location /api { + include uwsgi_params; + uwsgi_pass pzapi; + } + + location /admin { + include uwsgi_params; + uwsgi_pass pzapi; + } + + location /django_static { + include uwsgi_params; + uwsgi_pass pzapi; + } + + location /archive/data { + alias /var/www/data/; + try_files $uri $uri/ /index.html; + autoindex off; + } + + location /coverage { + alias /var/www/coverage/; + try_files $uri $uri/ /index.html; + autoindex off; + } + + # Rabbitmq Management + location /rabbitmq/ { + proxy_pass http://rabbitmq:15672/; + rewrite ^/rabbitmq/(.*)$ /$1 break; + } + + # Celery Flower + location /flower { + proxy_pass http://orchestration-flower:5555; + } +} \ No newline at end of file diff --git a/orchestration/.orchestration-env b/orchestration/.orchestration-env index 963cb22..cf21b9b 100644 --- a/orchestration/.orchestration-env +++ b/orchestration/.orchestration-env @@ -5,13 +5,11 
@@ LOGGING_LEVEL="DEBUG" AUTORELOAD=1 # CORS -DJANGO_ALLOWED_HOSTS="orchestration localhost 127.0.0.1 [::1]" -ALLOWED_HOSTS="orchestration localhost 127.0.0.1 [::1]" -DJANGO_CSRF_TRUSTED_ORIGINS="http://orchestration http://localhost http://127.0.0.1" +DJANGO_ALLOWED_HOSTS="orchestrator localhost 127.0.0.1 [::1]" +DJANGO_CSRF_TRUSTED_ORIGINS="http://orchestrator http://localhost" # AMQP RABBITMQ_HOST="rabbitmq" -# RABBITMQ_HOST="host-gateway" RABBITMQ_PORT="5672" RABBITMQ_ERLANG_COOKIE="SWQOKODSQALRPCLNMEQG" RABBITMQ_DEFAULT_USER="orcadmin" diff --git a/orchestration/pipelines/cross_lsdb_dev/config.py b/orchestration/pipelines/cross_lsdb_dev/config.py index 4961cd0..5b963ef 100644 --- a/orchestration/pipelines/cross_lsdb_dev/config.py +++ b/orchestration/pipelines/cross_lsdb_dev/config.py @@ -48,4 +48,6 @@ class Config(BaseModel): cfg = Config() with open('config.yml', 'w') as outfile: - yaml.dump(cfg.model_dump(), outfile) + data_json = cfg.model_dump() + print(data_json) + yaml.dump(data_json, outfile) diff --git a/orchestration/pipelines/cross_lsdb_dev/config.yml b/orchestration/pipelines/cross_lsdb_dev/config.yml new file mode 100644 index 0000000..079bc21 --- /dev/null +++ b/orchestration/pipelines/cross_lsdb_dev/config.yml @@ -0,0 +1,19 @@ +executor: + linea_slurm: + adapt: + maximum_jobs: 10 + instance: + job_extra_directives: + - --propagate + - --time=2:00:00 + memory: 123GiB + processes: 1 + queue: cpu + local: + memory_limit: 1GiB + n_workers: 2 + threads_per_worker: 2 +inputs: + photo: /datasets/DatasetA + specz: /datasets/DatasetB +output_dir: ./output diff --git a/orchestration/pipelines/cross_lsdb_dev/packages/utils.py b/orchestration/pipelines/cross_lsdb_dev/packages/utils.py index 2ce2487..d23b553 100755 --- a/orchestration/pipelines/cross_lsdb_dev/packages/utils.py +++ b/orchestration/pipelines/cross_lsdb_dev/packages/utils.py @@ -1,11 +1,12 @@ """_summary_ """ -import yaml import logging import os import pathlib from typing import Any +import yaml + def setup_logger(name="pipeline-logger"): """ @@ -44,3 +45,15 @@ def load_yml(filepath: str) -> Any: content = yaml.safe_load(_file) return content + + +def dump_yml(filepath, content, encoding="utf-8"): + """ Dump yaml file + + Args: + filepath (str): filepath output + content (dict): yaml content + """ + + with open(filepath, 'w', encoding=encoding) as _file: + yaml.dump(content, _file) \ No newline at end of file diff --git a/orchestration/pipelines/cross_lsdb_dev/scripts/run-crossmatch b/orchestration/pipelines/cross_lsdb_dev/scripts/run-crossmatch index 831ec92..43fcf19 100755 --- a/orchestration/pipelines/cross_lsdb_dev/scripts/run-crossmatch +++ b/orchestration/pipelines/cross_lsdb_dev/scripts/run-crossmatch @@ -1,14 +1,14 @@ #!/usr/bin/env python3 import argparse -import time import os +import time from pathlib import Path -from dask.distributed import Client -import lsdb -from utils import setup_logger, load_yml +import lsdb +from dask.distributed import Client from executor import get_executor_config +from utils import dump_yml, load_yml, setup_logger def run(config_file): @@ -27,6 +27,7 @@ def run(config_file): param = pipe_config.get("inputs") logger.info("Parameters: %s", param) + executor_key = os.getenv("DASK_EXECUTOR_KEY", "local") cluster = get_executor_config(executor_key, config_file) @@ -41,6 +42,8 @@ def run(config_file): outputfile = Path(pipe_config.get("output_dir"), "cross-output.parquet") data.to_parquet(outputfile) + register_outputs(outputfile) + logger.info("--> Object Count: \n%s", 
str(data.count()))
 
     cluster.close()
 
@@ -48,6 +51,22 @@
     logger.info("Time elapsed: %s", str(time.time() - start_time_full))
 
 
+def register_outputs(filepath, role='main'):
+    """Register outputs in process.yml.
+
+    Args:
+        filepath (str): output path
+        role (str, optional): role name. Defaults to 'main'.
+    """
+
+    outpath = str(Path(filepath).resolve())
+    proc_yaml_file = str(Path('./process.yml').resolve())
+    process_info = load_yml(proc_yaml_file)
+    process_info['outputs'] = [{"path": outpath, "role": role}]
+    dump_yml(proc_yaml_file, process_info)
+
+
 if __name__ == "__main__":
     # Create the parser and add arguments
     parser = argparse.ArgumentParser()

From 75ea5d40d2d6f56af9076dde12a7d0e7ac83260b Mon Sep 17 00:00:00 2001
From: Cristiano Singulani
Date: Tue, 9 Jul 2024 15:04:26 +0000
Subject: [PATCH 03/20] Fixed error in upload registration and test_settings.py

---
 backend/core/product_handle.py    |  2 +-
 backend/core/tasks.py             | 12 ++++++++----
 backend/pzserver/test_settings.py |  7 +++++++
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/backend/core/product_handle.py b/backend/core/product_handle.py
index a68842b..d06af5f 100644
--- a/backend/core/product_handle.py
+++ b/backend/core/product_handle.py
@@ -42,7 +42,7 @@ def __init__(self, filepath: PathLike):
         match self.extension:
             case ".csv":
                 self.handle = CsvHandle(fp)
-            case ".fits" | ".fit" | ".hf5" | ".hdf5" | ".h5" | ".pq":
+            case ".fits" | ".fit" | ".hf5" | ".hdf5" | ".h5" | ".pq" | ".parquet":
                 self.handle = TableIOHandle(fp)
             case ".zip" | ".tar" | ".gz":
                 self.handle = CompressedHandle(fp)
diff --git a/backend/core/tasks.py b/backend/core/tasks.py
index bd7983f..1f5a894 100644
--- a/backend/core/tasks.py
+++ b/backend/core/tasks.py
@@ -50,11 +50,13 @@ def check_processes_finish():
             proc.status = proc_orchest_status
             proc = update_dates(proc, proc_orchest)
             proc.save()
+            register_outputs(proc.pk)
             logger.info(f"-> Process {str(proc)} updated.")
             procs_updated.append(proc_orches_id)
 
     return procs_updated
 
+
 def update_dates(process, data):
     started_at = data.get('started_at', str(process.created_at))
     ended_at = data.get('ended_at', str(timezone.now()))
@@ -84,6 +86,7 @@ def register_outputs(process_id):
 
     try:
         for output in outputs:
+            logger.debug('-> output: %s', output)
             filepath = output.get('path')
             rolename = output.get('role')
             role_id = file_roles.get(rolename, file_roles.get('description'))
@@ -93,16 +96,17 @@ def register_outputs(process_id):
 
         reg_product.registry()
         process.upload.status = 1  # Published status
-    except Exception as error:
+        process.save()
+    except Exception as _:
         process.upload.status = 9  # Failed status
-        logger.error("--> Failed to upload register <--")
-        logger.error(error)
+        process.save()
+        logger.exception("Failed to upload register!")
 
-    process.upload.save()
 
 def copy_upload(filepath, upload_dir):
     filepath = pathlib.Path(filepath)
     new_filepath = pathlib.Path(settings.MEDIA_ROOT, upload_dir, filepath.name)
+    logger.debug('new_filepath -> %s', str(new_filepath))
     shutil.copyfile(str(filepath), str(new_filepath))
     return str(new_filepath)
diff --git a/backend/pzserver/test_settings.py b/backend/pzserver/test_settings.py
index 451943c..b386118 100644
--- a/backend/pzserver/test_settings.py
+++ b/backend/pzserver/test_settings.py
@@ -151,6 +151,13 @@
 # https://docs.djangoproject.com/en/4.1/ref/settings/#csrf-cookie-name
 CSRF_COOKIE_NAME = "pzserver.csrftoken"
 
+# Orchestration
+ORCHEST_URL = os.getenv("ORCHEST_URL", None)
+
+if ORCHEST_URL:
+    ORCHEST_CLIENT_ID =
os.getenv("ORCHEST_CLIENT_ID") + ORCHEST_CLIENT_SECRET = os.getenv("ORCHEST_CLIENT_SECRET") + REST_FRAMEWORK = { "DEFAULT_SCHEMA_CLASS": "drf_spectacular.openapi.AutoSchema", "DEFAULT_AUTHENTICATION_CLASSES": ( From 60a0bd1984c9400d57d1280f0e89811ef7c7251f Mon Sep 17 00:00:00 2001 From: Cristiano Singulani Date: Tue, 9 Jul 2024 15:56:33 +0000 Subject: [PATCH 04/20] Fixed error in create product (tests) --- backend/core/product_steps.py | 9 ++++++++- backend/core/views/product.py | 6 +++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/backend/core/product_steps.py b/backend/core/product_steps.py index 81dfffe..ceb4977 100644 --- a/backend/core/product_steps.py +++ b/backend/core/product_steps.py @@ -8,6 +8,13 @@ from rest_framework.reverse import reverse +class NonAdminError(ValueError): + def __init__(self, message): + self.log = logging.getLogger("products") + self.log.debug('Debug: %s', message) + super().__init__(message) + + class CreateProduct: def __init__(self, data, user): @@ -58,7 +65,7 @@ def __check_official_product(self, user): if is_official: if user.profile.is_admin() is False: self.__delete() - raise ValueError( + raise NonAdminError( "Not allowed. Only users with admin permissions " "can create official products." ) diff --git a/backend/core/views/product.py b/backend/core/views/product.py index 03860f1..22e3ee5 100644 --- a/backend/core/views/product.py +++ b/backend/core/views/product.py @@ -9,7 +9,7 @@ from core.models import Product from core.product_handle import FileHandle, NotTableError -from core.product_steps import CreateProduct, RegistryProduct +from core.product_steps import CreateProduct, NonAdminError, RegistryProduct from core.serializers import ProductSerializer from core.utils import format_query_to_char from django.conf import settings @@ -105,6 +105,10 @@ def create(self, request): data = self.get_serializer(instance=product.data).data return Response(data, status=status.HTTP_201_CREATED) + except NonAdminError as e: + content = {"error": str(e)} + return Response(content, status=status.HTTP_403_FORBIDDEN) + except Exception as e: content = {"error": str(e)} return Response(content, status=status.HTTP_500_INTERNAL_SERVER_ERROR) From 38081c6d5c2d5a9ca81113c72cb04db58b7140f0 Mon Sep 17 00:00:00 2001 From: Cristiano Singulani Date: Tue, 9 Jul 2024 17:50:04 +0000 Subject: [PATCH 05/20] Fixed small bug --- docker-compose-development-orch.yml | 1 - nginx_development.conf | 39 ----------------------------- 2 files changed, 40 deletions(-) diff --git a/docker-compose-development-orch.yml b/docker-compose-development-orch.yml index cb4a124..dfbacd6 100644 --- a/docker-compose-development-orch.yml +++ b/docker-compose-development-orch.yml @@ -110,5 +110,4 @@ services: networks: orchestration-network: - external: true driver: bridge diff --git a/nginx_development.conf b/nginx_development.conf index 1e5f1e6..7238f18 100644 --- a/nginx_development.conf +++ b/nginx_development.conf @@ -2,10 +2,6 @@ upstream pzapi { server backend:8000; } -upstream orchestapi { - server orchestration:8000; -} - upstream pzfrontend { server frontend:3000; } @@ -83,39 +79,4 @@ server { autoindex off; } - # Rabbitmq Management - location /rabbitmq/ { - proxy_pass http://rabbitmq:15672/; - rewrite ^/rabbitmq/(.*)$ /$1 break; - } -} - -server { - listen 8080; - server_name orchestration; - - location /api { - include uwsgi_params; - uwsgi_pass orchestapi; - } - - location /admin { - include uwsgi_params; - uwsgi_pass orchestapi; - } - - location /o { - include uwsgi_params; 
-        uwsgi_pass orchestapi;
-    }
-
-    location /django_static {
-        include uwsgi_params;
-        uwsgi_pass orchestapi;
-    }
-
-    # Celery Flower
-    location /flower {
-        proxy_pass http://celery_flower:5555;
-    }
 }

From 924edec1db348ec7aa391cff40a2be6998e1130f Mon Sep 17 00:00:00 2001
From: Cristiano Singulani
Date: Tue, 9 Jul 2024 14:54:34 -0300
Subject: [PATCH 06/20] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index bf32cf2..dde6df9 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@ Clone the repository and access the directory:
 ```bash
 git clone https://github.com/linea-it/pzserver_app.git
 cd pzserver_app
-mkdir -p archive/data archive/log/archive/log/backend
+mkdir -p archive/data archive/log/backend
 ```
 
 Copy the file `docker-compose-development.yml` and rename to `docker-compose.yml`

From 18918b262f5b4408ba77a753aaaaa921f72af408 Mon Sep 17 00:00:00 2001
From: Cristiano Singulani
Date: Tue, 9 Jul 2024 20:12:53 +0000
Subject: [PATCH 07/20] Fixed small bugs

---
 backend/core/fixtures/initial_data.yaml | 13 +++++++++-
 backend/core/product_steps.py           |  4 +--
 backend/core/tasks.py                   |  2 ++
 docker-compose-development.yml          | 33 +------------------------
 4 files changed, 17 insertions(+), 35 deletions(-)

diff --git a/backend/core/fixtures/initial_data.yaml b/backend/core/fixtures/initial_data.yaml
index 336d37b..e139db5 100644
--- a/backend/core/fixtures/initial_data.yaml
+++ b/backend/core/fixtures/initial_data.yaml
@@ -36,4 +36,15 @@
 - model: auth.group
   pk: 1
   fields:
-    name: Admin
\ No newline at end of file
+    name: Admin
+- model: core.pipeline
+  pk: 1
+  fields:
+    name: cross_lsdb_dev
+    display_name: Cross LSDB (dev)
+    version: 0.0.1
+    description: Test pipeline
+    created_at: 2022-05-18 15:36:59.830913+00:00
+    system_config: {'executor': {'linea_slurm': {'adapt': {'maximum_jobs': 10}, 'instance': {'job_extra_directives': ['--propagate', '--time=2:00:00'], 'memory': '123GiB', 'processes': 1, 'queue': 'cpu'}}, 'local': {'memory_limit': '1GiB', 'n_workers': 2, 'threads_per_worker': 2}}, 'inputs': {'photo': '/datasets/DatasetA', 'specz': '/datasets/DatasetB'}, 'output_dir': './output'}
+    product_types_accepted: [1,2,4]
+    output_product_type: 2
\ No newline at end of file
diff --git a/backend/core/product_steps.py b/backend/core/product_steps.py
index ceb4977..1c43393 100644
--- a/backend/core/product_steps.py
+++ b/backend/core/product_steps.py
@@ -16,7 +16,6 @@ def __init__(self, message):
 
 
 class CreateProduct:
-
     def __init__(self, data, user):
         """
         Create a product with initial information
@@ -201,7 +200,8 @@ def registry(self):
             df_product = ProductHandle().df_from_file(self.main_file, nrows=5)
             # List of columns in the file.
             product_columns = df_product.columns.tolist()
-        except NotTableError:
+        except NotTableError as err:
+            self.log.warning(err)
             # Happens with compressed files (.zip, etc.).
             pass
 
diff --git a/backend/core/tasks.py b/backend/core/tasks.py
index 1f5a894..311980b 100644
--- a/backend/core/tasks.py
+++ b/backend/core/tasks.py
@@ -96,9 +96,11 @@ def register_outputs(process_id):
             reg_product.registry()
 
         process.upload.status = 1  # Published status
+        process.upload.save()
         process.save()
     except Exception as _:
         process.upload.status = 9  # Failed status
+        process.upload.save()
         process.save()
         logger.exception("Failed to upload register!")
 
diff --git a/docker-compose-development.yml b/docker-compose-development.yml
index 89df488..b00bf20 100644
--- a/docker-compose-development.yml
+++ b/docker-compose-development.yml
@@ -6,7 +6,7 @@ services:
     env_file:
       - .env
     expose:
-      # Leaves the database port accessible 
+      # Leaves the database port accessible
       # for external tools such as dbeaver, for example.
       - 5432
     volumes:
@@ -40,34 +40,6 @@ services:
     stdin_open: true
     command: yarn dev
 
-  orchestration:
-    extends:
-      file: ./orchestration/docker-compose.yml
-      service: orchestration
-
-  celery_local_worker:
-    extends:
-      file: ./orchestration/docker-compose.yml
-      service: celery_local_worker
-
-  celery_flower:
-    extends:
-      file: ./orchestration/docker-compose.yml
-      service: celery_flower
-
-  rabbitmq:
-    image: "rabbitmq:3.12.12-management"
-    hostname: "rabbitmq"
-    env_file:
-      - ./orchestration/.orchestration-env
-    ports:
-      - "15672:15672"
-      - "5672:5672"
-    volumes:
-      - "./orchestration/rabbitmq/enabled_plugins:/etc/rabbitmq/enabled_plugins"
-      - "./orchestration/rabbitmq/data/:/var/lib/rabbitmq/"
-      - "./orchestration/rabbitmq/log/:/var/log/rabbitmq/"
-
   nginx:
     image: nginx:1.21.6-alpine
     ports:
@@ -79,6 +51,3 @@ services:
     depends_on:
       - backend
       - frontend
-      - orchestration
-      - rabbitmq
-      - celery_flower
\ No newline at end of file

From 73e27187be49c962d84932e96b13e90906fe3933 Mon Sep 17 00:00:00 2001
From: Cristiano Singulani
Date: Tue, 9 Jul 2024 17:45:27 -0300
Subject: [PATCH 08/20] Update README.md

---
 README.md | 70 ++++++++++++++++++++-----------------------------------
 1 file changed, 25 insertions(+), 45 deletions(-)

diff --git a/README.md b/README.md
index dde6df9..d4b08d7 100644
--- a/README.md
+++ b/README.md
@@ -99,6 +99,26 @@ But if a local change is needed, copy the `nginx_development.conf` file to `ngin
 Also change the `docker-compose.yml` file in the nginx service at the line `- ./nginx_development.conf:/etc/nginx/conf.d/default.conf:ro`.
 In this way, the nginx.conf file represents your local environment; if you make any modifications that are necessary for the project, copy them to the template file, as the nginx.conf file is not part of the repository.
 
+### Setting Up a New Application to manage authentication
+
+Go to Django ADMIN (for local installation, open a web browser and go to the URL: http://localhost/admin) and add a new Application with the following configuration:
+
+- `client_id` and `client_secret` should be left unchanged (copy these two tokens and paste them as the values for variables `DJANGO_OAUTH_CLIENT_ID` and `DJANGO_OAUTH_CLIENT_SECRET` inside **.env** and **.env.local**).
+
+- `user` should be your superuser numeric ID (if you are the only user, it should be = 1)
+- `redirect_uris` should be left blank
+- `client_type` should be set to confidential
+- `authorization_grant_type` should be set to **'Resource owner password-based'**
+- `name` can be set to whatever you'd like
+- checkbox for `Skip authorization` should remain unchecked
+- `Algorithm`: keep the default option (No OIDC support)
+
+> **WARNING**: As mentioned above, **BEFORE** clicking on the SAVE button, you must edit the configuration files (**.env** and **.env.local**) and change the variables `DJANGO_OAUTH_CLIENT_ID` and `DJANGO_OAUTH_CLIENT_SECRET` in both files according to the values of `client_id` and `client_secret` respectively. Only after editing the configuration files should the `SAVE` button be pressed.
+
+![Adding new application](images/new_app.png)
+
+The installation is done; you can now test the newly configured application.
+
+
 ### Orchestration setup
 
 The Pz Server uses [orchestration](https://github.com/linea-it/orchestration/) to process its pipelines and for this you need to configure it:
@@ -107,40 +127,20 @@ The Pz Server uses [orchestration](https://github.com/linea-it/orchestration/) t
 mkdir orchestration/db orchestration/logs orchestration/processes
 ```
 
-The next step is to add a virtual host to your local machine. On Linux, this must be done by adding the line `127.0.0.1 orchestration` in the `/etc/host`. The file should look like this:
-
 ``` bash
-127.0.0.1 localhost
-127.0.0.1 orchestration
-
-# The following lines are desirable for IPv6 capable hosts
-::1 ip6-localhost ip6-loopback
-fe00::0 ip6-localnet
-ff00::0 ip6-mcastprefix
-ff02::1 ip6-allnodes
-ff02::2 ip6-allrouters
+cp docker-compose-development-orch.yml docker-compose.yml
 ```
 
-Start the orchestration with the command:
-
 ``` bash
-docker-compose up orchestration
+docker-compose run orchestration-api bash
 ```
 
-And follow the procedure to add an authentication app in this [link](https://github.com/linea-it/orchestration?tab=readme-ov-file#how-to-use-using-client-credential). But be careful because when integrating with the Pz Server, the orchestration will have a different url than `http://localhost`, in this case it will be [http://orchestration/admin/oauth2_provider/application/add/](http://orchestration/admin/oauth2_provider/application/add/).
-
-Another important detail is that the `CLIENT ID` and `SECRET KEY` value from the previous procedure must be changed in the `.env` of the Pz Server, looking similar to this:
-
+Inside the container, create an admin user
 ``` bash
-# Client ID and Client Secret must be registered in Django Admin
-# after backend Setup, in the Django Oauth Applications interface
-ORCHEST_CLIENT_ID=wD85gkYeqGEQvVWv5o3Cx6ppBlfDl2S88dek8Exp
-ORCHEST_CLIENT_SECRET=eM2dhhxa2vovfaAXmMwqR1M8TdGhVmBjT7co5uaA9pI4aKPDZGxtBtDG5LHfhHvZUabbSP5aUDRpTLpUJAiGS0ScNuhktbuCwuSPiz0bmEftEROJ3ZzzKp2aDNO7Vx0k
+python manage.py createsuperuser
 ```
 
-This is enough to have orchestration working with an image pinned to `orchestration/docker-compose.yml`. If you want to change the orchestration version, just change the image in `orchestration/docker-compose.yml`
-
-Once this is done, the development environment setup process is complete.
+Follow the step by step to add an authentication application (following what is in the repo's README), only changing the URL from http://localhost to http://localhost:8088, and using the admin user created previously
 
 Finally, to start the whole application:
 
@@ -148,26 +148,6 @@ Finally, to start the whole application:
 docker-compose up
 ```
 
-### Setting Up a New Application to manage authentication
-
-Go to Django ADMIN (for local installation, open a web browser and go to the URL: http://localhost/admin) and add a new Application with the following configuration:
-
-- `client_id` and `client_secret` should be left unchanged (copy these two tokens and paste them as the values for variables `DJANGO_OAUTH_CLIENT_ID` and `DJANGO_OAUTH_CLIENT_SECRET` inside **.env** and **.env.local**).
-- `user` should be your superuser numeric ID (if you are the only user, it should be = 1)
-- `redirect_uris` should be left blank
-- `client_type` should be set to confidential
-- `authorization_grant_type` should be set to **'Resource owner password-based'**
-- `name` can be set to whatever you'd like
-- checkbox for `Skip authorization` should remain unchecked
-- `Algorithm`: keep the default option (No OIDC support)
-
-> **WARNING**: As mentioned above, **BEFORE** clicking on the SAVE button, you must edit the configuration files (**.env** and **.env.local**) and change the variables `DJANGO_OAUTH_CLIENT_ID` and `DJANGO_OAUTH_CLIENT_SECRET` in both files according to the values of `client_id` and `client_secret` respectively. only after editing the configuration files, the `SAVE` button must be pressed.
-
-![Adding new application](images/new_app.png)
-
-The installation is done, you can now test the newly configured application.
-
-
 ### Some example commands
 
 Turn on background environment (if you have the application already running on the terminal, stop it with `CTRL + C` keys and up it again, but in the background using `-d` argument):

From e575a2dbde2d6b4be54df401ea8f31c853ed5a71 Mon Sep 17 00:00:00 2001
From: Cristiano Singulani
Date: Wed, 10 Jul 2024 15:32:19 -0300
Subject: [PATCH 09/20] Update README.md

---
 README.md | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index d4b08d7..300fa67 100644
--- a/README.md
+++ b/README.md
@@ -131,16 +131,34 @@ mkdir orchestration/db orchestration/logs orchestration/processes
 cp docker-compose-development-orch.yml docker-compose.yml
 ```
 
+Enter the orchestration-api container:
 ``` bash
 docker-compose run orchestration-api bash
 ```
 
-Inside the container, create an admin user
+Inside the container, create the database and an admin user:
 ``` bash
+python manage.py migrate
 python manage.py createsuperuser
 ```
 
-Follow the step by step to add an authentication application (following what is in the repo's README), only changing the URL from http://localhost to http://localhost:8088, and using the admin user created previously
+Start orchestration services:
+``` bash
+docker-compose up orchestrator
+```
+
+And then follow the steps to create an authentication application ([step by step](https://github.com/linea-it/orchestration/?tab=readme-ov-file#how-to-use-using-client-credential)), just changing the URL from http://localhost to http://localhost:8088 and using the admin user created previously. Note that when creating the authentication application, you must set `ORCHEST_CLIENT_ID` and `ORCHEST_CLIENT_SECRET` in the `.env` to the `client_id` and `client_secret` values respectively.
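+
+For example, after registering the application, the orchestration section of your `.env` should look something like this (the values below are illustrative placeholders, not real credentials):
+``` bash
+ORCHEST_CLIENT_ID=<client_id shown by the orchestration admin>
+ORCHEST_CLIENT_SECRET=<client_secret shown by the orchestration admin>
+```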
+
+All that remains is to modify the ORCHEST_URL variable in the `.env` with the value `http://orchestrator`:
+``` bash
+ORCHEST_URL=http://orchestrator
+```
 
 Finally, to start the whole application:

From 6a102b239fed6807d7d314e500544bb72c3ad249 Mon Sep 17 00:00:00 2001
From: Cristiano Singulani
Date: Wed, 10 Jul 2024 19:21:15 +0000
Subject: [PATCH 10/20] Fixed small bug

---
 .gitignore                       |  8 ++--
 backend/README.md                | 17 --------
 orchestration/docker-compose.yml | 71 ++++++++++++++++++++++++++++++++
 orchestration/nginx.conf         | 29 +++++++++++++
 4 files changed, 105 insertions(+), 20 deletions(-)
 delete mode 100644 backend/README.md
 create mode 100644 orchestration/docker-compose.yml
 create mode 100644 orchestration/nginx.conf

diff --git a/.gitignore b/.gitignore
index 1447fac..e738640 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,10 +12,10 @@ venv.bak/
 pg_data/
 pg_backups/
 
-# Ngnix 
+# Nginx
 nginx.conf
 
-# Docker Compose 
+# Docker Compose
 docker-compose.yml
 
 .env.local
@@ -25,8 +25,10 @@ orchestration/processes
 orchestration/logs
 orchestration/rabbitmq/*
 !orchestration/rabbitmq/enabled_plugins
+!orchestration/docker-compose.yml
+!orchestration/nginx.conf
 
 saml2
 
 *.pyc
-__pycache__
\ No newline at end of file
+__pycache__
diff --git a/backend/README.md b/backend/README.md
deleted file mode 100644
index 7b5ea11..0000000
--- a/backend/README.md
+++ /dev/null
@@ -1,17 +0,0 @@
-# Pz Server API
-First version of Pz Server data access API for LSST
-
-## Requirements
-- docker
-- docker-compose
-
-## Getting Started
-
-```bash
-docker-compose up -d
-```
-
-Open http://localhost with your browser to see the result.
-
-## License
-[MIT](LICENSE)
diff --git a/orchestration/docker-compose.yml b/orchestration/docker-compose.yml
new file mode 100644
index 0000000..2719dbc
--- /dev/null
+++ b/orchestration/docker-compose.yml
@@ -0,0 +1,71 @@
+version: "3.9"
+
+services:
+  orchestration-api: &orchestration
+    image: linea/orchestration:cdb4c5f
+    command: /app/sh/start.sh
+    user: "1000:1000"
+    env_file:
+      - .orchestration-env
+    networks:
+      - default
+      - orchestration-network
+    volumes:
+      - ./pipelines:/pipelines # pipelines repo
+      - ./datasets:/datasets # datasets repo
+      - ./processes:/processes
+      - ./logs:/logs
+      - ./db:/db
+    depends_on:
+      - rabbitmq
+
+  orchestration-local-worker: &celery_local_worker
+    <<: *orchestration
+    hostname: "orchestration-local-worker"
+    command: /app/sh/local_worker.sh
+    depends_on:
+      - orchestration-api
+
+  orchestration-flower: &celery_flower
+    <<: *orchestration
+    hostname: "flower"
+    command: /app/sh/flower.sh
+    ports:
+      - 5555:5555
+    depends_on:
+      - orchestration-local-worker
+
+  rabbitmq:
+    image: "rabbitmq:3.12.12-management"
+    hostname: "rabbitmq"
+    env_file:
+      - .orchestration-env
+    ports:
+      - "15672:15672"
+      - "5672:5672"
+    networks:
+      - default
+      - orchestration-network
+    volumes:
+      - "./rabbitmq/enabled_plugins:/etc/rabbitmq/enabled_plugins"
+      - "./rabbitmq/data/:/var/lib/rabbitmq/"
+      - "./rabbitmq/log/:/var/log/rabbitmq/"
+
+  orchestrator:
+    image: nginx:1.21.6-alpine
+    ports:
+      - 8088:80
+    volumes:
+      - ./nginx.conf:/etc/nginx/conf.d/default.conf:ro
+    networks:
+      - default
+      - orchestration-network
+    depends_on:
+      - orchestration-api
+      - orchestration-flower
+      - rabbitmq
+
+networks:
+  orchestration-network:
+    external: true
+    driver: bridge
diff --git a/orchestration/nginx.conf b/orchestration/nginx.conf
new file mode 100644
index 0000000..74d089f
--- /dev/null
+++ b/orchestration/nginx.conf
@@ -0,0 +1,29 @@
+upstream orchestapi {
+    server orchestration-api:8000;
+}
+
+server 
{ + listen 80; + server_name localhost; + + location /api { + include uwsgi_params; + uwsgi_pass orchestapi; + } + + location /admin { + include uwsgi_params; + uwsgi_pass orchestapi; + } + + location /o { + include uwsgi_params; + uwsgi_pass orchestapi; + } + + location /django_static { + include uwsgi_params; + uwsgi_pass orchestapi; + } + +} From 9dbce8ae18da6d4d9db842872bb523ba6d34bf4a Mon Sep 17 00:00:00 2001 From: Cristiano Singulani Date: Wed, 10 Jul 2024 20:48:48 +0000 Subject: [PATCH 11/20] Fixed small bug --- backend/pzserver/settings.py | 148 +++++++++++++++-------------------- 1 file changed, 63 insertions(+), 85 deletions(-) diff --git a/backend/pzserver/settings.py b/backend/pzserver/settings.py index f317f59..c706d1d 100644 --- a/backend/pzserver/settings.py +++ b/backend/pzserver/settings.py @@ -12,9 +12,8 @@ import os -import saml2.saml - import saml2 +import saml2.saml # Build paths inside the project like this: BASE_DIR / 'subdir'. # BASE_DIR = Path(__file__).resolve().parent.parent @@ -109,7 +108,7 @@ # rabbitmq AMQP_HOST = os.getenv("RABBITMQ_HOST", "rabbitmq") -AMQP_PORT = os.getenv("RABBITMQ_PORT","5672") +AMQP_PORT = os.getenv("RABBITMQ_PORT", "5672") AMQP_USER = os.getenv("RABBITMQ_DEFAULT_USER", "orcadmin") AMQP_PASS = os.getenv("RABBITMQ_DEFAULT_PASS", "adminorc") AMQP_VHOST = os.getenv("RABBITMQ_DEFAULT_VHOST", "/") @@ -262,6 +261,7 @@ "level": LOGGING_LEVEL, "class": "logging.handlers.RotatingFileHandler", "filename": os.path.join(LOG_DIR, "celerybeat.log"), + }, "saml": { "level": LOGGING_LEVEL, "class": "logging.handlers.RotatingFileHandler", @@ -270,14 +270,6 @@ "backupCount": 5, "formatter": "standard", }, - # "shibboleth": { - # "level": LOGGING_LEVEL, - # "class": "logging.handlers.RotatingFileHandler", - # "filename": os.path.join(LOG_DIR, "shibboleth.log"), - # "maxBytes": 1024 * 1024 * 5, # 5 MB - # "backupCount": 5, - # "formatter": "standard", - # }, "products": { "level": LOGGING_LEVEL, "class": "logging.handlers.RotatingFileHandler", @@ -305,16 +297,12 @@ }, "beat": { "handlers": ["beat"], + }, "saml": { "handlers": ["saml"], "level": LOGGING_LEVEL, "propagate": True, }, - # "shibboleth": { - # "handlers": ["shibboleth"], - # "level": LOGGING_LEVEL, - # "propagate": True, - # }, "products": { "handlers": ["products"], "level": LOGGING_LEVEL, @@ -336,19 +324,19 @@ if os.getenv("AUTH_SHIB_URL", None): FQDN = os.getenv("URI", "http://localhost") CERT_DIR = "/saml2/certificates" - SIG_KEY_PEM = os.getenv("SIG_KEY_PEM", os.path.join(CERT_DIR, 'pzkey.pem')) - SIG_CERT_PEM = os.getenv("SIG_CERT_PEM", os.path.join(CERT_DIR, 'pzcert.pem')) + SIG_KEY_PEM = os.getenv("SIG_KEY_PEM", os.path.join(CERT_DIR, "pzkey.pem")) + SIG_CERT_PEM = os.getenv("SIG_CERT_PEM", os.path.join(CERT_DIR, "pzcert.pem")) ENCRYP_KEY_PEM = os.getenv("ENCRYP_KEY_PEM", SIG_KEY_PEM) ENCRYP_CERT_PEM = os.getenv("ENCRYP_CERT_PEM", SIG_CERT_PEM) - MIDDLEWARE.append('djangosaml2.middleware.SamlSessionMiddleware') + MIDDLEWARE.append("djangosaml2.middleware.SamlSessionMiddleware") # configurações relativas ao session cookie - SAML_SESSION_COOKIE_NAME = 'saml_session' + SAML_SESSION_COOKIE_NAME = "saml_session" SESSION_COOKIE_SECURE = True - # Qualquer view que requer um usuário autenticado deve redirecionar o navegador para esta url - LOGIN_URL = '/saml2/login/' + # Qualquer view que requer um usuário autenticado deve redirecionar o navegador para esta url + LOGIN_URL = "/saml2/login/" # Encerra a sessão quando o usuário fecha o navegador SESSION_EXPIRE_AT_BROWSER_CLOSE = True @@ -367,98 
+355,88 @@ SAML_CSP_HANDLER = "" # URL para redirecionamento após a autenticação - LOGIN_REDIRECT_URL = '/' + LOGIN_REDIRECT_URL = "/" - SAML_ATTRIBUTE_MAPPING = { + SAML_ATTRIBUTE_MAPPING = { "eduPersonPrincipalName": ("username",), "sn": ("name",), "cn": ("full_name",), - "email": ("email",) + "email": ("email",), } SAML_CONFIG = { # Biblioteca usada para assinatura e criptografia - 'xmlsec_binary': '/usr/bin/xmlsec1', - 'entityid': FQDN + '/saml2/metadata/', + "xmlsec_binary": "/usr/bin/xmlsec1", + "entityid": FQDN + "/saml2/metadata/", # Diretório contendo os esquemas de mapeamento de atributo - 'attribute_map_dir': os.path.join(BASE_DIR, 'attribute-maps'), - 'description': 'SP Pz Server', - 'service': { - 'sp' : { - 'name': 'sp_pzserver', - 'ui_info': { - 'display_name': {'text':'SP Pz', 'lang':'en'}, - 'description': {'text':'Pz Service Provider', 'lang':'en'}, - 'information_url': {'text': f"{FQDN}/about", 'lang':'en'}, - 'privacy_statement_url': {'text': FQDN, 'lang':'en'} + "attribute_map_dir": os.path.join(BASE_DIR, "attribute-maps"), + "description": "SP Pz Server", + "service": { + "sp": { + "name": "sp_pzserver", + "ui_info": { + "display_name": {"text": "SP Pz", "lang": "en"}, + "description": {"text": "Pz Service Provider", "lang": "en"}, + "information_url": {"text": f"{FQDN}/about", "lang": "en"}, + "privacy_statement_url": {"text": FQDN, "lang": "en"}, }, - 'name_id_format': [ + "name_id_format": [ "urn:oasis:names:tc:SAML:2.0:nameid-format:persistent", "urn:oasis:names:tc:SAML:2.0:nameid-format:transient", ], # Indica os endpoints dos serviços fornecidos - 'endpoints': { - 'assertion_consumer_service': [ - (FQDN +'/saml2/acs/', - saml2.BINDING_HTTP_POST), + "endpoints": { + "assertion_consumer_service": [ + (FQDN + "/saml2/acs/", saml2.BINDING_HTTP_POST), ], - 'single_logout_service': [ - (FQDN + '/saml2/ls/', - saml2.BINDING_HTTP_REDIRECT), - (FQDN + '/saml2/ls/post', - saml2.BINDING_HTTP_POST), + "single_logout_service": [ + (FQDN + "/saml2/ls/", saml2.BINDING_HTTP_REDIRECT), + (FQDN + "/saml2/ls/post", saml2.BINDING_HTTP_POST), ], }, # Algoritmos utilizados #'signing_algorithm': saml2.xmldsig.SIG_RSA_SHA256, #'digest_algorithm': saml2.xmldsig.DIGEST_SHA256, - - 'force_authn': False, - 'name_id_format_allow_create': False, - + "force_authn": False, + "name_id_format_allow_create": False, # Indica que as respostas de autenticação para este SP devem ser assinadas - 'want_response_signed': True, - + "want_response_signed": True, # Indica se as solicitações de autenticação enviadas por este SP devem ser assinadas - 'authn_requests_signed': True, - + "authn_requests_signed": True, # Indica se este SP deseja que o IdP envie as asserções assinadas - 'want_assertions_signed': False, - - 'only_use_keys_in_metadata': True, - 'allow_unsolicited': False, + "want_assertions_signed": False, + "only_use_keys_in_metadata": True, + "allow_unsolicited": False, }, }, - # Indica onde os metadados podem ser encontrados - 'metadata': { - 'local': [os.getenv("IDP_METADATA")], + "metadata": { + "local": [os.getenv("IDP_METADATA")], }, - - 'debug': os.getenv("DEBUG", 1), - + "debug": os.getenv("DEBUG", 1), # Signature - 'key_file': SIG_KEY_PEM, # private part - 'cert_file': SIG_CERT_PEM, # public part - + "key_file": SIG_KEY_PEM, # private part + "cert_file": SIG_CERT_PEM, # public part # Encriptation - 'encryption_keypairs': [{ - 'key_file': ENCRYP_KEY_PEM, # private part - 'cert_file': ENCRYP_CERT_PEM, # public part - }], - - 'contact_person': [ - {'given_name': 'LIneA', - 'sur_name': 
'Team', - 'company': 'LIneA', - 'email_address': 'itteam@linea.org.br', - 'contact_type': 'technical'}, + "encryption_keypairs": [ + { + "key_file": ENCRYP_KEY_PEM, # private part + "cert_file": ENCRYP_CERT_PEM, # public part + } ], - - # Descreve a organização responsável pelo serviço - 'organization': { - 'name': [('LIneA', 'pt-br')], - 'display_name': [('LIneA', 'pt-br')], - 'url': [('https://linea.org.br/', 'pt-br')], + "contact_person": [ + { + "given_name": "LIneA", + "sur_name": "Team", + "company": "LIneA", + "email_address": "itteam@linea.org.br", + "contact_type": "technical", + }, + ], + # Descreve a organização responsável pelo serviço + "organization": { + "name": [("LIneA", "pt-br")], + "display_name": [("LIneA", "pt-br")], + "url": [("https://linea.org.br/", "pt-br")], }, } From 1e1da261be6193b30088e2a6cb78d922b7860eaa Mon Sep 17 00:00:00 2001 From: Cristiano Singulani Date: Wed, 31 Jul 2024 22:10:51 +0000 Subject: [PATCH 12/20] Added training set maker pipeline --- .gitmodules | 3 + orchestration/pipelines | 1 + .../pipelines/cross_lsdb_dev/VERSION | 1 - .../pipelines/cross_lsdb_dev/config.py | 53 ------------ .../pipelines/cross_lsdb_dev/config.yml | 19 ----- .../pipelines/cross_lsdb_dev/environment.yml | 14 ---- .../pipelines/cross_lsdb_dev/install.sh | 34 -------- .../cross_lsdb_dev/packages/__init__.py | 0 .../cross_lsdb_dev/packages/executor.py | 47 ----------- .../cross_lsdb_dev/packages/utils.py | 59 ------------- orchestration/pipelines/cross_lsdb_dev/run.sh | 40 --------- .../cross_lsdb_dev/scripts/run-crossmatch | 84 ------------------- orchestration/pipelines/load_pipelines.sh | 12 --- orchestration/pipelines/pipelines.yaml | 8 -- .../pipelines/pipelines.yaml.template | 8 -- 15 files changed, 4 insertions(+), 379 deletions(-) create mode 100644 .gitmodules create mode 160000 orchestration/pipelines delete mode 100644 orchestration/pipelines/cross_lsdb_dev/VERSION delete mode 100644 orchestration/pipelines/cross_lsdb_dev/config.py delete mode 100644 orchestration/pipelines/cross_lsdb_dev/config.yml delete mode 100644 orchestration/pipelines/cross_lsdb_dev/environment.yml delete mode 100755 orchestration/pipelines/cross_lsdb_dev/install.sh delete mode 100644 orchestration/pipelines/cross_lsdb_dev/packages/__init__.py delete mode 100755 orchestration/pipelines/cross_lsdb_dev/packages/executor.py delete mode 100755 orchestration/pipelines/cross_lsdb_dev/packages/utils.py delete mode 100755 orchestration/pipelines/cross_lsdb_dev/run.sh delete mode 100755 orchestration/pipelines/cross_lsdb_dev/scripts/run-crossmatch delete mode 100755 orchestration/pipelines/load_pipelines.sh delete mode 100644 orchestration/pipelines/pipelines.yaml delete mode 100644 orchestration/pipelines/pipelines.yaml.template diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..f7fe3f4 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "orchestration/pipelines"] + path = orchestration/pipelines + url = https://github.com/linea-it/pzserver_pipelines diff --git a/orchestration/pipelines b/orchestration/pipelines new file mode 160000 index 0000000..cb0af2e --- /dev/null +++ b/orchestration/pipelines @@ -0,0 +1 @@ +Subproject commit cb0af2e291982542edd9088a1a714cb1696f556e diff --git a/orchestration/pipelines/cross_lsdb_dev/VERSION b/orchestration/pipelines/cross_lsdb_dev/VERSION deleted file mode 100644 index 8a9ecc2..0000000 --- a/orchestration/pipelines/cross_lsdb_dev/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.0.1 \ No newline at end of file diff --git 
a/orchestration/pipelines/cross_lsdb_dev/config.py b/orchestration/pipelines/cross_lsdb_dev/config.py deleted file mode 100644 index 5b963ef..0000000 --- a/orchestration/pipelines/cross_lsdb_dev/config.py +++ /dev/null @@ -1,53 +0,0 @@ -from pydantic import BaseModel -import os - -DATASETS_DIR = os.getenv("DATASETS_DIR", "/datasets") - - -class Instance(BaseModel): - processes: int = 1 - memory: str = "123GiB" - queue: str = "cpu" - job_extra_directives: list[str] = ["--propagate", "--time=2:00:00"] - - -class Adapt(BaseModel): - maximum_jobs: int = 10 - - -class LIneASlurm(BaseModel): - instance: Instance = Instance() - adapt: Adapt = Adapt() - - -class Local(BaseModel): - n_workers: int = 2 - threads_per_worker: int = 2 - memory_limit: str = "1GiB" - - -class Inputs(BaseModel): - photo: str = f"{DATASETS_DIR}/DatasetA" - specz: str = f"{DATASETS_DIR}/DatasetB" - - -class Executor(BaseModel): - local: Local = Local() - linea_slurm: LIneASlurm = LIneASlurm() - - -class Config(BaseModel): - output_dir: str = "./output" - executor: Executor = Executor() - inputs: Inputs = Inputs() - - -if __name__ == "__main__": - import yaml - - cfg = Config() - - with open('config.yml', 'w') as outfile: - data_json = cfg.model_dump() - print(data_json) - yaml.dump(data_json, outfile) diff --git a/orchestration/pipelines/cross_lsdb_dev/config.yml b/orchestration/pipelines/cross_lsdb_dev/config.yml deleted file mode 100644 index 079bc21..0000000 --- a/orchestration/pipelines/cross_lsdb_dev/config.yml +++ /dev/null @@ -1,19 +0,0 @@ -executor: - linea_slurm: - adapt: - maximum_jobs: 10 - instance: - job_extra_directives: - - --propagate - - --time=2:00:00 - memory: 123GiB - processes: 1 - queue: cpu - local: - memory_limit: 1GiB - n_workers: 2 - threads_per_worker: 2 -inputs: - photo: /datasets/DatasetA - specz: /datasets/DatasetB -output_dir: ./output diff --git a/orchestration/pipelines/cross_lsdb_dev/environment.yml b/orchestration/pipelines/cross_lsdb_dev/environment.yml deleted file mode 100644 index 673503b..0000000 --- a/orchestration/pipelines/cross_lsdb_dev/environment.yml +++ /dev/null @@ -1,14 +0,0 @@ -name: pipe_cross_lsdb_dev -channels: - - defaults -dependencies: - - python=3.10 - - pip: - - PyYaml - - dask==2024.1.0 - - distributed==2024.1.0 - - dask-jobqueue==0.8.2 - - hipscat==0.2.1 - - hipscat-import==0.2.1 - - lsdb==0.1.0 - diff --git a/orchestration/pipelines/cross_lsdb_dev/install.sh b/orchestration/pipelines/cross_lsdb_dev/install.sh deleted file mode 100755 index 11d0265..0000000 --- a/orchestration/pipelines/cross_lsdb_dev/install.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash --login - -source `dirname $CONDA_EXE`/activate || { echo "Failed to activate Conda environment"; exit 1; } - -if [ ! -d "$PIPELINES_DIR" ]; then - echo "Error: PIPELINES_DIR not defined." - exit 1 -fi - -PIPE_BASE="$PIPELINES_DIR/cross_lsdb_dev" -HASENV=`conda env list | grep 'pipe_cross_lsdb_dev '` - -if [ -z "$HASENV" ]; then - echo "Create virtual environment..." - conda env create -f $PIPE_BASE/environment.yml - echo "Virtual environment created and packages installed." -else - if [ "$CONDA_FORCE_UPDATE" == "yes" ]; then - echo "Virtual environment already exists. Updating..." 
- conda env update --file $PIPE_BASE/environment.yml --prune - fi -fi - -conda activate pipe_cross_lsdb_dev - -export PATH=$PATH:"$PIPE_BASE/scripts/" - -if [ -z "$PYTHONPATH" ]; then - export PYTHONPATH="$PIPE_BASE/packages/" -else - export PYTHONPATH=$PYTHONPATH:"$PIPE_BASE/packages/" -fi - -echo "Conda Environment: $CONDA_DEFAULT_ENV" diff --git a/orchestration/pipelines/cross_lsdb_dev/packages/__init__.py b/orchestration/pipelines/cross_lsdb_dev/packages/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/orchestration/pipelines/cross_lsdb_dev/packages/executor.py b/orchestration/pipelines/cross_lsdb_dev/packages/executor.py deleted file mode 100755 index fcb6746..0000000 --- a/orchestration/pipelines/cross_lsdb_dev/packages/executor.py +++ /dev/null @@ -1,47 +0,0 @@ -"""_summary_ """ - -from dask.distributed import LocalCluster -from dask_jobqueue import SLURMCluster -from utils import load_yml -import logging -from typing import Union - - -def get_executor_config( - executor_key: str, config_file: str -) -> Union[LocalCluster, SLURMCluster]: - """returns the configuration of where the pipeline will be run - - Args: - executor_key (str): executor key - config_file (str): config path - - Returns: - Union[LocalCluster, SLURMCluster]: Executor object - """ - - logger = logging.getLogger() - logger.info("Getting executor config: %s", executor_key) - - configs = load_yml(config_file) - - try: - config = configs["executor"][executor_key] - except KeyError: - logger.warning("The executor key not found. Using minimal local config.") - executor_key = "minimal" - - match executor_key: - case "local": - cluster = LocalCluster(**config) - case "linea-slurm": - icfg = config["instance"] - cluster = SLURMCluster(**icfg) - cluster.adapt(**config["adapt"]) - case _: - cluster = LocalCluster( - n_workers=1, - threads_per_worker=1, - ) - - return cluster diff --git a/orchestration/pipelines/cross_lsdb_dev/packages/utils.py b/orchestration/pipelines/cross_lsdb_dev/packages/utils.py deleted file mode 100755 index d23b553..0000000 --- a/orchestration/pipelines/cross_lsdb_dev/packages/utils.py +++ /dev/null @@ -1,59 +0,0 @@ -"""_summary_ """ - -import logging -import os -import pathlib -from typing import Any - -import yaml - - -def setup_logger(name="pipeline-logger"): - """ - Configures the logger for recording events and messages. - - Returns: - logging.Logger: Configured logger instance. 
- """ - - logger = logging.getLogger(name) - logger.setLevel(logging.DEBUG) - - formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s") - - logdir = os.getenv("LOG_DIR", ".") - - file_handler = logging.FileHandler(pathlib.Path(logdir, f"{name}.log")) - file_handler.setLevel(logging.DEBUG) - file_handler.setFormatter(formatter) - - logger.addHandler(file_handler) - - return logger - - -def load_yml(filepath: str) -> Any: - """Load yaml file - - Args: - filepath (str): filepath - - Returns: - Any: yaml file content - """ - with open(filepath, encoding="utf-8") as _file: - content = yaml.safe_load(_file) - - return content - - -def dump_yml(filepath, content, encoding="utf-8"): - """ Dump yaml file - - Args: - filepath (str): filepath output - content (dict): yaml content - """ - - with open(filepath, 'w', encoding=encoding) as _file: - yaml.dump(content, _file) \ No newline at end of file diff --git a/orchestration/pipelines/cross_lsdb_dev/run.sh b/orchestration/pipelines/cross_lsdb_dev/run.sh deleted file mode 100755 index 093ce76..0000000 --- a/orchestration/pipelines/cross_lsdb_dev/run.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash - -# Check if the argument was given -if [ $# -eq 0 ]; then - echo "Error: No arguments provided." - exit 1 -fi - -ARGS=$@ -shift $# - -if [ ! -d "$DASK_EXECUTOR_KEY" ]; then - export DASK_EXECUTOR_KEY=local -fi - -if [ ! -d "$PIPELINES_DIR" ]; then - echo "Error: PIPELINES_DIR not defined." - exit 1 -fi - -INSTALL_PIPE="$PIPELINES_DIR/cross_lsdb_dev/install.sh" - -if [ ! -f "$INSTALL_PIPE" ]; then - echo "Error: Installation script not found." - exit 1 -fi - -# Installing pipeline -echo "Installing pipeline..." -. "$INSTALL_PIPE" - -set -xe - -# Run the Python code with the given argument -# run-crossmatch $ARGS || { echo "Failed to run-crossmatch"; exit 1; } -run-crossmatch $ARGS - -echo $? >> return.code - -echo "Done." 
\ No newline at end of file diff --git a/orchestration/pipelines/cross_lsdb_dev/scripts/run-crossmatch b/orchestration/pipelines/cross_lsdb_dev/scripts/run-crossmatch deleted file mode 100755 index 43fcf19..0000000 --- a/orchestration/pipelines/cross_lsdb_dev/scripts/run-crossmatch +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -import os -import time -from pathlib import Path - -import lsdb -from dask.distributed import Client -from executor import get_executor_config -from utils import dump_yml, load_yml, setup_logger - - -def run(config_file): - """Run lsdb crossmatch - - Args: - config_file (str): lsdb parameters - """ - - logger = setup_logger(name="cross-lsdb") - - start_time_full = time.time() - - # Loading configurations - pipe_config = load_yml(config_file) - param = pipe_config.get("inputs") - logger.info("Parameters: %s", param) - - - executor_key = os.getenv("DASK_EXECUTOR_KEY", "local") - cluster = get_executor_config(executor_key, config_file) - - with Client(cluster): - phot_dp0 = lsdb.read_hipscat(param.get("photo")) - spec_dp0 = lsdb.read_hipscat(param.get("specz")) - - cross = spec_dp0.crossmatch(phot_dp0) - data = cross.compute() - - os.makedirs(pipe_config.get("output_dir"), exist_ok=True) - outputfile = Path(pipe_config.get("output_dir"), "cross-output.parquet") - data.to_parquet(outputfile) - - register_outputs(outputfile) - - logger.info("--> Object Count: \n%s", str(data.count())) - - cluster.close() - - logger.info("Time elapsed: %s", str(time.time() - start_time_full)) - - -def register_outputs(filepath, role='main'): - """ Register outputs in process.yml - - Args: - filepath (str): output path - role (str, optional): role name. Defaults to 'main'. - """ - - outpath = str(Path(filepath).resolve()) - proc_yaml_file = str(Path('./process.yml').resolve()) - process_info = load_yml(proc_yaml_file) - process_info['outputs'] = [{"path": outpath, "role": role}] - dump_yml(proc_yaml_file, process_info) - - - - - - - - -if __name__ == "__main__": - # Create the parser and add arguments - parser = argparse.ArgumentParser() - parser.add_argument(dest="config_path", help="yaml config path") - - args = parser.parse_args() - config_path = args.config_path - - # Run pipeline - run(config_path) diff --git a/orchestration/pipelines/load_pipelines.sh b/orchestration/pipelines/load_pipelines.sh deleted file mode 100755 index 1749f37..0000000 --- a/orchestration/pipelines/load_pipelines.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -cat << EOF > ${PIPELINES_DIR}/pipelines.yaml -cross_lsdb_dev: - display_name: 'LSDB Crossmatch (dev)' - path: '${PIPELINES_DIR}/cross_lsdb_dev' - executor: 'local' # only to orchestration - runner: 'bash' - executable: 'run.sh' - schema_config: '${PIPELINES_DIR}/cross_lsdb_dev/config.py' - version: '0.0.1' -EOF \ No newline at end of file diff --git a/orchestration/pipelines/pipelines.yaml b/orchestration/pipelines/pipelines.yaml deleted file mode 100644 index 18c911c..0000000 --- a/orchestration/pipelines/pipelines.yaml +++ /dev/null @@ -1,8 +0,0 @@ -cross_lsdb_dev: - display_name: 'LSDB Crossmatch (dev)' - path: '/pipelines/cross_lsdb_dev' - executor: 'local' # only to orchestration - runner: 'bash' - executable: 'run.sh' - schema_config: '/pipelines/cross_lsdb_dev/config.py' - version: '0.0.1' diff --git a/orchestration/pipelines/pipelines.yaml.template b/orchestration/pipelines/pipelines.yaml.template deleted file mode 100644 index 18c911c..0000000 --- a/orchestration/pipelines/pipelines.yaml.template +++ /dev/null @@ 
-1,8 +0,0 @@ -cross_lsdb_dev: - display_name: 'LSDB Crossmatch (dev)' - path: '/pipelines/cross_lsdb_dev' - executor: 'local' # only to orchestration - runner: 'bash' - executable: 'run.sh' - schema_config: '/pipelines/cross_lsdb_dev/config.py' - version: '0.0.1' From 3126774ffd02a16e720c8dedaf0f58c99e9dd5a3 Mon Sep 17 00:00:00 2001 From: Cristiano Singulani Date: Wed, 31 Jul 2024 22:12:04 +0000 Subject: [PATCH 13/20] Added training set maker pipeline --- .devcontainer/docker-compose.yml | 1 + backend/core/admin.py | 2 +- backend/core/fixtures/initial_data.yaml | 6 +- backend/core/views/process.py | 37 ++++++-- .../DatasetA/Norder=0/Dir=0/Npix=0.parquet | Bin 5074 -> 0 bytes .../DatasetA/Norder=0/Dir=0/Npix=11.parquet | Bin 5074 -> 0 bytes .../DatasetA/Norder=0/Dir=0/Npix=4.parquet | Bin 7618 -> 0 bytes .../DatasetA/Norder=0/Dir=0/Npix=8.parquet | Bin 5802 -> 0 bytes .../datasets/DatasetA/_common_metadata | Bin 3305 -> 0 bytes orchestration/datasets/DatasetA/_metadata | Bin 6627 -> 0 bytes .../datasets/DatasetA/point_map.fits | Bin 1581120 -> 0 bytes .../datasets/DatasetA/provenance_info.json | 47 ---------- .../DatasetB/Norder=0/Dir=0/Npix=4.parquet | Bin 7130 -> 0 bytes .../DatasetB/Norder=0/Dir=0/Npix=8.parquet | Bin 5731 -> 0 bytes .../datasets/DatasetB/_common_metadata | Bin 3294 -> 0 bytes orchestration/datasets/DatasetB/_metadata | Bin 4947 -> 0 bytes .../datasets/DatasetB/catalog_info.json | 8 -- .../datasets/DatasetB/provenance_info.json | 47 ---------- .../Norder=0/Dir=0/Npix=4.parquet | Bin 0 -> 6929 bytes .../datasets/mini_dataset/_common_metadata | Bin 0 -> 3639 bytes orchestration/datasets/mini_dataset/_metadata | Bin 0 -> 4574 bytes .../catalog_info.json | 4 +- .../datasets/mini_dataset/partition_info.csv | 2 + .../{DatasetB => mini_dataset}/point_map.fits | Bin 1581120 -> 1581120 bytes .../mini_dataset/provenance_info.json | 50 +++++++++++ orchestration/docker-compose.yml | 1 + orchestration/pipelines | 2 +- .../pipelines.old/cross_lsdb_dev/VERSION | 1 + .../pipelines.old/cross_lsdb_dev/config.py | 53 +++++++++++ .../pipelines.old/cross_lsdb_dev/config.yml | 19 ++++ .../cross_lsdb_dev/environment.yml | 14 +++ .../pipelines.old/cross_lsdb_dev/install.sh | 34 +++++++ .../cross_lsdb_dev/packages/__init__.py | 0 .../cross_lsdb_dev/packages/executor.py | 47 ++++++++++ .../cross_lsdb_dev/packages/utils.py | 59 ++++++++++++ .../pipelines.old/cross_lsdb_dev/run.sh | 40 +++++++++ .../cross_lsdb_dev/scripts/run-crossmatch | 84 ++++++++++++++++++ orchestration/pipelines.old/load_pipelines.sh | 12 +++ orchestration/pipelines.old/pipelines.yaml | 8 ++ .../pipelines.old/pipelines.yaml.template | 8 ++ 40 files changed, 472 insertions(+), 114 deletions(-) delete mode 100644 orchestration/datasets/DatasetA/Norder=0/Dir=0/Npix=0.parquet delete mode 100644 orchestration/datasets/DatasetA/Norder=0/Dir=0/Npix=11.parquet delete mode 100644 orchestration/datasets/DatasetA/Norder=0/Dir=0/Npix=4.parquet delete mode 100644 orchestration/datasets/DatasetA/Norder=0/Dir=0/Npix=8.parquet delete mode 100644 orchestration/datasets/DatasetA/_common_metadata delete mode 100644 orchestration/datasets/DatasetA/_metadata delete mode 100644 orchestration/datasets/DatasetA/point_map.fits delete mode 100644 orchestration/datasets/DatasetA/provenance_info.json delete mode 100644 orchestration/datasets/DatasetB/Norder=0/Dir=0/Npix=4.parquet delete mode 100644 orchestration/datasets/DatasetB/Norder=0/Dir=0/Npix=8.parquet delete mode 100644 orchestration/datasets/DatasetB/_common_metadata delete mode 100644 
orchestration/datasets/DatasetB/_metadata delete mode 100644 orchestration/datasets/DatasetB/catalog_info.json delete mode 100644 orchestration/datasets/DatasetB/provenance_info.json create mode 100644 orchestration/datasets/mini_dataset/Norder=0/Dir=0/Npix=4.parquet create mode 100644 orchestration/datasets/mini_dataset/_common_metadata create mode 100644 orchestration/datasets/mini_dataset/_metadata rename orchestration/datasets/{DatasetA => mini_dataset}/catalog_info.json (63%) create mode 100644 orchestration/datasets/mini_dataset/partition_info.csv rename orchestration/datasets/{DatasetB => mini_dataset}/point_map.fits (99%) create mode 100644 orchestration/datasets/mini_dataset/provenance_info.json create mode 100644 orchestration/pipelines.old/cross_lsdb_dev/VERSION create mode 100644 orchestration/pipelines.old/cross_lsdb_dev/config.py create mode 100644 orchestration/pipelines.old/cross_lsdb_dev/config.yml create mode 100644 orchestration/pipelines.old/cross_lsdb_dev/environment.yml create mode 100755 orchestration/pipelines.old/cross_lsdb_dev/install.sh create mode 100644 orchestration/pipelines.old/cross_lsdb_dev/packages/__init__.py create mode 100755 orchestration/pipelines.old/cross_lsdb_dev/packages/executor.py create mode 100755 orchestration/pipelines.old/cross_lsdb_dev/packages/utils.py create mode 100755 orchestration/pipelines.old/cross_lsdb_dev/run.sh create mode 100755 orchestration/pipelines.old/cross_lsdb_dev/scripts/run-crossmatch create mode 100755 orchestration/pipelines.old/load_pipelines.sh create mode 100644 orchestration/pipelines.old/pipelines.yaml create mode 100644 orchestration/pipelines.old/pipelines.yaml.template diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml index 296c83c..bc1b17f 100644 --- a/.devcontainer/docker-compose.yml +++ b/.devcontainer/docker-compose.yml @@ -8,6 +8,7 @@ services: # dockerfile: backend/Dockerfile volumes: - ..:/workspaces:cached + - ./backend:/app:cached - ./archive/log/backend:/archive/log:cached - ./archive/data:/archive/data:cached - ./orchestration/pipelines:/pipelines:cached diff --git a/backend/core/admin.py b/backend/core/admin.py index 96e783a..2b2af82 100644 --- a/backend/core/admin.py +++ b/backend/core/admin.py @@ -58,7 +58,7 @@ class ProductAdmin(admin.ModelAdmin): "status", ) - search_fields = ("name", "display_name") + search_fields = ("internal_name", "display_name") form = ProductAdminForm diff --git a/backend/core/fixtures/initial_data.yaml b/backend/core/fixtures/initial_data.yaml index e139db5..514fa09 100644 --- a/backend/core/fixtures/initial_data.yaml +++ b/backend/core/fixtures/initial_data.yaml @@ -1,9 +1,9 @@ - model: core.release pk: 1 fields: - name: lsst_dp0 - display_name: LSST DP0 - description: LSST Data Preview 0 + name: mini_dataset + display_name: Small Dataset + description: Small dataset for example runs created_at: 2022-05-18 15:36:16.234786+00:00 - model: core.producttype pk: 1 diff --git a/backend/core/views/process.py b/backend/core/views/process.py index c5e2133..ea6a663 100644 --- a/backend/core/views/process.py +++ b/backend/core/views/process.py @@ -19,8 +19,8 @@ class ProcessFilter(filters.FilterSet): release__isnull = filters.BooleanFilter( field_name="release", lookup_expr="isnull") - pipeline__or = filters.CharFilter(method="filter_type_name") - pipeline = filters.CharFilter(method="filter_type_name") + pipeline__or = filters.CharFilter(method="filter_pipeline") + pipeline = filters.CharFilter(method="filter_pipeline") release_name__or = 
filters.CharFilter(method="filter_release")
     release_name = filters.CharFilter(method="filter_release")
 
@@ -113,10 +113,17 @@ def create(self, request):
             main_file = _input.files.get(role=0)
             filepath = pathlib.Path(settings.MEDIA_ROOT, _input.path, main_file.name)
             print("FILEPATH: ", filepath)
-            inputfiles.append(str(filepath))
 
-        used_config['inputfiles'] = inputfiles
-        used_config['inputs'] = {'release': release_path}
+            ra = self.__get_mapped_column(_input, 'RA')
+            dec = self.__get_mapped_column(_input, 'Dec')
+
+            _file = {'path': str(filepath), 'columns': {'ra': ra, 'dec': dec}}
+            inputfiles.append(_file)
+
+        used_config['inputs'] = {
+            'dataset': {'path': release_path},
+            'specz': inputfiles
+        }
 
         print("USED CONFIG: ", used_config)
 
@@ -139,6 +146,26 @@ def create(self, request):
             content = {"error": f"Orchestration API failure: {str(e)}"}
             return Response(content, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
 
+    def __get_mapped_column(self, product, column):
+        """ Get mapped column by column name
+
+        Args:
+            product (Product): Product object
+            column (str): column name
+        """
+
+        columns = product.contents.filter(alias=column)
+
+        if columns.count() != 1:
+            logger.warning(f"Column {column} was not mapped for product {product}.")
+            logger.warning(f"Column {column}: value {column.lower()} will be used.")
+            value = column.lower()
+        else:
+            obj = columns[0]
+            value = obj.column_name
+
+        return value
+
     def perform_create(self, serializer):
         """Add user and upload"""
 
diff --git a/orchestration/datasets/DatasetA/Norder=0/Dir=0/Npix=0.parquet b/orchestration/datasets/DatasetA/Norder=0/Dir=0/Npix=0.parquet
deleted file mode 100644
index 90a0e15b3bebd761c8139db6657db07dc3ff352d..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 5074
[base85 payload omitted]
zrxN9rVnWO!yR;(2^A9<;5H;m7Rmx@{o?@M8G;8u8uL@e>kjnyIgejdbl{PA!?Q}j_ zEsbG6Li+4VTN&a?=LeNN2I10pw$$b_$!>*>sYs8}^L6Y`Pt^sjSQlf-s9a8&GL35s zkfTClS3&U#dZ{Dk)X|tUA)BmA>y@dPLBB2J09$Sh%K^u3VS5w#OXr6&F`I$C%N&j7 zhtv9pmGW-9kjAmVKE!%*=@h(dDOzERbt$_s+_&mp59QK{UsDgvd~*iJ_7BQ&d!K2A zXU({$VS)NMEiZ%l*h7dV3VUGHNI$umM)m3Vy00Y%P^*KJu#Cy4?Ui0aK3C24 zleHzWYFApXnfv%Uc{wN=qH;pbII^jA{)*hh#^kSsCDG%{CiW>cvn^&-Gwze~a&pa> zd9&rOwO8eM-4Jty2K1&iCr-px<+Ex}j2G%sCTT!_sj^wS=aV;U4v1O_YCbuz=0=-m zq5sSIgxT+@1keLvMZ~orX{CV}R}TdR%@n3z*jw#wHfRO)3FG85&Xw~=Je%xd8x5wr zm0mq!^e9YSG=z6yEW=!ei5~@m&ZGI9Y9n$7-SAPc-dsLhUWS(xUB^s3#L+_Q+D6YQ zxuxmcZ8%htSS_~$B=Wh-;PWMmx%N?mjKj5o>jg_5g3o0PD#ZXb>i+>!81cW3-iujv zKo>Sa)AT30{NY1qc(lQK6vam3P=vh|aYQsgsv)}=S{fO>cOUGnA;mO0*vzC^=No!Q zM;e_nIJkp89AL1%hnYx7(-`S`Wr)A;aNYP}hBW9c!Scr-C$Jjq)#w~LKjGSSg5Fp^ z1<#;jrqzt6X2t{SaY*p|fCw%ceCuIX?%WU!;|BhSShPM87R}Fx(Bm*cO#TtKuMorz H{5S7EM+X;@ diff --git a/orchestration/datasets/DatasetA/Norder=0/Dir=0/Npix=11.parquet b/orchestration/datasets/DatasetA/Norder=0/Dir=0/Npix=11.parquet deleted file mode 100644 index 3e434eb7be30c9dce4d90de714e5f5d861119b56..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5074 zcmcIoO>7!h5MHqB;`~}^7Js*3s}64waA6Zqlf+sQwK_I;8duaVvb;LnjWm=wZp(iYc-{czgCJ|#-R2>2IM)blBTKWXC_%csNt`PZXm z0D$7NrQVeIJSK&ln6$;U!TFq>^9G=wG;xe!NAw=@l36&@mzyku!AJQwWAUx&*)s zDd3D&>gSiNbWJ5?9nHrs?`JOR+l$ou7ZIG{+7T)gs!TRZn1#3Fj zl>0bKI<8ljsjn|%T;}2tGpFMsh{NB0fAQTnPPj}N3RKs!;X0bLa9!D{$<4OFb#`P; zltvMLGDp3-YGh6~Gh)yQq5q>O;hSj-HA&^jD+GDrh8xWdoj^?pZI4*82(thPqjYX~ z*S>tq10zfqqy4)P0M`8>3+K+pv*EU+UbjEse|Db3i#p%oLY-^#Q)kh<)Y&y0b%sqs zol9YRfAM(Tl*jwQ>~(_hFZgDfoF;N@--n*3LEJMtLPP1QT03}e{&8@;or3Qz%m;O` z-d_^ddK%O?RRVpjt0K2^wm7tY428k7p{OVgu5)L362*vM1O^K4 zU;)q=p#@Mbq>a z{LuA)rkCk>l?GmwPE;{39v_qU5OZ}XUqv+j8CqD*&~%T6b@0SS;AtwAfQt7@Y;?C& zTnD-h>*IGBRq2+~h2>%-x5sA+9bQ|PH{>|aD6s}tjPI8ssZcSZ>o7{F%_OpQKGF~K zY_!6Gt#YxZ2m|I0lj?x2XpLtdq4iyn?YGwwv|Lsdg-_=zyqfBiw?Qu*4fE+9vSVP$ zRm6e*<47MhZQlMf_U;ZrrxMZ zgRH`TD#ku}WbK`##cVm)i0WS0XzoZ8Hd$%CUuZITh=c*sy~1 z7%f}F{gvJ_Fb? 
zV_0^}>^inLk-tQCC==2t*t^vJXnx9Sf3H;BisTYF7TAYyPb!>%mo0=!Y`!L@SBLvn z+3ulSTJfvOuAc95v&{B)ixG35X_;g7xSg;-eVmk+Ca(=~cFM4a5K9R5z^IXaY%PK6 z)AIFDjSZky2ghLSY0QaZu~oRV(i0-NnwW|; zp}!PKuif*>n=uCjwE#6A8yIt=&9Tt`#cWjX_jnZO0ly^RS`gL3K!_-Nyo_cF)6Z=$ z_txsPjQWJJ@);=?v-=zy>tY*qrn{b4-e`5bQ}aueO~ zQIKpb?kz6DOPsD@CLZEwp>b`Xr$cUOI&&8el^9mbYyydVZZY_LNn@^g)F9(SvY zk|*GE5rc{|K#lr;gcL^nC((O8tqkbg8ffbN1e-s6=nRiGSdXGujqVAsw*roc21r$8 z7eh-RqYoZ}y%nUGKnI(iG~;|j@90RQ>5gl6*= d{)m`2J`(2j&ja{*Ztm1OuYQR3a(}1i7r5_e&rU#2}zahzI6^1d(-O9idL<5>&{EUp`hFU=dbxl=DnGDGv9e{ zX5JGfQF-w!PgbK3Yo|{(tB%1U`V+)S&zmxj;}=Ut-jXqqT2xsbur?eGP1ndjn;L|^ zkiL=c;Sq~Mg5F+Ux@{mz8}QAJL&LA}4-IF;`xNp~<9P=P_#_1tkBt58Ju(Fu*Z*br z;9wq-DC=K*=?X%XM_PC8_#C0vmRyT{F^WR!kCtuVY6M8?x9|J5=@eQz^Zeb4pDA>E zNLka}CJH4YeM4?0g=}wmogO5hP~=zQs5yxgI{&g#a%2RB{{H-ucZUPbTXj371?(*= zxHYsfhC;E6ENiDHP^kICnLjNbN+A(i_*~|*6lz*O|AIA(LN`2G|DHdLLbIN2O1%6& z*l#I%XHf%%{DUipy=SA)aPQ2~^IsZ|{#v+oQSE|pC}u`9bLAi*vPB7}WM34b4=(;7 zpOhy+^SADq|HW4VWZiSJ`uaE_n%w;Q?2vXLI@4&(Ejlj^+9n1y#nP z=Jb!3L@kO&*1)rWEhvme8@`S4P}j$zXXZ}cKllDX^yQCHRrbv+q+LEKVcnM@sCMuE zkIGgLM)#%}Kl`J-A8OHA;=Pmlq8CQ?5Bg>W3+=TmVE$`;B>G_N&cKlRFeKuKN#1Np zLj0d(8_$hNM!&6INlxUJqt^J9$2RsaM{g%f`rfFsBkRs+?yco^#QH+!p%>c`Z{;z= z8NMCuA91F6x50+~R&uk1&bOh7izQz_Kh1^~q^G~*-)2L%5C7-*b5oBy?K*($B$O@&-b6J`Re zwT=;uvn_z-&GHUslN})0WJfxM5K+Bw34+MECg}sDj=^|_RZ}w)MX~*tEKu#$r3_xN zL?c))AGPY`qWzaE$FEtTOB?Z~q~W!a&q|tRG_Ep;{&C9m$vdWlSAUyJuQ6vvhb?bj z2gyzl+$GF#R^QM>!aE?8?O{=}JVNRiA=i9EYM9)A`Ebeq3%<~3s3wBtLBLl45Q6Bv zx)!FL{`|cQh(6i=<*(ZpBl-|o)=#jTPxpJXeMrMAskD76XTUq>2GQU0LYG}^@uc6q zx##-F%Q4YDnBfn}2rsp61`S7_hEc$ZGh`Yj9gXqTO_@ToW`0d-wmb{@)h$Y ztO=%l->Q>`E)Awd(}=M8@A}h&2OQ_D)dkY?cFQ(pXZEMBCI_`Fyc9@l@2%J|ZF)cY zcfcm)_9}vBga?<^({yE^!i;_4M!aZ`@JqY)Vj;TZ>SCaa^GUdo%^%)Ey!(m z?EG|HydvY4gI0W8Dj35=F^4w=NY8sCi26oiX&}m9*;nnm_dK6nRS@L6+ZQd|x-jCI z$lZ=MpZ?$d(C31qC{8rAZ_HVK!~KoFU4P>iKlbe1#xo6@9PO2gxj$}bKM3Zq5b^yHvaCdYvQ6>esIkFnVS7l_*w^H@8fkwe8Z75 zZJ8`$w#9MOR?bVwI?6vUoR)f3Aj5I(BD);-hh_KKccuy{PK?AHz3`3#c0 zKH_(;0A@^amXHxZuHC!Fi_P3@KRAvNz?^x%SyDS%Q|a$Rl1)oQUUIVe#UwA1JeBb(SXjjYwle({ zuK>~#C-XYO)aD<~WUL`?NDg>4F!K_&NC5l5%rKBQShm5kZ9KNu9kT8DZ@eN%ON+oO zh50EeM2t9caH%{ph55>re+pp5-muR=xB59NKy3;;5jlyx`P~d~=lPZo$qcOV!waMf zE=AUmH_0Z@JgZCaiejFwjiVS5GIXM@{ zz@+uwURZf=RS>ZFOMgveu$i~(en^H*%d6CZBw_z@_5x?Eh)2Tv=$f2xgm=f+;6ztjok2uO!NvQfEC;Ugbc_2 z@TWI0q)UWPq+cFz;FFD}Q$UGYpUn)ivF2>?sH>RG z8+L$52fAU-41b`6VRB(Vp!7KArVyYt;nlm?3^FQqxko5g`#QfCYR;bH83rX=()0{e z+jlA-XmCWXx-Zl!Ndpn&?U9|GF@TR2$z-Bc5hnki9t z8Duo^%Ygm^vD0s-hGJo?_7a0!fzqbbK8JmtUdP90q9^cPDi#W+#W$#)yCxGIvRU5|_$1DjDRG4R5TSjoEaG zuo|t1-VaXx;mGzA={C z43y3tuwq;m&>(W@cudv%PB`gX-+Vs}<~3Wde4@iNdt-whDDis2o6CTrM&!S{2_0s-aRpJsmtE3~fo?E$Q}5(#}dkzjjU)jF+F$?_*yeIq^a>)SQJ4+VHSW0qb6 z?`asgn^_QoI*Kir*S+(jb8frUC*&gy-L_Ym+dDLnNE4`+tCl~J_M|xAQ5p}X3 z7mw>f*3(fJ{~>2~Fq_hI`#RZ=a|SK4rzbbF7dO2f6ZYfcKh6<;Hudz>!F{5x9x9g} zS=dwl|90=J0*h8@E3-H&#h_PNl$I)7DCR1q#Zp#*V=vcQtokw&=Ef(EiW?QjfkfuG zop%%RK$gInL$H}tUS5uDjO!v~xAu$0pOM8PilTDi2Y)6|)M!djOaZT$5*9mo0>K0F z3d~IJl&9md^T(~_l#3~6j+P*EUDEG>{vgs~OY3zEe~gI$p!9WCcc z%2k%|rCcz(fG3KR^ISS&L!4DC%qmgwD&tkM#3CiwQpj}%b(J_t zEV6;EM4d`D1+TYjWR=!zA*ELs4F*+4UXjWuvMKUGFC#Htl~IB1xUoozG?|79r%ad? 
zuQD3Iex5QJ#>OsLsH4DZPr9HA^9r#JS}022F+t;>pvAOUjD1Rapj= zQBtnR0K6EJFiR>;FRzs4`82-^<@cGeJ3tqR#id17~?W3Wm2m`lxi=K#T&4m z*p#Jn`ZEc1Dr25b9iJMfl?z-lqr$3!J(85<>T$d(lhmfpG<1*2sLDvyY0?UM#SHpZ zRVHA|d;m+SLN?K9ugCrgvpQw!3=y22XmJ$*maJ7W?xG4 z%xe83a;8hxYo7m)y@`K7{->}cRw(5z?7ienjyl8Oiu=*~^5~p#?OTWZPo0&Vr!%WF zB_&Xs?mqEIY(>fpLxq|r(P>1fW~eWN)|I2~CKNPM8e#Qm~>BUu>>T zbKh@xj;}OIU{=64=)(mD>nog0ETkwW(zJq3{FPnYhw(!XDWN8+-2TMa39JTt#Z)G~ zKjD+x6?A&qWAIcJxVF_5Pq8Z=Snmu8=s1EMVOCmZ*|oM9wb>lwd_+XJ9|;kz=R@cV S@LRW@{|KrdK}>?b-u@Sk`Yfsd diff --git a/orchestration/datasets/DatasetA/Norder=0/Dir=0/Npix=8.parquet b/orchestration/datasets/DatasetA/Norder=0/Dir=0/Npix=8.parquet deleted file mode 100644 index 65207d6ab56ab7ad564a8869479104b063a83829..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5802 zcmcIo4R90J5kASn93BUZK|-dU;%Go26JtxVg)O_7JIU4`vW$hXelo$;NwOq!`mug2 z3x@<6AQV%ew6y%R6b3f~v?VYDg{DpC(xx>043m~=phKI4bfyp{ZPS)ClmhL(JN;Rb z9WsR*>%Dio`*y$Gx4Tc)a?&Cf3WOi?g?IQSVVw}G#IQ5Gk59k#%^BqH?A^qMt9#Tp z?rVwCPfNZiCT=V}_266By?tupP!b!x*YX5G8R!4zf=x_RLx;_onN--Pj$r8 zJ6{!8<}M-z=3F*?y46VRs(tF)jWP#ObH>bYhQ@D7&NF zDcEQvW;R#)j_)!O{0%b}IQAL|D_I`<%aa;ny+kDcsIG%}<%M^9#>^eW>qqn(Ph3+I zd#4_M>(w_pi2a(XqZFnkgcvxXsD+_ISSl>77UOA8#CX~V5w@@h6~nMOHa?7XyeU&_ zHj`Ua3y=J2?}+TY;RjQs7X{VAvMME3T2UqDN1Q^ws7`c7AbL|EJy0UWKaZ8FS_n5L zYz=UDN!hiXtuUJmr>SV5LIC=KKPYleV*oS>PwI+s;>9)Ujj%2W-X9h>t>zQ>| zyqE6&m1K^X z^=>Qk+fd)sOP{ndqYvJ`_>Et;GG(2W%g&!_WsbcF3Xioir~BIM^6#`V{D1CvdfuuQ z=ER*&=l9=fVP3mkZ}610FoBZ90duE}X}h@U$KPEnW@b$PmvqzERHiWeX^t3A`-H+@ zNS!qN=gYF;FL8TZIq}~=O}YskbD_ow>`Zh3$8++(w(XP4Hb8{!?B3PQ!^^LqJGO*} z*G|7NU_#VQ#=~tu?HW6{6sDDBo3t=ZxmB-$$#Hg{8m7`$=B?o2f@gM5!v*>iFl;yq z!)`AOjV2iGlQ8s2r{U$B6Xl&eqI~+|>s>q?zkT7_6dd1n{k18Zzx2c&$8u!ua?-=j zg~3h36ypdXl0{AzQh_mXZf~fl77Eh2?RTeRd#^-#b-HwOImEV5;RI%i#ugWrn}mF} zvk1x&M)0FjD9YC8Vr-!R(Z%cti)l4n9rEfhDN|W3RnqR~(CpxecJrinXGzb`LU1yB zbF_Rmtn{v8u+ls+o?hj_+A${!OJENBhuJ7`xDJb?cZ4ji^yVBjMRCD>@Q0jwID=v9 ze%8=*Wj%%w+M~*|FTwosfA$^NjA2akrC+@BRhS#!{M!620IKQu##>M;bDNu&jKXWr z2!E7^>t*6qhv1bM{L$XNtuR+%`=LshufKi>(mio+3|@$)6Q^KW_e<5!(c7H)H-Qz~ z{~-^rV7|TMg`Yx^{C@7d_j!1D>~Ak$2PN3GFYg=K2GBk@^5DXDc>nK~XYXv!uaeYp zusn1eX-6ihh{M&{SWfTr`NL6{GuG!1x;?3=3PkG!(y=c{4~x^Mov*%FRI9csF>z_s zYylr#CfowGHMj~8$?t~y0T!urUm!C5a25{tGRHN=Av1AwtO5#DkE9CY_Ksp!?gp(OYh#K&?Vu&8_#GGzt%n9Nn5>F7s{Xs9< zyRt{Ss9CahZ5GUU6q1M&?H`9CMax6cPlueb#f^n1au_)z9%aar`NQ4Vg@&^9JY#WC+gupF40$qR&>0IYrHDn$6gwtlhoets z2;HxRmeSNmYAUZ>3bL?}|9_ijt03ZW#zGOcQfNOFaYmA;P{K)PBoay>--kSrs6P}$ zRJpQFRwt7{B1=-nbC?R?HAvQC;&Rer>9V$P_n-Kq8TIIS#;YYsvKv0=Sw@nLq}odY z%}Z*$EKRK*$4?+)M~2^l*6b%G#qtd#nIK^w%&Zca(?WJwsS|dqa<$#o3F|2AS2q~~ zbljoqZL}%OLzKQZMg=?lU4AuXpj9EKO})mh(8_EIPR2mXq6Up|fKm+0DXX&I3A!9M zAMHvS78|rN(53WI)|F^~+-)6>nlz-}5uj;GXX&Q`+L&Vn$mx`FN|!)-QdrFWZun1_ z`P}XHqL@KGO6dWc zejJw74r?c?uONOJV}|F_Y2oa&YjWe`2n-L|ZQTm9hV=!`L7wpRPJ-9kE3;cIKDVwT zb8h`B5-65g{{!@DF5Zr?!#cd$rbwSN>33Q=zl&jk{Fp2*VJeux8FRoHf-f>SgH(^s z{u!~6kK;cS7G=U|M3q0Y*t@P%D^^8nOls!vRat>3Am z6E20>=hmviP+zo%%iYHlw^Sc+1$rUpRmoJ}h&ruM|2CtNt9P{$)=8?t#pZ%L(3^BA z=po9FdWvD#yrMB-8X*0sP7K+2DjYWB8mCniXLSr1;+>ktHHM%Jx-M$M-O!eyFGI(d z!GbiR{;ZCo<#IHzS3!HIeyF}4Ueu(IC9+K%ZA?8~Xf6_4lGHcBrJ`cR^vhw1Y<3&i z`$fl6(^m~r4o?T0FDz#eY}T`&)CO4P+FywTa_qOGcZ-frl4cV~a`s$#_Sj1&bG5;K zm*f4 diff --git a/orchestration/datasets/DatasetA/_common_metadata b/orchestration/datasets/DatasetA/_common_metadata deleted file mode 100644 index 787a029039ad634578bbfda9b77dcfcfe18001e5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3305 
zcmcIn{Z8XX5VtslP8GUJtvChg{y>)at8gTN+>w(ib?qdMa}Xdv?D$k6eLpvNDMmRZawW@wP`9`}Gv z78(yrwTzSrzcG|+Eh9160+8#kjA$Z$DIQj5%Rp240@x7T9xtbFB7O-B%6Azl6Rv<+ zwM+zu$zK8@n_?O1N%~Ep7PiY$6)x-lGd$e|p{>V*knR-h8lfJhWTOPB9)^R2#6Gga z$Q}4ZS}48Gzt8hf$b1Gqqb!2%kbmTIo2b<~Q$DdKmHYeq19JV;t09Cg;7hL02pu7z zhd|atVvouMVNRbAVRfRfk}>^eR#Yz|lpvS~cj^T0PevUW2}wsO-FDPd7)LN)_$YgL zs7dW3wb&dP^>%Fdr|y|67&0yobXBrgY!nO+gZ~zm z-J}Qp3Wh3_)WcTEl!#yC8pXz)rZn?RU%-}BgLqnw+#mGpw1Ew;InpHHOE`&*cDve% z&%{Q#+n(coPwbV$$er*K8)@fKCcHddX-8VEJnSe1OzdR8;ZQtZa15{In1yoQRt2W> zw8(%QHTxGXiP!Mkv02BnF?ohmcC1QgC1<4{MOs$hdqWlKPxtU3ujcYBhb1%bb4ITKR2)dBo-~ z*OsN=p;hUzb^MyV+_nPKeWqrnx|MbQFS#kq>A#9gDbZENeMQZjnG$AkKcAQ9Ylh|R zN&i)Ar52sQtT+3xH?uzREVr&M;lwO99jjIjV1Hqo)$ae3x2zACUK?t@oMwF^(iPbM zYNN#VyHJ90Y8;xh7A&uwnngS^T+&nIar5#hIq4&p>=SvK&!VO_?scU+q(1uc@Kijy zm;F3+UDAXvpe;jRhK`?y0cw!`EJS2@PJZZ7P#GMI4i4Z!Kn@ksCQc@1*BQC5$Sp$k zk8r4zsagFTh6M9Mrss=9rB6o&sLnkQh% z@XQ4~dgx4!HkePMsFp@1tgT5SLO`iU{8DNnar*fR{2dcRksNF+Y1#Qkp2?Ahgg}E| zu!jQ-<|kB0ln7C#mCl6!cq(1w4?U!hPK^vtCLk~y{Pj?soS$%AOoOggUf`29htd diff --git a/orchestration/datasets/DatasetA/_metadata b/orchestration/datasets/DatasetA/_metadata deleted file mode 100644 index 04bdc4b181ee8445eddd4962f74b309f257d923b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6627 zcmcIpYit`?6`nXI>ojfp^wd#9i7Yx~*)*}8IE@#T&Df3|dlM&dTfZt4&BL)L89(Yr z9H(0ppj4{`iQN@~{ejAARiyr?R%I!oKhP>#B2=mstkj|+;0IJZ7gUOPEm^d1?wuKr zotZr34I^jf_?&z1ch0%zyZ25!O^5AfW7n6Rgr%*>rIQ)P+)0@CoG@4n1x6BD`6Ne} zjYuW%gxLT>TPRoHlZ6xH#ErwILj)n2TfIWzL^r5LYbs+XFDz})b+9rEnO2`&smNSNW-7D4Zw(SM}wTU_2-$wg>te4&AQEdx|~GYfiC2w*+3e|+b!g3 z3pGNSx-F(6xBS*2r*Bl1NTOFqOLX;_jb@05fZG9h5X83Ft40+YC8iGiW-GPQ3h?=F zzt(o|=3kysB?Pb4HUt0W6WuUda_9nIhyHpS^}$B~eTKa{4S^7iHL0#@Z5GDkHE=x^n z2I_973WY1R4(s7Q>WzK7h_!w|#oEAtI@FgBP!IPjsPF&rxIu~#OF9vf@0dsfWg?$% zKSB_jGH0m9oaZ68jm-I43;DQ%T-i(grX6$V-&bEUO;U@%9q-Zm-+TMX$Iqbh(Q0*^ z*@QbP`dXKYE_0^_|LRd{Z6B2HOZOi9&AD(8@W1H&`47MOyU|sIuhlk#|LEiD6u?pj zX_w&NZKv*9smsRz{?VuIhorhX#vU9-5`x!in}L7h6MF0c7d}f~X`${@)D5a>`lw0% z<&$cVGJOC7K37|)N4?aWy-IbV;F{u(iuH1zily+U26pW!>hGV#VA4{6z!a&O^sSAk zU|IqJ?41tsl@998Fm>ak9OLLuzSX-H#B0O{y?_1as17XomIIOl6k4-wMWK-zh2E~) z0Hn>QpS*1(*NoJwt(ZY~zPg|OX?zAMxb0u9$1S73#|+YHb)4CRK`Qv&JsJin=vUjR zD{WB0`#b+hUAgwes>O8g3!e|)dUp~NNULoI{&L+KCJ_ju|EHDuStkW+i^cTmf82{n zb@l5Xy?z==h(B6wGw7>_^!U?H;@#&D2dUMAP18rs@WUf&mC5w!C#BtI{V;X+@P}rP z3iaYq6-r@`f_0sw-ZNt?WrMSr6seiqUGG-0wB!L)Be`Ovt{tPUTa*a@`@;G6zJj>3 z^8LY64?d@ZO1^W1><8-HIwDc$W4h-yyS>Jp?GA1~xP8p~C$0$8Q6yA7imYmNe6ef` zzJrt7`{xahr@kHl?kEz%S7cS&6#n){w;%sveMlmYA|ZH1w$!Ai;CJQhwr!Gk@$j}S z*3M3E9Vnx4Sxk!5T>9#kJ}rR^@)A*hd|Un1Z*PA0JBUD%qyyV{eCxBc<(Vwc6fIve zU9{qNqggH|mNVI+_357T*2dQ+>u36Utdd!Jk4oZJ7p++)lZ5u!vM54}IUyGvwI$Bv(TEOtQrAOo@T!7p=)GG#9cd)c4$3-^mf{ zrAw+ajb2y_4C>$Dg{<4)#hjR9O2b21FX{|+ZlLfk^?^5XXjXU^(;~^rU4{~PEs~_$ z03>8}BdV#libr|{)&i;e4Pa#$c0`-LntCfRc+Jux$x0m4>A-w0m8=FuHWOM*_`zS~lP4I7 zjKxb+o}ge>wKHs*cicyRyao}&HG$=;Fp+`YWM$> zH+dc4G7+eGS5;m&icA#v9}7AqzPp_uSJ^=h*8-o3R5^#Z$O>pp2`q)q3{@uPX#w#h zpytz&hy|CKsH=>}m=BaEJwwZZtPNIOv`TmZ}g z?36nIQa%1T)SwREi@t~bVwDa}KuZaa+kl5Ro!Zt0{ZSNS&P5L9mctRz;8F^Wi(T`e zL7zPf9hHwVEU*;X?G1PBlyK!5;&{0k)0$y)iS z`ZjFrmaFxh-AZX=D65y@tkrIGZ$iIub{U1}TmN=)I@DxzbD{I+?dj*wgZcQ@xcT$z ztv5-$%cyp8@j`aIxP8{+c<<os*NtxD zD(Xkw@#}?m55?OW@f*Y&ALYKc^@DP$vR@nDztMQNL*`2>x@=}gt=B&tmj?CNNLMez zNxB`rte9Vc-@r`wFWt@e57eZGjUqTfT*>R-gm#KC(y-B2yn z>ZN!Y7~=I{b?0!3lG7p>kSF6Q`Vupcj{$JKXhJ6ZppM7_@C*Flzd zLobP+IwTnlJr2qzwPZz|Ii3$%-CjQo^ujOU*UfO&=ta%nhC+;NR(1yA{@1^)H?Ll8 z{u%eL(wHV(C|CDSYkThx<9Z~YB$W5c^-;fayBU3pEAdxIkH^J{*>Bs9^euUDwEK5y 
zZ*cu?9DJ-)Y9BuRP~X}=H}Pi@|3=-$`F&Z_rg7xsos)Wbr;-M?K2Ie)6Cglfxdhf` z^H`rv7*PliAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oRW|!1@eh2}OVafn^eKe|uatI6cluD*^-v5FkK+009C72oNAZfB*pk1PBly zKwxnKet)qzYiA-rfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlykR{;%f@R6rPk;ac z0t5&UAV7cs0RjYyE8z2j;?`c~1PBlyK!5-N0tBoFi~#}!2oNAZfB*pk1PBlyKww&d z^z(%DdD{1DUrYhxrI>Zkd)CHnzA8AWUXzaJE8FbeJ~huD-#_W~&)>U})$T<`SAT4u z(QM^*7cK9i<-2kNRzgz=81GYY-8JTRM`2@y1Qs=}7j>Q$D$QI{QHQy(-Ua1*M`42i z0RjZ(DB$PK9C3vvK!5-N0t5&UAV7cs0RkBU*5es4b`v15=Kf$1%u~Slny0SVc^AmK zKC<@bJ>R_czv%VI`d)fo>$#|Y7j@jEd8f~d@^&5q1SSi(E+-r1p@ll;p-TF=ssgUR 
zRjsVN9uhF_AJTcgW9ssJ_RdUz009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7W-Z|V3e8$n zyr&2lk5AF_UIGLN5Xig0tbPv8Do)-!<(;>R=`R5S1PBlyK!5-N0t5&UAV7e?u)w6x zx6nWPwGE|3#f=R<+Dqe`l4q zIExjScAQPyezDm&8vz0Y2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0tB)J*0T>eLVy4P0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pkGZgUm-7~}#iU0uu1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FoIE0_!W7 zlavS$AV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV45L0=~zVpIYfD z0RjXF5Fk)&0rR$E*I)Jo2oNAZfB*pk1PBlyK!5-N0t5&UAV6Rl1^oVE8LOgC1PBly zK!5;&{0Nv&=BHMAN`L?X0t5&UNCeE^i~|C$17m;y0RjXF5FkK+009C72oNAZfB*pk z1PIJi;HU4wU(E9)ViO=hfB*pk1PBl)xPb406?_0lpTH6dOnYCNw*3;OtV;w45FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1TqEu z{Y)l}0|W>VAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7csfu$Dk zJ+yn@T>`MYERV7Y7mG 
zef8JZH8%+$Y5ZZyll=sE_1weyvUmbKaeKTtkxPJ^x(8#|atN>?zyVyEM1TcXG4ec| zrvq5onaKpm_|lPleJTMw%6Vq`1Olu#WO%kz5kU0Dhf9}!2KAF78`ql% z@Iiyq=(VGHpxJro{e&hSxK|$)n4HE1gKpgV`ouR}K&(D>|MqJE4UfVYzVD5s5OCE z_AmI191~b_wl-i`rwNSMl%5n|Gy&1>{VmG*CU8l5m%FdQ2x3Yemt8t< z1Qyl$6wWmx7#7^(nmS~w?Yl|r*oy}er7cE}KN)}*U1 zuD%85uo_%^8LmDY%xQ#=w?l!H;lM0ro^)YeaA9>hv*`a97(O8|a507h20tSp!LR`X zS)Akh!TLr(S}b*byWWc?CzR}M-Ews_S)Q=v`0w92lA`#Bf-hG4ljgQ*gFdq*tu2n@#^dgfI z%um-`@*wB6onY6f-N|!rw~yq`rFf3O_8LHJm(_Dp+*TX(;*1(7dx3 ziym4$2f0rV&iK_bbH-?L~^77l3BwVFVR5$_g@<@D)?psUEr^}p{x7S*NC*2I~TK0}**zskH5>mS?lhV}L zAMx*5K6i}1_Ud#uFtKXg-ni>dAeJWG5Iol%oFCf8cw9RKe7@`9TEqB*meB!z9HnEU zWyhPp5q^{FaaG7ybvHh|hnEu$hdV?)vc+I+@g|UyiaLv|&tR*w=NYxf2JGvA8x{*?%K*vf9(oBq|FtJNj}zsQj`8&yW9y!-Pv0@h;+Q?hmRK zd~@Y{JL*zfaM_`Q*yHANxYjw~p3A02Tsz+72qWbfzI4^-jBC@6;@?&nCsuF2g?q-v z|8nNgQM`RhO4=M+3;vHRy@s>pHvawj%dS$+RopQ;NLtm=fuETF!^!gcR=nk7*P5o1 zW*pqVd8%-56CRu>B=&vSfDcnG;y5pBz`0Soa;ryg$H#gcx*w@-#|KBaPn+@O1$?oW zxp9mK^jl2%V)uwhE#c0%5Cn=XnRd+u{Xc(|9qGc$W+h31u? zO2&FG56G^ZP`N$$(t%LlZ>G+=6uGet)#3+ZdqbF3N(Jx%7v>2U*77l|T}&9j_6DB= zW558WGu#pw4pjI$x27>O3jG;CF5V}~%>r^Im-9AicM zod?tDHF6gk-L345)Rji3lSjJ-)7>!Pp;NBj^#0KBBB+q{l0? z6#7rE`#>g`qHG>a5BckiEnKvq`_-ZZ8ePk*aFipvCsr-aMRvbj^S1&>!Lar<3u*Mv zUV9L|8?hrtt(Y7K>1?;!BuKHtdA#{(ymwjqYBVlff1gGV1L8aHuS3JUA#Z#F5k4l$ zAq;S{CG8Mx#O)2CycMUQv`qEp!Dgg2D{;n9KBTFtC&>axnZAbNUh&{5&DSEV&ztuhp_wv+xd@4HRmz2P`)wpwE_ z%B7~+8ofek4b06h%%v`@pF>%dLEQ;sXMdo;^m&(qc{>^9YNbxf7=|%i10B%w(b_@R zueC=jg%3g~>Mj_8%!D1DboeU`d>TS7%Gy9*$O1hqDr27EJp;Q}$}INr^;z&zpRlZ# z`>c4WCI5w#Y}Sh_v#b|b8d)zTu9?dwGK*PsX7xlb4D$)`b}~ry3aQb1C4Ig>l`^yC z`C7A1Z}gw;_qu=oZ1SHK;^%J_ThmKPY0dZ7OLa=9uQzM8P$O5P)GB88=UJ`HHkBBV zqQ6@Lv{!m+Q))`8A1o^T&rQC%0I;BafkeZ}WeZId^57jk#6`Grw5jQ!?f8oMj zF#S>Z7f8|keki(m&!EWF=1Wb}BJ5H05%ht9wzn_@qUqk6Kdb@vmb#fQ*i@;#A&T{a z%~&5j_O^QY&logo^rk3#2llZ4$bGOh+FR;o4q#@WgF<$!{68{;o+$Q~Y}^-W>Y0SG zi$i<)pEghBL4i_g$}gaDMXQk&NDE3(su)V71qJy{9!2j zcN^~`WJB3h|Am;h8&Bjn0>5aZ9t=aba0(tByb$W)$#*c`|l!xJ=B+mBO_oS!%7k zL@-SdXM$^S)G~1>wK9HMu1ptalB7dDJ||qpFGA~dV@b+Y#A}Nv z8BZ84(`n)QXDejJ-B$nlI(N=!ljdnMMgS?ICyq(rG5P)(G?eFXQi@}c+_6tIfNG}MSp-- zEDFmOr>PbE*q(FCO)o;e?6d#;=V6)l|Lm7!*;KgyEWz{e^pg+dAB9+JR>Y{DcUIvr zro)o0zB5T+TtWYa!MNy-+v4a19?BV8oD-TfzP7{~ixPuGTs$|6J%DVWxY3IYGW{Q% zQ-Aq?g(XAJH7Kpv?PGW@r%0M*-S_kTVvAAxytDevChwnj&xzq&DPLP8XD6u@anS~t zH`?c78>rTx{C`P)%YyMO&?S{f#BpJ=sR9)D@Jx#h*)Y%L`ngn{fjJEO!a_v}l}Btz zmatSRjy6;EATKsNA&)R8!n%TTzSo};tz4gEkg+4EH4-%uMG1L{np~oct9!RTtCCh_uMFA6Hg z@~BG!UM^^WKGue`xXE(9Hi8<@EP(paR9BP&yM64Vi>mi3f?XtwO*6<nWjCe#Q`oR0l)~K!$sS2TwD0>2e z-Geug+WLTa`Dxnhq8NVn`aHBK)Pwf3;jKuZ3lV{(=EJVdK6Pk)>{Gjv>#T0H;)nAo x{BTw`#k~Ct(t>$rrD?L9hQ#( zrVJB8I9oy+U@&c9h})FmD5W7s*Oa3rX{JmVXoqyhG-RNJQl)xSl&p>&x%Nm$ zjC=6p$+;J=tH1igC!c<^TSNE@N=iD)#}U2P{`BagO+;MCS&=0!oJUMNb8VdLi#Y^! 
z`Rmq&Q)UyYTUgim-A3Z2=WqP&{z@Y;^~)<;@2@cu`I8>6>3qvbRNMKOV!e_0a16%8 z-ZT;=T{}B%yNtw}r)CnjpVJaOU*~%6&Z#7JjmNr~By;@izC2C0nwh?1I2sFna8p2*Irn@fLV9#XW#$nr3K9L=@?O6BV)eloo;%oOvI$qoY05_ z3G9gGbm_1(-yGdfbC%oVO5ynkQTNC)FogM*m}9 z<1uyiE0zy#DZ16uq^mc**V3zg?V$C%;`9;q0TYqu{PoA`%ijG5{@&WB-v0ZY;TeZ6 zs>=>9{qC8oSJhQlJMs_dAE_IEG3##c(LVLbd%K>^-*H_%b^TX$zkD`}nDO$~Q}?8I z)H7b@3HE=Wp4qqedhIjU)RU_gJrsoxs>^;?Zo07jw7RbM>Zpl5J?cMx=i=C&AAg}v z=Q-g}BuGF-`%jk+%k#+z{XFwJ5-fi&Te4-mfVLz;FQ_$PSvc%!~9GAR{v$ztAQ`}_41)ctAYGvp!jCj9f z-sQCrmHdhF>^gXFez1Gr1`O*e|Mb+Q*Wp`vdh@i6U{`i{{Y`jdQ|8Zq@eTN1(#d}V z-k40Z{B52k-dN631rxg zZL8l0bL`TTseSN$WyS8!E#UOeJ&!-$I=CJ<<~bc=J_&i~R(|xEe%i*@H+h3$morlD z^}9VBfqhxROIeabQzV~Hjvs8A`%DBDY*J#PoUp6O<8$KUF?`H)KDsHm3F>9sjMo9Z zA5`(go-V9@H%|a}E8GGgMcFmI5JW^$#GdRR*&x{cTA{R{u;A6FdRs}JZjY8c&DJy` ztwl0&?LabuB`kR=c`e+QCzMJALMAUC!wQN;IYFo2?F@@H3p&MY$<(_7bkye$ix(6v z6A!E=@hineVouGKmxs0L6#Jb%57_&oGz}IFUJvcAA1LFZ)JNKb$WR>hdtCvyM-0~f zs4v)_R0ODIk1K-YK2OByc1E0F-YNF@!QAU_L~EBW(aoMOUbU(p%s>?4kQ1#RfFdp% zgrb2CI3s1H=_pbZDKOCXRuu!9_}0YG1JWbKg(DoQBRvo+8sRY9<(wX?Up^>c)a#GT zPbV-b9*#a}jp>o%!VF3O zw3iAwL+z+kg6+;wC=f$?Z}o)2-hdxT<;pp-IWjS%vN#?*MX3N?iFg$z8cUkZwbljP z{U_?1Fc&?uNF7O%b?_fO3rVt+)HEVlBdKjN0v@C4U8k;Fao1C&L8=T=+rrga((CZi zG^IB;P(EG6u^8<1N;##EAwO{}rUrKv9b1;Ysnr@)4VD4eJCMD# zB&fqsTvGM$xCdy}Sa>-RPk4?kre2y>$9GRNy zylB6a-x6_E(W!g#QF>LAyTYCkGuVfzD!^7X085L*TEqHF<4RfsqoSm*Cl^=(% zt<`R;Q<${uUf>+$F^^>!yjF|MZZ$W#^_9tUYgimZT&CvRaP2hy(Hp_?1Ck#PhaIRjoxelgM+@Yu3~4TrN6-4r^PB zO_4#DDR2CiAmtzCoCfOuBP>h&nxKcno}R-k%9zu}?R#im5;>Y)cN}jC{lK~#B8MBC zdOGG(n3~)=RS@ci9uixoUW4lYDe-25{5JSZ?GCF>MwJ*)-ou%;1?^DhE`JN#XP|~* zZfte8vvs6!*o+-ct18O&gXT(kbu$^Qfxd!jzMoGA?ed#~l%kaNQR6L*RX5jo8`%Er zLan$I_3F83WORGxYOW=xSjX zX`5j-9}FwC>?Hx03;I9<_aG}xtxHdrvfm3sV6S4QmPIg4iH|O-{;LS4NLOhNx*97M zgC!TQssm;z@w8}7YCQJ-Nz^B+1W2&8I&|;Bg9dF3`GbjuY(p|~DH`BDOFZn*sW*8t2fj3ScN|BUyGcA9mKc8tw^E^5rJjb!!!|}J+zeg?5LQ+esYOj?Zy$ z63QF2Z_=mf6ZBC!vra;S1FBS73+(aC?96XwXJ(yqp_$uS-~O%0Wi|#@*YV7)H{90u zyK9-X&`gL|wqqsKcN1=F4MZD_erP-4?%UHpetP{Q$8lThz z5%0})*9``i8C|=+?JP*&a@(8Rdzp>E^lfvH`Tg}B&or*BJ{@}gfd91fm0w=W|FXBk zC*s7K!?f;r-}D^N_lJ~%M%#6$eZ9;}yj(})02TS6?^=D^;X&IUdck-u=#!pfMacJ@ z$h6JK1ob=K@j>17JGl1hOLhO4zrTL~vy6fdOfYY9)%oT1{Z4~-R z_|G}Qwn%%LabMKcqlQV_{Nwdm5DJ3WL0(6Y zI*ccZOY#_*mASlf0W`kD#jG&7A_%bn>)^?Zz;mXg10}|~TD;M_CxfJ(3^gh+1r$V9U_D)EX;CN;Lvo z#V%2=aQ)C$M}t#Ia1D=AA~)N_t478p=*h(#kz=$o#nNcoCp2au(p*FIKJV$N03h^|%O0VB>#x+X3#;qX(Uc@BTTCIv6ok_J) zyS0RUg8J&=z@6igYGeIUL0q1xwgyJEG}P4`MLor@b(ue3>=LipwQ{Abqlt;kGX?~5 z)aYHf7%%Z#k##~BWAcbx>e^*}C1%hckQ2amvV`TvQ0vTIDt}UKF0|J%Tm>|a^(|QK4+REx`vun$h`M$L;W6b43{Qak}9R1IEIU|PEUmsyRb`!31bOupQAPt zyp?D3T@Xs=9}f=j^+I7Q*$|%}6`+cvy_%lSFwfuP(ZUuChQItzUzUb#v2xGYcYOK24I zz`qWsqJRe?f@SN0ocU*)w&0&dWqC;&6a28A;fGbzCzsg?%<$XLiT14^*k>OOJJSz? 
diff --git a/orchestration/datasets/DatasetB/_metadata b/orchestration/datasets/DatasetB/_metadata
deleted file mode 100644
index 9036c270383717f43228ff0ecfbffe7390b5b351..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

(literal 4947: binary data omitted)

diff --git a/orchestration/datasets/DatasetB/catalog_info.json b/orchestration/datasets/DatasetB/catalog_info.json
deleted file mode 100644
index 56f1340..0000000
--- a/orchestration/datasets/DatasetB/catalog_info.json
+++ /dev/null
@@ -1,8 +0,0 @@
-{
-    "catalog_name": "DatasetB",
-    "catalog_type": "object",
-    "total_rows": 80,
-    "epoch": "J2000",
-    "ra_column": "ra",
-    "dec_column": "dec"
-}
diff --git a/orchestration/datasets/DatasetB/provenance_info.json b/orchestration/datasets/DatasetB/provenance_info.json
deleted file mode 100644
index 6f01c9d..0000000
--- a/orchestration/datasets/DatasetB/provenance_info.json
+++ /dev/null
@@ -1,47 +0,0 @@
-{
-    "catalog_name": "DatasetB",
-    "catalog_type": "object",
-    "total_rows": 80,
-    "epoch": "J2000",
-    "ra_column": "ra",
-    "dec_column": "dec",
-    "version": "0.2.1",
-    "generation_date": "2024.02.20",
-    "tool_args": {
-        "tool_name": "hipscat_import",
-        "version": "0.2.1",
-        "runtime_args": {
-            "catalog_name": "DatasetB",
-            "output_path": "../data-sample/hipscat/",
-            "output_artifact_name": "DatasetB",
-            "tmp_dir": "",
-            "overwrite": true,
-            "dask_tmp": "",
-            "dask_n_workers": 4,
-            "dask_threads_per_worker": 1,
-            "catalog_path": "../data-sample/hipscat/DatasetB",
-            "tmp_path": "../data-sample/hipscat/DatasetB/intermediate",
-            "epoch": "J2000",
-            "catalog_type": "object",
-            "input_path": "../data-sample/raw/B",
-            "input_paths": [
-                "file:///home/singulani/projects/slurm_lsdb/import/../data-sample/raw/B/datasetB.parquet"
-            ],
-            "input_format": "parquet",
-            "input_file_list": [],
-            "ra_column": "ra",
-            "dec_column": "dec",
-            "use_hipscat_index": false,
-            "sort_columns": "z",
-            "constant_healpix_order": -1,
-            "highest_healpix_order": 7,
-            "pixel_threshold": 1000000,
-            "mapping_healpix_order": 7,
-            "debug_stats_only": false,
-            "file_reader_info": {
-                "input_reader_type": "ParquetReader",
-                "chunksize": 500000
-            }
-        }
-    }
-}
diff --git a/orchestration/datasets/mini_dataset/Norder=0/Dir=0/Npix=4.parquet b/orchestration/datasets/mini_dataset/Norder=0/Dir=0/Npix=4.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..1ec63cea18f9006a8182e538e4df2184afeb16f2
GIT binary patch
literal 6929
(binary data omitted)

literal 0
HcmV?d00001

diff --git a/orchestration/datasets/mini_dataset/_metadata b/orchestration/datasets/mini_dataset/_metadata
new file mode 100644
index 0000000000000000000000000000000000000000..8ef1ea397d17230f59e1a64681c82cfe9a351ee5
GIT binary patch
literal 4574
(binary data omitted)

literal 0
HcmV?d00001

diff --git a/orchestration/datasets/DatasetA/catalog_info.json b/orchestration/datasets/mini_dataset/catalog_info.json
similarity index 63%
rename from orchestration/datasets/DatasetA/catalog_info.json
rename to orchestration/datasets/mini_dataset/catalog_info.json
index 6debd90..c5fdc3c 100644
--- a/orchestration/datasets/DatasetA/catalog_info.json
+++ b/orchestration/datasets/mini_dataset/catalog_info.json
@@ -1,7 +1,7 @@
 {
-    "catalog_name": "DatasetA",
+    "catalog_name": "mini_dataset",
     "catalog_type": "object",
-    "total_rows": 100,
+    "total_rows": 40,
     "epoch": "J2000",
     "ra_column": "ra",
     "dec_column": "dec"
diff --git a/orchestration/datasets/mini_dataset/partition_info.csv b/orchestration/datasets/mini_dataset/partition_info.csv
new file mode 100644
index 0000000..27dbb88
--- /dev/null
+++ b/orchestration/datasets/mini_dataset/partition_info.csv
@@ -0,0 +1,2 @@
+Norder,Npix,Dir
+0,4,0
diff --git a/orchestration/datasets/DatasetB/point_map.fits b/orchestration/datasets/mini_dataset/point_map.fits
similarity index 99%
rename from orchestration/datasets/DatasetB/point_map.fits
rename to orchestration/datasets/mini_dataset/point_map.fits
index 0acdd0bcf478a6eccd450f723133bfc6ea346a69..b0edd31800acb0cc5b5234642ccab7614d001f48 100644
GIT binary patch
delta 117
(binary data omitted)

delta 332
(binary data omitted)

diff --git a/orchestration/pipelines.old/cross_lsdb_dev/packages/executor.py b/orchestration/pipelines.old/cross_lsdb_dev/packages/executor.py
new file mode 100755
index 0000000..fcb6746
--- /dev/null
+++ b/orchestration/pipelines.old/cross_lsdb_dev/packages/executor.py
@@ -0,0 +1,47 @@
+"""_summary_ """
+
+from dask.distributed import LocalCluster
+from dask_jobqueue import SLURMCluster
+from utils import load_yml
+import logging
+from typing import Union
+
+
+def get_executor_config(
+    executor_key: str, config_file: str
+) -> Union[LocalCluster, SLURMCluster]:
+    """returns the configuration of where the pipeline will be run
+
+    Args:
+        executor_key (str): executor key
+        config_file (str): config path
+
+    Returns:
+        Union[LocalCluster, SLURMCluster]: Executor object
+    """
+
+    logger = logging.getLogger()
+    logger.info("Getting executor config: %s", executor_key)
+
+    configs = load_yml(config_file)
+
+    try:
+        config = configs["executor"][executor_key]
+    except KeyError:
+        logger.warning("The executor key not found. Using minimal local config.")
+        executor_key = "minimal"
+
+    match executor_key:
+        case "local":
+            cluster = LocalCluster(**config)
+        case "linea-slurm":
+            icfg = config["instance"]
+            cluster = SLURMCluster(**icfg)
+            cluster.adapt(**config["adapt"])
+        case _:
+            cluster = LocalCluster(
+                n_workers=1,
+                threads_per_worker=1,
+            )
+
+    return cluster
diff --git a/orchestration/pipelines.old/cross_lsdb_dev/packages/utils.py b/orchestration/pipelines.old/cross_lsdb_dev/packages/utils.py
new file mode 100755
index 0000000..d23b553
--- /dev/null
+++ b/orchestration/pipelines.old/cross_lsdb_dev/packages/utils.py
@@ -0,0 +1,59 @@
+"""_summary_ """
+
+import logging
+import os
+import pathlib
+from typing import Any
+
+import yaml
+
+
+def setup_logger(name="pipeline-logger"):
+    """
+    Configures the logger for recording events and messages.
+
+    Returns:
+        logging.Logger: Configured logger instance.
+    """
+
+    logger = logging.getLogger(name)
+    logger.setLevel(logging.DEBUG)
+
+    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
+
+    logdir = os.getenv("LOG_DIR", ".")
+
+    file_handler = logging.FileHandler(pathlib.Path(logdir, f"{name}.log"))
+    file_handler.setLevel(logging.DEBUG)
+    file_handler.setFormatter(formatter)
+
+    logger.addHandler(file_handler)
+
+    return logger
+
+
+def load_yml(filepath: str) -> Any:
+    """Load yaml file
+
+    Args:
+        filepath (str): filepath
+
+    Returns:
+        Any: yaml file content
+    """
+    with open(filepath, encoding="utf-8") as _file:
+        content = yaml.safe_load(_file)
+
+    return content
+
+
+def dump_yml(filepath, content, encoding="utf-8"):
+    """ Dump yaml file
+
+    Args:
+        filepath (str): filepath output
+        content (dict): yaml content
+    """
+
+    with open(filepath, 'w', encoding=encoding) as _file:
+        yaml.dump(content, _file)
\ No newline at end of file
diff --git a/orchestration/pipelines.old/cross_lsdb_dev/run.sh b/orchestration/pipelines.old/cross_lsdb_dev/run.sh
new file mode 100755
index 0000000..093ce76
--- /dev/null
+++ b/orchestration/pipelines.old/cross_lsdb_dev/run.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Check if the argument was given
+if [ $# -eq 0 ]; then
+    echo "Error: No arguments provided."
+    exit 1
+fi
+
+ARGS=$@
+shift $#
+
+if [ ! -d "$DASK_EXECUTOR_KEY" ]; then
+    export DASK_EXECUTOR_KEY=local
+fi
+
+if [ ! -d "$PIPELINES_DIR" ]; then
+    echo "Error: PIPELINES_DIR not defined."
+    exit 1
+fi
+
+INSTALL_PIPE="$PIPELINES_DIR/cross_lsdb_dev/install.sh"
+
+if [ ! -f "$INSTALL_PIPE" ]; then
+    echo "Error: Installation script not found."
+    exit 1
+fi
+
+# Installing pipeline
+echo "Installing pipeline..."
+. "$INSTALL_PIPE"
+
+set -xe
+
+# Run the Python code with the given argument
+# run-crossmatch $ARGS || { echo "Failed to run-crossmatch"; exit 1; }
+run-crossmatch $ARGS
+
+echo $? >> return.code
+
+echo "Done."
\ No newline at end of file
diff --git a/orchestration/pipelines.old/cross_lsdb_dev/scripts/run-crossmatch b/orchestration/pipelines.old/cross_lsdb_dev/scripts/run-crossmatch
new file mode 100755
index 0000000..43fcf19
--- /dev/null
+++ b/orchestration/pipelines.old/cross_lsdb_dev/scripts/run-crossmatch
@@ -0,0 +1,84 @@
+#!/usr/bin/env python3
+
+import argparse
+import os
+import time
+from pathlib import Path
+
+import lsdb
+from dask.distributed import Client
+from executor import get_executor_config
+from utils import dump_yml, load_yml, setup_logger
+
+
+def run(config_file):
+    """Run lsdb crossmatch
+
+    Args:
+        config_file (str): lsdb parameters
+    """
+
+    logger = setup_logger(name="cross-lsdb")
+
+    start_time_full = time.time()
+
+    # Loading configurations
+    pipe_config = load_yml(config_file)
+    param = pipe_config.get("inputs")
+    logger.info("Parameters: %s", param)
+
+
+    executor_key = os.getenv("DASK_EXECUTOR_KEY", "local")
+    cluster = get_executor_config(executor_key, config_file)
+
+    with Client(cluster):
+        phot_dp0 = lsdb.read_hipscat(param.get("photo"))
+        spec_dp0 = lsdb.read_hipscat(param.get("specz"))
+
+        cross = spec_dp0.crossmatch(phot_dp0)
+        data = cross.compute()
+
+    os.makedirs(pipe_config.get("output_dir"), exist_ok=True)
+    outputfile = Path(pipe_config.get("output_dir"), "cross-output.parquet")
+    data.to_parquet(outputfile)
+
+    register_outputs(outputfile)
+
+    logger.info("--> Object Count: \n%s", str(data.count()))
+
+    cluster.close()
+
+    logger.info("Time elapsed: %s", str(time.time() - start_time_full))
+
+
+def register_outputs(filepath, role='main'):
+    """ Register outputs in process.yml
+
+    Args:
+        filepath (str): output path
+        role (str, optional): role name. Defaults to 'main'.
+ """ + + outpath = str(Path(filepath).resolve()) + proc_yaml_file = str(Path('./process.yml').resolve()) + process_info = load_yml(proc_yaml_file) + process_info['outputs'] = [{"path": outpath, "role": role}] + dump_yml(proc_yaml_file, process_info) + + + + + + + + +if __name__ == "__main__": + # Create the parser and add arguments + parser = argparse.ArgumentParser() + parser.add_argument(dest="config_path", help="yaml config path") + + args = parser.parse_args() + config_path = args.config_path + + # Run pipeline + run(config_path) diff --git a/orchestration/pipelines.old/load_pipelines.sh b/orchestration/pipelines.old/load_pipelines.sh new file mode 100755 index 0000000..1749f37 --- /dev/null +++ b/orchestration/pipelines.old/load_pipelines.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +cat << EOF > ${PIPELINES_DIR}/pipelines.yaml +cross_lsdb_dev: + display_name: 'LSDB Crossmatch (dev)' + path: '${PIPELINES_DIR}/cross_lsdb_dev' + executor: 'local' # only to orchestration + runner: 'bash' + executable: 'run.sh' + schema_config: '${PIPELINES_DIR}/cross_lsdb_dev/config.py' + version: '0.0.1' +EOF \ No newline at end of file diff --git a/orchestration/pipelines.old/pipelines.yaml b/orchestration/pipelines.old/pipelines.yaml new file mode 100644 index 0000000..18c911c --- /dev/null +++ b/orchestration/pipelines.old/pipelines.yaml @@ -0,0 +1,8 @@ +cross_lsdb_dev: + display_name: 'LSDB Crossmatch (dev)' + path: '/pipelines/cross_lsdb_dev' + executor: 'local' # only to orchestration + runner: 'bash' + executable: 'run.sh' + schema_config: '/pipelines/cross_lsdb_dev/config.py' + version: '0.0.1' diff --git a/orchestration/pipelines.old/pipelines.yaml.template b/orchestration/pipelines.old/pipelines.yaml.template new file mode 100644 index 0000000..18c911c --- /dev/null +++ b/orchestration/pipelines.old/pipelines.yaml.template @@ -0,0 +1,8 @@ +cross_lsdb_dev: + display_name: 'LSDB Crossmatch (dev)' + path: '/pipelines/cross_lsdb_dev' + executor: 'local' # only to orchestration + runner: 'bash' + executable: 'run.sh' + schema_config: '/pipelines/cross_lsdb_dev/config.py' + version: '0.0.1' From b497ea47e0f44345f7c5105144316c6bdebab1ba Mon Sep 17 00:00:00 2001 From: Cristiano Singulani Date: Wed, 31 Jul 2024 19:16:40 -0300 Subject: [PATCH 14/20] Removed unnecessary dir. 
--- .gitignore | 2 +- .../pipelines.old/cross_lsdb_dev/VERSION | 1 - .../pipelines.old/cross_lsdb_dev/config.py | 53 ------------ .../pipelines.old/cross_lsdb_dev/config.yml | 19 ----- .../cross_lsdb_dev/environment.yml | 14 ---- .../pipelines.old/cross_lsdb_dev/install.sh | 34 -------- .../cross_lsdb_dev/packages/__init__.py | 0 .../cross_lsdb_dev/packages/executor.py | 47 ----------- .../cross_lsdb_dev/packages/utils.py | 59 ------------- .../pipelines.old/cross_lsdb_dev/run.sh | 40 --------- .../cross_lsdb_dev/scripts/run-crossmatch | 84 ------------------- orchestration/pipelines.old/load_pipelines.sh | 12 --- orchestration/pipelines.old/pipelines.yaml | 8 -- .../pipelines.old/pipelines.yaml.template | 8 -- 14 files changed, 1 insertion(+), 380 deletions(-) delete mode 100644 orchestration/pipelines.old/cross_lsdb_dev/VERSION delete mode 100644 orchestration/pipelines.old/cross_lsdb_dev/config.py delete mode 100644 orchestration/pipelines.old/cross_lsdb_dev/config.yml delete mode 100644 orchestration/pipelines.old/cross_lsdb_dev/environment.yml delete mode 100755 orchestration/pipelines.old/cross_lsdb_dev/install.sh delete mode 100644 orchestration/pipelines.old/cross_lsdb_dev/packages/__init__.py delete mode 100755 orchestration/pipelines.old/cross_lsdb_dev/packages/executor.py delete mode 100755 orchestration/pipelines.old/cross_lsdb_dev/packages/utils.py delete mode 100755 orchestration/pipelines.old/cross_lsdb_dev/run.sh delete mode 100755 orchestration/pipelines.old/cross_lsdb_dev/scripts/run-crossmatch delete mode 100755 orchestration/pipelines.old/load_pipelines.sh delete mode 100644 orchestration/pipelines.old/pipelines.yaml delete mode 100644 orchestration/pipelines.old/pipelines.yaml.template diff --git a/.gitignore b/.gitignore index cbf151c..bf6e661 100644 --- a/.gitignore +++ b/.gitignore @@ -32,4 +32,4 @@ saml2 *.pyc __pycache__ - +_backup diff --git a/orchestration/pipelines.old/cross_lsdb_dev/VERSION b/orchestration/pipelines.old/cross_lsdb_dev/VERSION deleted file mode 100644 index 8a9ecc2..0000000 --- a/orchestration/pipelines.old/cross_lsdb_dev/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.0.1 \ No newline at end of file diff --git a/orchestration/pipelines.old/cross_lsdb_dev/config.py b/orchestration/pipelines.old/cross_lsdb_dev/config.py deleted file mode 100644 index 5b963ef..0000000 --- a/orchestration/pipelines.old/cross_lsdb_dev/config.py +++ /dev/null @@ -1,53 +0,0 @@ -from pydantic import BaseModel -import os - -DATASETS_DIR = os.getenv("DATASETS_DIR", "/datasets") - - -class Instance(BaseModel): - processes: int = 1 - memory: str = "123GiB" - queue: str = "cpu" - job_extra_directives: list[str] = ["--propagate", "--time=2:00:00"] - - -class Adapt(BaseModel): - maximum_jobs: int = 10 - - -class LIneASlurm(BaseModel): - instance: Instance = Instance() - adapt: Adapt = Adapt() - - -class Local(BaseModel): - n_workers: int = 2 - threads_per_worker: int = 2 - memory_limit: str = "1GiB" - - -class Inputs(BaseModel): - photo: str = f"{DATASETS_DIR}/DatasetA" - specz: str = f"{DATASETS_DIR}/DatasetB" - - -class Executor(BaseModel): - local: Local = Local() - linea_slurm: LIneASlurm = LIneASlurm() - - -class Config(BaseModel): - output_dir: str = "./output" - executor: Executor = Executor() - inputs: Inputs = Inputs() - - -if __name__ == "__main__": - import yaml - - cfg = Config() - - with open('config.yml', 'w') as outfile: - data_json = cfg.model_dump() - print(data_json) - yaml.dump(data_json, outfile) diff --git a/orchestration/pipelines.old/cross_lsdb_dev/config.yml 
b/orchestration/pipelines.old/cross_lsdb_dev/config.yml deleted file mode 100644 index 079bc21..0000000 --- a/orchestration/pipelines.old/cross_lsdb_dev/config.yml +++ /dev/null @@ -1,19 +0,0 @@ -executor: - linea_slurm: - adapt: - maximum_jobs: 10 - instance: - job_extra_directives: - - --propagate - - --time=2:00:00 - memory: 123GiB - processes: 1 - queue: cpu - local: - memory_limit: 1GiB - n_workers: 2 - threads_per_worker: 2 -inputs: - photo: /datasets/DatasetA - specz: /datasets/DatasetB -output_dir: ./output diff --git a/orchestration/pipelines.old/cross_lsdb_dev/environment.yml b/orchestration/pipelines.old/cross_lsdb_dev/environment.yml deleted file mode 100644 index 673503b..0000000 --- a/orchestration/pipelines.old/cross_lsdb_dev/environment.yml +++ /dev/null @@ -1,14 +0,0 @@ -name: pipe_cross_lsdb_dev -channels: - - defaults -dependencies: - - python=3.10 - - pip: - - PyYaml - - dask==2024.1.0 - - distributed==2024.1.0 - - dask-jobqueue==0.8.2 - - hipscat==0.2.1 - - hipscat-import==0.2.1 - - lsdb==0.1.0 - diff --git a/orchestration/pipelines.old/cross_lsdb_dev/install.sh b/orchestration/pipelines.old/cross_lsdb_dev/install.sh deleted file mode 100755 index 11d0265..0000000 --- a/orchestration/pipelines.old/cross_lsdb_dev/install.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash --login - -source `dirname $CONDA_EXE`/activate || { echo "Failed to activate Conda environment"; exit 1; } - -if [ ! -d "$PIPELINES_DIR" ]; then - echo "Error: PIPELINES_DIR not defined." - exit 1 -fi - -PIPE_BASE="$PIPELINES_DIR/cross_lsdb_dev" -HASENV=`conda env list | grep 'pipe_cross_lsdb_dev '` - -if [ -z "$HASENV" ]; then - echo "Create virtual environment..." - conda env create -f $PIPE_BASE/environment.yml - echo "Virtual environment created and packages installed." -else - if [ "$CONDA_FORCE_UPDATE" == "yes" ]; then - echo "Virtual environment already exists. Updating..." - conda env update --file $PIPE_BASE/environment.yml --prune - fi -fi - -conda activate pipe_cross_lsdb_dev - -export PATH=$PATH:"$PIPE_BASE/scripts/" - -if [ -z "$PYTHONPATH" ]; then - export PYTHONPATH="$PIPE_BASE/packages/" -else - export PYTHONPATH=$PYTHONPATH:"$PIPE_BASE/packages/" -fi - -echo "Conda Environment: $CONDA_DEFAULT_ENV" diff --git a/orchestration/pipelines.old/cross_lsdb_dev/packages/__init__.py b/orchestration/pipelines.old/cross_lsdb_dev/packages/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/orchestration/pipelines.old/cross_lsdb_dev/packages/executor.py b/orchestration/pipelines.old/cross_lsdb_dev/packages/executor.py deleted file mode 100755 index fcb6746..0000000 --- a/orchestration/pipelines.old/cross_lsdb_dev/packages/executor.py +++ /dev/null @@ -1,47 +0,0 @@ -"""_summary_ """ - -from dask.distributed import LocalCluster -from dask_jobqueue import SLURMCluster -from utils import load_yml -import logging -from typing import Union - - -def get_executor_config( - executor_key: str, config_file: str -) -> Union[LocalCluster, SLURMCluster]: - """returns the configuration of where the pipeline will be run - - Args: - executor_key (str): executor key - config_file (str): config path - - Returns: - Union[LocalCluster, SLURMCluster]: Executor object - """ - - logger = logging.getLogger() - logger.info("Getting executor config: %s", executor_key) - - configs = load_yml(config_file) - - try: - config = configs["executor"][executor_key] - except KeyError: - logger.warning("The executor key not found. 
Using minimal local config.") - executor_key = "minimal" - - match executor_key: - case "local": - cluster = LocalCluster(**config) - case "linea-slurm": - icfg = config["instance"] - cluster = SLURMCluster(**icfg) - cluster.adapt(**config["adapt"]) - case _: - cluster = LocalCluster( - n_workers=1, - threads_per_worker=1, - ) - - return cluster diff --git a/orchestration/pipelines.old/cross_lsdb_dev/packages/utils.py b/orchestration/pipelines.old/cross_lsdb_dev/packages/utils.py deleted file mode 100755 index d23b553..0000000 --- a/orchestration/pipelines.old/cross_lsdb_dev/packages/utils.py +++ /dev/null @@ -1,59 +0,0 @@ -"""_summary_ """ - -import logging -import os -import pathlib -from typing import Any - -import yaml - - -def setup_logger(name="pipeline-logger"): - """ - Configures the logger for recording events and messages. - - Returns: - logging.Logger: Configured logger instance. - """ - - logger = logging.getLogger(name) - logger.setLevel(logging.DEBUG) - - formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s") - - logdir = os.getenv("LOG_DIR", ".") - - file_handler = logging.FileHandler(pathlib.Path(logdir, f"{name}.log")) - file_handler.setLevel(logging.DEBUG) - file_handler.setFormatter(formatter) - - logger.addHandler(file_handler) - - return logger - - -def load_yml(filepath: str) -> Any: - """Load yaml file - - Args: - filepath (str): filepath - - Returns: - Any: yaml file content - """ - with open(filepath, encoding="utf-8") as _file: - content = yaml.safe_load(_file) - - return content - - -def dump_yml(filepath, content, encoding="utf-8"): - """ Dump yaml file - - Args: - filepath (str): filepath output - content (dict): yaml content - """ - - with open(filepath, 'w', encoding=encoding) as _file: - yaml.dump(content, _file) \ No newline at end of file diff --git a/orchestration/pipelines.old/cross_lsdb_dev/run.sh b/orchestration/pipelines.old/cross_lsdb_dev/run.sh deleted file mode 100755 index 093ce76..0000000 --- a/orchestration/pipelines.old/cross_lsdb_dev/run.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash - -# Check if the argument was given -if [ $# -eq 0 ]; then - echo "Error: No arguments provided." - exit 1 -fi - -ARGS=$@ -shift $# - -if [ ! -d "$DASK_EXECUTOR_KEY" ]; then - export DASK_EXECUTOR_KEY=local -fi - -if [ ! -d "$PIPELINES_DIR" ]; then - echo "Error: PIPELINES_DIR not defined." - exit 1 -fi - -INSTALL_PIPE="$PIPELINES_DIR/cross_lsdb_dev/install.sh" - -if [ ! -f "$INSTALL_PIPE" ]; then - echo "Error: Installation script not found." - exit 1 -fi - -# Installing pipeline -echo "Installing pipeline..." -. "$INSTALL_PIPE" - -set -xe - -# Run the Python code with the given argument -# run-crossmatch $ARGS || { echo "Failed to run-crossmatch"; exit 1; } -run-crossmatch $ARGS - -echo $? >> return.code - -echo "Done." 
\ No newline at end of file diff --git a/orchestration/pipelines.old/cross_lsdb_dev/scripts/run-crossmatch b/orchestration/pipelines.old/cross_lsdb_dev/scripts/run-crossmatch deleted file mode 100755 index 43fcf19..0000000 --- a/orchestration/pipelines.old/cross_lsdb_dev/scripts/run-crossmatch +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -import os -import time -from pathlib import Path - -import lsdb -from dask.distributed import Client -from executor import get_executor_config -from utils import dump_yml, load_yml, setup_logger - - -def run(config_file): - """Run lsdb crossmatch - - Args: - config_file (str): lsdb parameters - """ - - logger = setup_logger(name="cross-lsdb") - - start_time_full = time.time() - - # Loading configurations - pipe_config = load_yml(config_file) - param = pipe_config.get("inputs") - logger.info("Parameters: %s", param) - - - executor_key = os.getenv("DASK_EXECUTOR_KEY", "local") - cluster = get_executor_config(executor_key, config_file) - - with Client(cluster): - phot_dp0 = lsdb.read_hipscat(param.get("photo")) - spec_dp0 = lsdb.read_hipscat(param.get("specz")) - - cross = spec_dp0.crossmatch(phot_dp0) - data = cross.compute() - - os.makedirs(pipe_config.get("output_dir"), exist_ok=True) - outputfile = Path(pipe_config.get("output_dir"), "cross-output.parquet") - data.to_parquet(outputfile) - - register_outputs(outputfile) - - logger.info("--> Object Count: \n%s", str(data.count())) - - cluster.close() - - logger.info("Time elapsed: %s", str(time.time() - start_time_full)) - - -def register_outputs(filepath, role='main'): - """ Register outputs in process.yml - - Args: - filepath (str): output path - role (str, optional): role name. Defaults to 'main'. - """ - - outpath = str(Path(filepath).resolve()) - proc_yaml_file = str(Path('./process.yml').resolve()) - process_info = load_yml(proc_yaml_file) - process_info['outputs'] = [{"path": outpath, "role": role}] - dump_yml(proc_yaml_file, process_info) - - - - - - - - -if __name__ == "__main__": - # Create the parser and add arguments - parser = argparse.ArgumentParser() - parser.add_argument(dest="config_path", help="yaml config path") - - args = parser.parse_args() - config_path = args.config_path - - # Run pipeline - run(config_path) diff --git a/orchestration/pipelines.old/load_pipelines.sh b/orchestration/pipelines.old/load_pipelines.sh deleted file mode 100755 index 1749f37..0000000 --- a/orchestration/pipelines.old/load_pipelines.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -cat << EOF > ${PIPELINES_DIR}/pipelines.yaml -cross_lsdb_dev: - display_name: 'LSDB Crossmatch (dev)' - path: '${PIPELINES_DIR}/cross_lsdb_dev' - executor: 'local' # only to orchestration - runner: 'bash' - executable: 'run.sh' - schema_config: '${PIPELINES_DIR}/cross_lsdb_dev/config.py' - version: '0.0.1' -EOF \ No newline at end of file diff --git a/orchestration/pipelines.old/pipelines.yaml b/orchestration/pipelines.old/pipelines.yaml deleted file mode 100644 index 18c911c..0000000 --- a/orchestration/pipelines.old/pipelines.yaml +++ /dev/null @@ -1,8 +0,0 @@ -cross_lsdb_dev: - display_name: 'LSDB Crossmatch (dev)' - path: '/pipelines/cross_lsdb_dev' - executor: 'local' # only to orchestration - runner: 'bash' - executable: 'run.sh' - schema_config: '/pipelines/cross_lsdb_dev/config.py' - version: '0.0.1' diff --git a/orchestration/pipelines.old/pipelines.yaml.template b/orchestration/pipelines.old/pipelines.yaml.template deleted file mode 100644 index 18c911c..0000000 --- 
a/orchestration/pipelines.old/pipelines.yaml.template
+++ /dev/null
@@ -1,8 +0,0 @@
-cross_lsdb_dev:
-  display_name: 'LSDB Crossmatch (dev)'
-  path: '/pipelines/cross_lsdb_dev'
-  executor: 'local' # only to orchestration
-  runner: 'bash'
-  executable: 'run.sh'
-  schema_config: '/pipelines/cross_lsdb_dev/config.py'
-  version: '0.0.1'

From e6f1a8a5aae28c00153f88732eb696b0f5896609 Mon Sep 17 00:00:00 2001
From: Cristiano Singulani
Date: Fri, 2 Aug 2024 18:13:35 +0000
Subject: [PATCH 15/20] Fixed small error in training set maker

Updated documentation
---
 .gitmodules                             |  3 ++-
 README.md                               | 32 +++++++++++++++++--------
 backend/core/fixtures/initial_data.yaml | 10 ++++----
 orchestration/pipelines                 |  2 +-
 4 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index f7fe3f4..aca4cd2 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,4 @@
 [submodule "orchestration/pipelines"]
 	path = orchestration/pipelines
-	url = https://github.com/linea-it/pzserver_pipelines
+	url = https://github.com/linea-it/pzserver_pipelines
+	branch = training_set_maker
diff --git a/README.md b/README.md
index ec2b293..71f3eef 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,6 @@ Clone the repository and access the directory:
 ```bash
 git clone https://github.com/linea-it/pzserver_app.git
 cd pzserver_app
-mkdir -p archive/data archive/log/backend
 ```
 
 Copy the file `docker-compose-development.yml` and rename to `docker-compose.yml`
@@ -98,6 +97,11 @@ In the development environment it is not necessary to change Nginx settings.
 But if a local change is needed, copy the `nginx_development.conf` file to `nginx.conf`.
 Also change the `docker-compose.yml` file in the nginx service at the line `- ./nginx_development.conf:/etc/nginx/conf.d/default.conf:ro`. In this way, the nginx.conf file represents your local environment; if you make any modifications that are necessary for the project, copy this modification to the template file, as the nginx.conf file is not part of the repository.
 
+Finally, to start the whole application:
+
+``` bash
+docker-compose up
+```
 
 ### Setting Up a New Application to manage authentication
 
@@ -109,8 +113,8 @@ Go to Django ADMIN (for local installation, open a web browser and go to the URL
 - `client_type` should be set to confidential
 - `authorization_grant_type` should be set to **'Resource owner password-based'**
 - `name` can be set to whatever you'd like
-- checkbox for `Skip authorization` should remain unchecked 
-- `Algorithm`: keep the default option (No OIDC support) 
+- checkbox for `Skip authorization` should remain unchecked
+- `Algorithm`: keep the default option (No OIDC support)
 
 > **WARNING**: As mentioned above, **BEFORE** clicking on the SAVE button, you must edit the configuration files (**.env** and **.env.local**) and change the variables `DJANGO_OAUTH_CLIENT_ID` and `DJANGO_OAUTH_CLIENT_SECRET` in both files according to the values of `client_id` and `client_secret` respectively. Only after editing the configuration files should the `SAVE` button be pressed.
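For illustration only, a minimal sketch of that edit (the variable names `DJANGO_OAUTH_CLIENT_ID` and `DJANGO_OAUTH_CLIENT_SECRET` come from this README; the values below are hypothetical placeholders, not real credentials):

```bash
# .env and .env.local -- hypothetical placeholder values; paste the
# client_id and client_secret shown by the Django admin form instead
DJANGO_OAUTH_CLIENT_ID=AbC123ExampleClientId
DJANGO_OAUTH_CLIENT_SECRET=ExampleSecretValueReplaceMe
```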
@@ -129,6 +133,14 @@ mkdir orchestration/db orchestration/logs orchestration/processes ``` bash cp docker-compose-development-orch.yml docker-compose.yml +docker network create orchestration-network # create internal network +``` + +Loading pipelines (submodules): + +``` bash +git submodule init +git submodule update ``` Enter the orchestration-api container: @@ -142,7 +154,13 @@ python manage.py migrate python manage.py createsuperuser ``` -Start orchestration services: +Still inside the container, installing pipelines (confirm default directories with 'yes'): +``` bash +cd /pipelines +./setup.sh +``` + +Exit the container and start orchestration services: ``` bash docker-compose up orchestrator ``` @@ -154,12 +172,6 @@ All that remains is to modify the ORCHEST_URL variable in the `.env` with the va ORCHEST_URL=http://orchestrator ``` -Finally, to start the whole application: - -``` bash -docker-compose up -``` - ### Some example commands Turn on background environment (if you have the application already running on the terminal, stop it with `CTRL + C` keys and up ir again, but in the background using `-d` argument): diff --git a/backend/core/fixtures/initial_data.yaml b/backend/core/fixtures/initial_data.yaml index 514fa09..369ba85 100644 --- a/backend/core/fixtures/initial_data.yaml +++ b/backend/core/fixtures/initial_data.yaml @@ -40,11 +40,11 @@ - model: core.pipeline pk: 1 fields: - name: cross_lsdb_dev - display_name: Cross LSDB (dev) + name: training_set_maker + display_name: Training Set Maker version: 0.0.1 - description: Test pipeline + description: Training Set Maker pipeline created_at: 2022-05-18 15:36:59.830913+00:00 - system_config: {'executor': {'linea_slurm': {'adapt': {'maximum_jobs': 10}, 'instance': {'job_extra_directives': ['--propagate', '--time=2:00:00'], 'memory': '123GiB', 'processes': 1, 'queue': 'cpu'}}, 'local': {'memory_limit': '1GiB', 'n_workers': 2, 'threads_per_worker': 2}}, 'inputs': {'photo': '/datasets/DatasetA', 'specz': '/datasets/DatasetB'}, 'output_dir': './output'} - product_types_accepted: [1,2,4] + system_config: {"executor": {"local": {"n_workers": 2, "threads_per_worker": 2, "memory_limit": "1GiB"}, "linea-slurm": {"instance": {"cores": 54, "processes": 1, "memory": "123GiB", "queue": "cpu", "job_extra_directives": ["--propagate", "--time=2:00:00"]}, "adapt": {"maximum_jobs": 10}}}, "inputs": {"dataset": {"path": "/datasets/mini_dataset"}, "specz": [{"path": "/datasets/specz.parquet", "columns": {"ra": "ra", "dec": "dec"}}]}, "output_dir": "outputs", "param": {"suffixes": ["_specz", "_dataset"], "output_catalog_name": "tsm_cross_001", "radius_arcsec": 1, "n_neighbors": 1}} + product_types_accepted: [1] output_product_type: 2 \ No newline at end of file diff --git a/orchestration/pipelines b/orchestration/pipelines index bffeb8a..8008b43 160000 --- a/orchestration/pipelines +++ b/orchestration/pipelines @@ -1 +1 @@ -Subproject commit bffeb8a6fc06fc0b77fb98a5c4ce41a50bebc5aa +Subproject commit 8008b433375c050b267a3a419e89da858eab0479 From b8b6cc07d0c2d6c25c23167a0a435bc979545c12 Mon Sep 17 00:00:00 2001 From: Julia Gschwend Date: Tue, 13 Aug 2024 17:03:23 -0300 Subject: [PATCH 16/20] Update README.md replace `docker-compose` by `docker compose` --- README.md | 88 +++++++++++++++++++++++++++---------------------------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index 71f3eef..57912b2 100644 --- a/README.md +++ b/README.md @@ -14,10 +14,10 @@ git clone https://github.com/linea-it/pzserver_app.git cd 
pzserver_app
 ```
 
Copy the file `docker-compose-development.yml` and rename to `docker-compose.yml`
 
 ```bash
cp docker-compose-development.yml docker-compose.yml
 ```
 
 Create the environment variables file based on `env_template`.
@@ -34,37 +34,37 @@ Check your linux user id with:
 ```bash
 echo $UID
 ```
and update it in the `docker-compose.yml` file if necessary (if it is not the usual 1000).
 
 Now start the database service. The first time, it is important to start the database service alone: in this step postgresql will create the database and the user based on the settings `POSTGRES_USER`, `POSTGRES_PASSWORD` and `POSTGRES_DB`.
 
 ```bash
-docker-compose up database
+docker compose up database
 ```
 
-Wait for the message `database system is ready to accept connections` and then close the service with the `CTRL + C` keys or `docker-compose stop database` in another terminal.
+Wait for the message `database system is ready to accept connections` and then close the service with the `CTRL + C` keys or `docker compose stop database` in another terminal.
 
 Now start the backend service. As this is the first time, the base image will be pulled and the container will be built; this may take a while.
 
 ```bash
-docker-compose up backend
+docker compose up backend
 ```
 
 If everything goes normally the last message will be something like `... spawned uWSGI worker x (pid: xx, cores: x)`
 
 Shut down the backend service to change one of the Django variables.
-To terminate use `CTRL + C` or `docker-compose stop`.
+To terminate use `CTRL + C` or `docker compose stop`.
 
 With the services turned off, let's run a command in the backend container to generate a SECRET for Django.
 
 ```bash
-docker-compose run backend python -c "import secrets; print(secrets.token_urlsafe())"
+docker compose run backend python -c "import secrets; print(secrets.token_urlsafe())"
 ```
 
 This is the output of the command:
 
 ```bash
-$ docker-compose run backend python -c "import secrets; print(secrets.token_urlsafe())"
+$ docker compose run backend python -c "import secrets; print(secrets.token_urlsafe())"
 Creating pzserver_backend_run ... done
 6klbHhaeA6J2imKt9AVVgS5yl9mCWoiQqrfUV469DLA
 ```
@@ -74,13 +74,13 @@ Copy the generated key and replace the `SECRET` variable value in the `.env` fil
 Create the Django superuser.
 
 ```bash
-docker-compose run backend python manage.py createsuperuser
+docker compose run backend python manage.py createsuperuser
 ```
 
 Import the application's initial data using the following command:
 
 ```bash
-docker-compose run backend python manage.py loaddata initial_data
+docker compose run backend python manage.py loaddata initial_data
 ```
 
 This `loaddata` command will insert some basic records into the database for the application to work. These records are in the `core/fixtures/initial_data.yaml` file.
 
 Now install the Frontend dependencies by running the `yarn` command. As this is the first time starting this container, the base image will be pulled, which may take a while.
```bash
-docker-compose run frontend yarn
+docker compose run frontend yarn
```

This command will create the directory `pzserver/frontend/node_modules`. If you have any problems with dependencies, remove this directory and run the command again.

In the development environment it is not necessary to change Ngnix settings. But if a local change is needed, copy the `nginx_development.conf` file to `nginx.conf`.

-Also change the `docker-compose.yml` file in the ngnix service at the line `- ./nginx_development.conf:/etc/nginx/conf.d/default.conf:ro`. In this way, the ngnix.conf file represents your local environment. If you make any modifications that are necessary for the project, copy this modification to the template file, as the nginx.conf file is not part of the repository.
+Also change the `docker compose.yml` file in the ngnix service at the line `- ./nginx_development.conf:/etc/nginx/conf.d/default.conf:ro`. In this way, the ngnix.conf file represents your local environment. If you make any modifications that are necessary for the project, copy this modification to the template file, as the nginx.conf file is not part of the repository.

Finally, to start the whole application:

``` bash
-docker-compose up
+docker compose up
```

### Setting Up a New Application to manage authentication
@@ -132,7 +132,7 @@ mkdir orchestration/db orchestration/logs orchestration/processes
```

``` bash
-cp docker-compose-development-orch.yml docker-compose.yml
+cp docker compose-development-orch.yml docker compose.yml
docker network create orchestration-network # create internal network
```

@@ -145,7 +145,7 @@ git submodule update
Enter the orchestration-api container:

``` bash
-docker-compose run orchestration-api bash
+docker compose run orchestration-api bash
```

Inside the container, create the database and an admin user:
@@ -162,7 +162,7 @@ cd /pipelines
Exit the container and start orchestration services:
``` bash
-docker-compose up orchestrator
+docker compose up orchestrator
```

And then follow the steps to create an authentication application ([step by step](https://github.com/linea-it/orchestration/?tab=readme-ov-file#how-to-use-using-client-credential)) just by changing the url from http://localhost to http://localhost:8088, and using the admin user created previously. Note when creating an authentication application, we must change the `ORCHEST_CLIENT_ID` and `ORCHEST_CLIENT_SECRET` in the `.env` with the client_id and secret_id values respectively.
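For reference, after registering the application the credential entries in the `.env` would look something like the sketch below (the values are placeholders for illustration only; use the client_id and client_secret generated by the orchestration admin page):

``` bash
# placeholder values, copy the real ones from the orchestration admin page
ORCHEST_CLIENT_ID=AbC123xYz
ORCHEST_CLIENT_SECRET=s3cr3t-generated-by-orchestration
```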
@@ -178,7 +178,7 @@ Turn on background environment (if you have the application already running on t
```bash
-docker-compose up -d
+docker compose up -d
```

Access in the browser:
@@ -189,41 +189,41 @@ Access in the browser:
Turn off all environment:

```bash
-docker-compose stop
+docker compose stop
```

Restart all environment:

```bash
-docker-compose stop && docker-compose up -d
+docker compose stop && docker compose up -d
```

Open a terminal in one of the services:

```bash
# with the service turned on
-docker-compose exec backend bash
+docker compose exec backend bash

# with the service turned off
-docker-compose run backend bash
+docker compose run backend bash
```

Access database with psql:

```bash
# Use the credentials that are in the .env
-docker-compose exec database psql -h localhost -U <username> -d <database>
+docker compose exec database psql -h localhost -U <username> -d <database>
```

Add libraries to frontend using yarn:

``` bash
-docker-compose run frontend yarn add <library>
+docker compose run frontend yarn add <library>
```

Check front-end changes before pushing new commits to the remote repository (it is recommended to build the frontend to prevent errors with ESlint from disrupting the Pull Request process):

``` bash
-docker-compose run frontend yarn build
+docker compose run frontend yarn build
```

### Manual build of images and push to docker hub
@@ -253,31 +253,31 @@ docker push linea/pzserver:frontend_<tag>
run all tests

```bash
-docker-compose exec backend pytest
+docker compose exec backend pytest
```

run all tests with coverage; check local coverage in localhost/coverage

```bash
-docker-compose exec backend pytest --cov=./ --cov-report=html
+docker compose exec backend pytest --cov=./ --cov-report=html
```

run only a file

```bash
-docker-compose exec backend pytest core/test/test_product_file.py
+docker compose exec backend pytest core/test/test_product_file.py
```

run only a class

```bash
-docker-compose exec backend pytest core/test/test_product_file.py::ProductFileListCreateAPIViewTestCase
+docker compose exec backend pytest core/test/test_product_file.py::ProductFileListCreateAPIViewTestCase
```

run single test method

```bash
-docker-compose exec backend pytest core/test/test_product_file.py::ProductFileListCreateAPIViewTestCase::test_list_product_file
+docker compose exec backend pytest core/test/test_product_file.py::ProductFileListCreateAPIViewTestCase::test_list_product_file
```

## Enable authentication via LIneA Satosa (Github)
@@ -310,7 +310,7 @@ cp pz.key pzkey.pem
cp pz.crt pzcert.pem
```

-Next we must uncomment the volume that represents the saml2 directory in docker-compose.yml:
+Next we must uncomment the volume that represents the saml2 directory in docker compose.yml:

```yml
- ./archive/log/backend:/archive/log
@@ -347,7 +347,7 @@ The following example assumes an installation where the database and ngnix are i
Only:

- create the folders
-- create `docker-compose.yml` file
+- create `docker compose.yml` file
- create `.env` file
- create `ngnix.conf` file

@@ -358,7 +358,7 @@ mkdir pzserver pzserver/archive pzserver/archive/data pzserver/archive/django_st
cd pzserver
```

-Create a `docker-compose.yml` file based on the `docker-compose-production.yml` template
+Create a `docker compose.yml` file based on the `docker compose-production.yml` template

Change the frontend and backend images to the desired version, replace the string `<tag>` with the image tag.

@@ -369,16 +369,16 @@ Usually the changes are in ngnix volumes and port.
Create an `.env` file based on the `env_template` file and edit the database access variables.
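A minimal sketch of what the database section of the `.env` might contain (the variable names are the `POSTGRES_*` settings mentioned earlier; the values here are examples only, so choose your own credentials):

```bash
# example values only, pick your own credentials
POSTGRES_USER=pzadmin
POSTGRES_PASSWORD=change-me
POSTGRES_DB=pzserver
```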
-Wait for the message `database system is ready to accept connections` and then close the service with the `CTRL + C` keys or `docker-compose stop database` in another terminal.
+Wait for the message `database system is ready to accept connections` and then close the service with the `CTRL + C` keys or `docker compose stop database` in another terminal.

```bash
-docker-compose up database
+docker compose up database
```

Start the backend service and wait for the `Booting worker with pid...` message.

```bash
-docker-compose up backend
+docker compose up backend
```

Shut down the backend service and change the Django variables.
@@ -389,11 +389,11 @@ In production it is **MANDATORY** to turn off Debug `DEBUG=0`. and change the `S
With the service turned off, run the command below to generate a SECRET, copy and paste it into the `.env`

```bash
-docker-compose run backend python -c "import secrets; print(secrets.token_urlsafe())"
+docker compose run backend python -c "import secrets; print(secrets.token_urlsafe())"
```

```bash
-docker-compose run backend python manage.py createsuperuser
+docker compose run backend python manage.py createsuperuser
```

Create the Ngnix configuration file `nginx.conf` based on the `nginx_production.conf` file.

@@ -401,15 +401,15 @@ Create the Ngnix configuration file `nginx.conf` based on the `nginx_production.
Start all services

```bash
-docker-compose up -d
+docker compose up -d
```

-Configure a URL that points to the machine where it is installed on the port configured for Ngnix in docker-compose.
+Configure a URL that points to the machine where it is installed on the port configured for Ngnix in docker compose.

At the end of this example the pzserver folder looks like this:

```bash
--rw-r--r-- docker-compose.yml
+-rw-r--r-- docker compose.yml
-rw-r--r-- nginx.conf # Ngnix configuration file.
-rw-r--r-- .env # File with configuration variables
drwxr-xr-x archive # Directory where the files generated by the application are kept.
@@ -421,8 +421,8 @@ drwxr-xr-x pg_backups # Directory where postgresql files are in container
Procedure to update the production environment or any other that uses built images.

-- Edit the `docker-compose.yml` file and change the frontend and backend images tag.
+- Edit the `docker compose.yml` file and change the frontend and backend images tag.
- Edit the `.env` file to add new variables or change them if necessary.
-- Pull the new images with the `docker-compose pull` command.
-- Restart services `docker-compose stop && docker-compose up -d`.
+- Pull the new images with the `docker compose pull` command.
+- Restart services `docker compose stop && docker compose up -d`.

From 6ba2ca8fd2bdf20ac8e50d16e848cb85df46df3b Mon Sep 17 00:00:00 2001
From: Julia Gschwend
Date: Tue, 13 Aug 2024 17:07:36 -0300
Subject: [PATCH 17/20] Update README.md

revert wrong replacements
---
 README.md | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 57912b2..8318c42 100644
--- a/README.md
+++ b/README.md
@@ -14,10 +14,10 @@ git clone https://github.com/linea-it/pzserver_app.git
cd pzserver_app
```

-Copy the file `docker compose-development.yml` and rename to `docker compose.yml`
+Copy the file `docker-compose-development.yml` and rename to `docker-compose.yml`

```bash
-cp docker compose-development.yml docker compose.yml
+cp docker-compose-development.yml docker-compose.yml
```

Create the environment variables file based on `env_template`.
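Assuming the template sits at the repository root (as it does in this project), creating the `.env` is typically just a copy, which you then edit:

```bash
# sketch: copy the template, then edit the variables as needed
cp env_template .env
```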
@@ -34,7 +34,7 @@ Check your linux user id with:
```bash
echo $UID
```
-and update it in the `docker compose.yml` file if necessary (if it is not the usual 1000).
+and update it in the `docker-compose.yml` file if necessary (if it is not the usual 1000).

Now start the database service. It is important that, the first time, the database service is turned on alone; in this step PostgreSQL will create the database and the user based on the settings `POSTGRES_USER`, `POSTGRES_PASSWORD` and `POSTGRES_DB`.

@@ -95,7 +95,7 @@ This command will create the directory `pzserver/frontend/node_modules` if you h
In the development environment it is not necessary to change Ngnix settings. But if a local change is needed, copy the `nginx_development.conf` file to `nginx.conf`.

-Also change the `docker compose.yml` file in the ngnix service at the line `- ./nginx_development.conf:/etc/nginx/conf.d/default.conf:ro`. In this way, the ngnix.conf file represents your local environment. If you make any modifications that are necessary for the project, copy this modification to the template file, as the nginx.conf file is not part of the repository.
+Also change the `docker-compose.yml` file in the ngnix service at the line `- ./nginx_development.conf:/etc/nginx/conf.d/default.conf:ro`. In this way, the ngnix.conf file represents your local environment. If you make any modifications that are necessary for the project, copy this modification to the template file, as the nginx.conf file is not part of the repository.

Finally, to start the whole application:

@@ -132,7 +132,7 @@ mkdir orchestration/db orchestration/logs orchestration/processes
```

``` bash
-cp docker compose-development-orch.yml docker compose.yml
+cp docker-compose-development-orch.yml docker-compose.yml
docker network create orchestration-network # create internal network
```

@@ -310,7 +310,7 @@ cp pz.key pzkey.pem
cp pz.crt pzcert.pem
```

-Next we must uncomment the volume that represents the saml2 directory in docker compose.yml:
+Next we must uncomment the volume that represents the saml2 directory in docker-compose.yml:

```yml
- ./archive/log/backend:/archive/log
@@ -347,7 +347,7 @@ The following example assumes an installation where the database and ngnix are i
Only:

- create the folders
-- create `docker compose.yml` file
+- create `docker-compose.yml` file
- create `.env` file
- create `ngnix.conf` file

@@ -358,7 +358,7 @@ mkdir pzserver pzserver/archive pzserver/archive/data pzserver/archive/django_st
cd pzserver
```

-Create a `docker compose.yml` file based on the `docker compose-production.yml` template
+Create a `docker-compose.yml` file based on the `docker-compose-production.yml` template

Change the frontend and backend images to the desired version, replace the string `<tag>` with the image tag.

@@ -404,12 +404,12 @@ Start all services
docker compose up -d
```

-Configure a URL that points to the machine where it is installed on the port configured for Ngnix in docker compose.
+Configure a URL that points to the machine where it is installed on the port configured for Ngnix in docker-compose.

At the end of this example the pzserver folder looks like this:

```bash
--rw-r--r-- docker compose.yml
+-rw-r--r-- docker-compose.yml
-rw-r--r-- nginx.conf # Ngnix configuration file.
-rw-r--r-- .env # File with configuration variables
drwxr-xr-x archive # Directory where the files generated by the application are kept.
@@ -421,7 +421,7 @@ drwxr-xr-x pg_backups # Directory where postgresql files are in container
Procedure to update the production environment or any other that uses built images.

-- Edit the `docker compose.yml` file and change the frontend and backend images tag.
+- Edit the `docker-compose.yml` file and change the frontend and backend images tag.
- Edit the `.env` file to add new variables or change them if necessary.
- Pull the new images with the `docker compose pull` command.
- Restart services `docker compose stop && docker compose up -d`.

From 78b865631063304fad98dccbc03e1e351466c244 Mon Sep 17 00:00:00 2001
From: Julia Gschwend
Date: Tue, 13 Aug 2024 17:50:20 -0300
Subject: [PATCH 18/20] Update README.md

add instruction to restart the service after django admin configuration
---
 README.md | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 8318c42..04a94ff 100644
--- a/README.md
+++ b/README.md
@@ -120,7 +120,14 @@ Go to Django ADMIN (for local installation, open a web browser and go to the URL
![Adding new application](images/new_app.png)

-The installation is done, you can now test the newly configured application.
+The installation is done. To test the newly configured application, restart the service by pressing `Ctrl+C` in the terminal, then execute:
+
+```
+docker compose up -d
+```
+
+And then, go to `https://localhost` to open the application.
+

### Orchestration setup

From 50224a5cea9cc80c1782818649a70ac50d12bf3e Mon Sep 17 00:00:00 2001
From: Julia Gschwend
Date: Tue, 13 Aug 2024 18:26:41 -0300
Subject: [PATCH 19/20] Update README.md

add exception (port 8088, if 8080 is not available)
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 04a94ff..d20af28 100644
--- a/README.md
+++ b/README.md
@@ -172,7 +172,7 @@ Exit the container and start orchestration services:
docker compose up orchestrator
```

-And then follow the steps to create an authentication application ([step by step](https://github.com/linea-it/orchestration/?tab=readme-ov-file#how-to-use-using-client-credential)) just by changing the url from http://localhost to http://localhost:8088, and using the admin user created previously. Note when creating an authentication application, we must change the `ORCHEST_CLIENT_ID` and `ORCHEST_CLIENT_SECRET` in the `.env` with the client_id and secret_id values respectively.
+And then follow the steps to create an authentication application ([step by step](https://github.com/linea-it/orchestration/?tab=readme-ov-file#how-to-use-using-client-credential)) just by changing the url from `http://localhost/admin/oauth2_provider/application/add/` to `http://localhost:8080/admin/oauth2_provider/application/add/` (or 8088, if 8080 doesn't work), and using the admin user created previously. Note when creating an authentication application, we must change the `ORCHEST_CLIENT_ID` and `ORCHEST_CLIENT_SECRET` in the `.env` with the client_id and secret_id values respectively.
All that remains is to modify the ORCHEST_URL variable in the `.env` with the value http://orchestrator

``` bash

From f7de99596879dfce54fb3e1aeddd185814afae1e Mon Sep 17 00:00:00 2001
From: Julia Gschwend
Date: Tue, 13 Aug 2024 18:44:40 -0300
Subject: [PATCH 20/20] Update README.md

fix mistake
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index d20af28..c4cdda9 100644
--- a/README.md
+++ b/README.md
@@ -172,7 +172,7 @@ Exit the container and start orchestration services:
docker compose up orchestrator
```

-And then follow the steps to create an authentication application ([step by step](https://github.com/linea-it/orchestration/?tab=readme-ov-file#how-to-use-using-client-credential)) just by changing the url from `http://localhost/admin/oauth2_provider/application/add/` to `http://localhost:8080/admin/oauth2_provider/application/add/` (or 8088, if 8080 doesn't work), and using the admin user created previously. Note when creating an authentication application, we must change the `ORCHEST_CLIENT_ID` and `ORCHEST_CLIENT_SECRET` in the `.env` with the client_id and secret_id values respectively.
+And then follow the steps to create an authentication application ([step by step](https://github.com/linea-it/orchestration/?tab=readme-ov-file#how-to-use-using-client-credential)) just by changing the url from `http://localhost/admin/oauth2_provider/application/add/` to `http://localhost:8088/admin/oauth2_provider/application/add/` and using the admin user created previously. Note when creating an authentication application, we must change the `ORCHEST_CLIENT_ID` and `ORCHEST_CLIENT_SECRET` in the `.env` with the client_id and secret_id values respectively.

All that remains is to modify the ORCHEST_URL variable in the `.env` with the value http://orchestrator

``` bash