Skip to content

Commit

Permalink
Merge pull request #20 from UoA-eResearch/IDS-963-generate-zip-for-fi…
Browse files Browse the repository at this point in the history
…nal-ro-crate

Ids 963 generate zip for final ro crate
  • Loading branch information
JLoveUOA authored Dec 16, 2024
2 parents 12a2065 + 14d36e4 commit 2a5ea56
Show file tree
Hide file tree
Showing 15 changed files with 194 additions and 32 deletions.
28 changes: 14 additions & 14 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions src/api/fake_resdrive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""
Functions for faking a mounted research drive on the local home directory
Only for demonstration purposes,
please replace once service accounts can mount a research drive
"""

import shutil
from pathlib import Path

TEST_DATA_NAME = Path("tests/restst000000001-testing")


def make_fake_resdrive(drive_path: Path) -> None:
    """Lay out a stand-in research drive for testing and demonstrations.

    Ensures ``<drive_path>/Archive`` exists. ``<drive_path>/Vault`` is created
    and filled with fake data only when it is not already present; an
    existing Vault is left untouched.
    """
    archive_dir = drive_path / "Archive"
    archive_dir.mkdir(parents=True, exist_ok=True)

    vault_dir = drive_path / "Vault"
    if vault_dir.exists():
        # Set up on an earlier call: do not copy the sample data again.
        return
    vault_dir.mkdir(parents=True, exist_ok=False)
    populate_fake_resdrive(vault_dir)


def populate_fake_resdrive(input_path: Path) -> None:
    """Copy the sample data tree at ``TEST_DATA_NAME`` into ``input_path``.

    ``dirs_exist_ok=True`` lets the copy proceed when ``input_path`` (or a
    sub-directory of it) already exists.
    """
    shutil.copytree(src=TEST_DATA_NAME, dst=input_path, dirs_exist_ok=True)
27 changes: 21 additions & 6 deletions src/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,16 @@
from sqlmodel import Session, SQLModel, create_engine, select

from api.cors import add_cors_middleware
from api.manifests import bag_directory, create_manifests_directory, generate_manifest
from api.fake_resdrive import make_fake_resdrive
from api.manifests import (
bag_directory,
bagit_exists,
create_manifests_directory,
generate_manifest,
)
from api.security import ApiKey, validate_api_key, validate_permissions
from crate.ro_builder import ROBuilder
from crate.ro_loader import ROLoader
from crate.ro_loader import ROLoader, zip_existing_crate
from models.member import Member
from models.person import Person
from models.project import InputProject, Project, ProjectWithDriveMember
Expand Down Expand Up @@ -124,7 +130,8 @@ async def set_drive_info(
]
project.research_drives = drives
for drive in drives:
drive.manifest = generate_manifest(drive.name)
dirve_path = get_resdrive_path(drive.name)
drive.manifest = generate_manifest(dirve_path / "Vault")
# Add the validated services and members into the project
project.members = members
# Upsert the project.
Expand Down Expand Up @@ -200,6 +207,8 @@ def get_resdrive_path(drive_name: str) -> Path:
"""Get a path for a research drive.
Please update when service acc logic is finalized"""
drive_path = Path.home() / "mnt" / drive_name
# TESTING ONLY: create a fake research drive at this path before checking it.
make_fake_resdrive(drive_path)
if not drive_path.is_dir():
raise FileNotFoundError(
"Research Drive must be mounted in order to generate RO-Crate"
Expand Down Expand Up @@ -246,7 +255,10 @@ def build_crate_contents(
drive_entity = ro_crate_builder.add_research_drive_service(drive_found)
ro_crate_builder.crate.root_dataset.append_to("mainEntity", drive_entity)
drive_entity.append_to("project", project_entities)
ro_crate_loader.write_crate(drive_location)
ro_crate_location = drive_location
if bagit_exists(ro_crate_location):
ro_crate_location = ro_crate_location / "data"
ro_crate_loader.write_crate(ro_crate_location)
bag_directory(
drive_location,
bag_info={"projects": ",".join([project.title for project in projects])},
Expand All @@ -263,16 +275,19 @@ def build_crate_contents(
async def generate_ro_crate(
drive_name: ResearchDriveID,
session: SessionDep,
) -> ROLoader:
) -> None:
"""Async task for generating the RO-crate in a research drive
then moving all files into archive"""
drive_path = get_resdrive_path(drive_name)
return build_crate_contents(
drive_location = drive_path / "Vault"
output_location = drive_path / "Archive"
build_crate_contents(
drive_name,
session,
drive_location=drive_path / "Vault",
output_location=drive_path / "Archive",
)
zip_existing_crate(output_location / str(drive_name), drive_location)


@app.get(ENDPOINT_PREFIX + "/resdriveinfo", response_model=ProjectWithDriveMember)
Expand Down
35 changes: 26 additions & 9 deletions src/api/manifests.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,20 @@
DEFAULT_CHECKSUM = ["sha256", "sha512"]


def _sorted_walk(data_dir: str) -> Generator[str, None, None]:
def _sorted_walk(data_dir: str, dirs_only: bool = False) -> Generator[str, None, None]:
"Generate a sorted list of filenames or directory names"
for dirpath, dirnames, filenames in os.walk(data_dir):
filenames.sort()
relative_dirpath = Path(dirpath).relative_to(data_dir)
dirnames.sort()
for fn in filenames:
path = os.path.join(dirpath, fn)
yield path
if len(filenames) > 1000 or dirs_only:
for dn in dirnames:
path = os.path.join(relative_dirpath, dn)
yield path
else:
filenames.sort()
for fn in filenames:
path = os.path.join(relative_dirpath, fn)
yield path


def _encode_filename(s: str) -> str:
Expand All @@ -44,26 +51,36 @@ def genertate_filelist(drive_path: Path) -> str:
return "\n".join(filenames)


def generate_manifest(drive_id: str) -> Manifest:
def generate_manifest(drive_path: Path) -> Manifest:
"""Generate a manifest from a drive ID.
In future, provide logic for a service account to mount a research drive.
Currently the file list is generated from whatever directory the caller passes in.
"""
# mount drive based on ID
# use service account to mount drive to mountpoint
_ = f"//files.auckland.ac.nz/research/{drive_id}"
mountpoint = Path("tests/restst000000001-testing")
manifest = genertate_filelist(mountpoint)
manifest = genertate_filelist(drive_path)
return Manifest(manifest=manifest)


def bagit_exists(drive_path: Path) -> bool:
    """Report whether ``drive_path`` has the outward shape of a BagIt bag.

    Only the layout is inspected: a ``bagit.txt`` file and a ``data``
    directory directly under ``drive_path``. The bag itself is not validated.
    """
    declaration = drive_path / "bagit.txt"
    if not declaration.is_file():
        return False
    payload_dir = drive_path / "data"
    return payload_dir.is_dir()


def bag_directory(drive_path: Path, bag_info: Dict[str, str]) -> None:
"""Create a bagit bag from a given directory
Args:
drive_path (Path): the path to the directory to bag
bag_info (Dict[str,str]): a dictionary documenting ownership of the bag
"""
# if a bagit already exists update it
if bagit_exists(drive_path):
bag = bagit.Bag(str(drive_path))
bag.info = bag.info | bag_info
bag.save(processes=PROCESSES, manifests=True)
return

_ = bagit.make_bag(
bag_dir=drive_path.as_posix(),
bag_info=bag_info,
Expand Down
1 change: 0 additions & 1 deletion src/crate/ro_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ def add_project(
project_properties = crate_project.model_dump(
exclude={"id", "codes"}, by_alias=True, exclude_none=True
)
# project_properties = project_properties | sumbission_properties
project_id = f"{PROJECT_PREFIX}{crate_project.id}"
project_entity = ContextEntity(
crate=self.crate,
Expand Down
17 changes: 17 additions & 0 deletions src/crate/ro_loader.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
"""Classes and functions for loading and archiving RO-Crates
"""

import shutil
import tarfile
from enum import Enum
from pathlib import Path
from typing import Any, Dict

import orjson
from bagit import Bag
from rocrate.rocrate import ROCrate

JsonType = Dict[str, Any]
Expand Down Expand Up @@ -129,4 +131,19 @@ def archive_crate(
)
out_tar.close()
case ARCHIVETYPE.ZIP:
self.crate.source = crate_location
self.crate.write_zip(file_location)


def zip_existing_crate(crate_destination: Path, crate_location: Path) -> None:
    """Write an existing, bagged RO-Crate directory into a Zip archive.

    Args:
        crate_destination (Path): where to write the archive. A trailing
            ``.zip`` suffix is dropped first because ``shutil.make_archive``
            appends the extension itself.
        crate_location (Path): the BagIt directory that holds the RO-Crate.

    Raises:
        FileExistsError: if ``crate_location`` is not a directory, or its
            ``data`` payload has no ``ro-crate-metadata.json``.
        ValueError: if ``crate_location`` does not validate as a BagIt bag.
    """
    if crate_destination.suffix == ".zip":
        crate_destination = crate_destination.with_suffix("")
    if not crate_location.is_dir():
        raise FileExistsError("RO-Crate Source should be a directory")
    # NOTE(review): constructing Bag may itself raise bagit's own error when
    # the directory has no bag declaration - confirm whether callers rely on it.
    bag = Bag(str(crate_location))
    # Bag.validate() reports failure by raising, never by returning False, so
    # testing its return value could not reach the ValueError below.
    # Bag.is_valid() runs the same validation and returns a bool.
    if not bag.is_valid():
        raise ValueError("RO-Crate Source should be a valid BagIt")
    metadata_file = crate_location / "data" / "ro-crate-metadata.json"
    if not metadata_file.is_file():
        raise FileExistsError("No RO-Crate metadata found in RO-Crate source")
    shutil.make_archive(str(crate_destination), "zip", str(crate_location))
1 change: 0 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,6 @@ class Meta:
text="res???#########-????????",
letters="abcdefghijklmnopqrstuvwxyz",
)
# submission = factory.SubFactory(drive_offboard_submission_factory)

return ResearchDriveServiceFactory

Expand Down
7 changes: 6 additions & 1 deletion tests/test-requests.http
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ GET {{server_url}}/api/v1/resdriveinfo/
?drive_id={{drive_id}}
x-api-key: {{api_key}}

###
GET {{server_url}}/api/v1/resdrivemanifest/
?drive_id={{drive_id}}
x-api-key: {{api_key}}

###

POST {{server_url}}/api/v1/resdriveinfo
Expand Down Expand Up @@ -149,7 +154,7 @@ x-api-key: {{api_key}}
{
"retentionPeriodYears": 6,
"dataClassification": "Sensitive",
"isCompleted": false,
"isCompleted": true,
"driveName": "reslig202200001-Tītoki-metabolomics"

}
Loading

0 comments on commit 2a5ea56

Please sign in to comment.