Skip to content

Commit

Permalink
Merge pull request #171 from svarona/iskylims_isciiides
Browse files Browse the repository at this point in the history
Merge branch 'develop' into iskylims_isciiides
  • Loading branch information
svarona authored Oct 31, 2023
2 parents c1af994 + eced4e0 commit 44cdd27
Show file tree
Hide file tree
Showing 157 changed files with 4,639,305 additions and 423 deletions.
23 changes: 23 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ BU-ISCIII provides a serie or services in its portfolio for supporting bioinform
- [copy\_sftp](#copy_sftp)
- [bioinfo\_doc](#bioinfo_doc)
- [archive](#archive)
- [autoclean\_sftp](#autoclean_sftp)
- [Acknowledgements](#acknowledgements)

## Installation
Expand Down Expand Up @@ -354,6 +355,28 @@ Options:
--help Show this message and exit.
```

#### autoclean_sftp

Example of usage:

```bash
bu-isciii autoclean-sftp
```

Help:

```bash
Usage: bu-isciii autoclean-sftp [OPTIONS]

Clean old sftp services

Options:
-s, --sftp_folder PATH Absolute path to sftp folder
-d, --days INTEGER Integer, remove files older than a window of `-d
[int]` days. Default 14 days.
--help Show this message and exit.
```

## Acknowledgements

The idea and design of this Python package are heavily inspired by [nf-core/tools](https://github.com/nf-core/tools).
23 changes: 23 additions & 0 deletions bu_isciii/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import bu_isciii.clean
import bu_isciii.archive
import bu_isciii.copy_sftp
import bu_isciii.autoclean_sftp

log = logging.getLogger()

Expand Down Expand Up @@ -592,5 +593,27 @@ def archive(
archive_ser.handle_archive()


# CLEAN OLD SFTP SERVICES
@bu_isciii_cli.command(help_priority=8)
@click.option(
    "-s",
    "--sftp_folder",
    type=click.Path(),
    default=None,
    help="Absolute path to sftp folder",
)
@click.option(
    "-d",
    "--days",
    type=int,
    default=14,
    help="Integer, remove files older than a window of `-d [int]` days. Default 14 days.",
)
def autoclean_sftp(sftp_folder, days):
    """Clean old sftp services"""
    # Build the cleaner from the CLI options and run the whole workflow
    # (path check, service scan, marking and removal) in one go.
    bu_isciii.autoclean_sftp.AutoremoveSftpService(
        sftp_folder, days
    ).handle_autoclean_sftp()


if __name__ == "__main__":
run_bu_isciii()
171 changes: 171 additions & 0 deletions bu_isciii/autoclean_sftp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
#!/usr/bin/env python

# import
import os
import re
import sys
import logging

import shutil
import rich

from datetime import datetime, timedelta

# local import
import bu_isciii
import bu_isciii.utils
import bu_isciii.config_json

log = logging.getLogger(__name__)
stderr = rich.console.Console(
stderr=True,
style="dim",
highlight=False,
force_terminal=bu_isciii.utils.rich_force_colors(),
)


# TODO: add to utils.py?
def timestamp_converter(timestamp):
    """Return the datetime that corresponds to a POSIX *timestamp*.

    The conversion is delegated to ``datetime.fromtimestamp`` with no
    timezone argument.
    """
    return datetime.fromtimestamp(timestamp)


class LastMofdificationFinder:
    """
    Identifies the latest modification in a directory

    The latest modification is the newest mtime among the directory itself
    and every file found below it (recursively). The mtimes of
    sub-directories are not taken into account.
    """

    def __init__(self, path):
        self.path = path
        # Newest mtime seen so far, as a POSIX timestamp (0 = nothing scanned)
        self.last_modified_time = 0

    def find_last_modification(self):
        """Scan ``self.path`` and return its latest modification.

        Returns:
            The value of ``timestamp_converter`` applied to the newest mtime
            found.
        """
        self.get_last_modified(self.path)
        return timestamp_converter(self.last_modified_time)

    def get_last_modified(self, directory):
        """Update ``self.last_modified_time`` with the newest mtime in *directory*.

        The stored value is only replaced when *directory* holds something
        newer than what has been recorded so far.
        """
        last_modified_time = os.path.getmtime(directory)

        # Walk the directory that was asked for (previously self.path was
        # walked here, which silently ignored the argument).
        for root, _dirs, files in os.walk(directory):
            for file in files:
                file_modified_time = os.path.getmtime(os.path.join(root, file))
                if file_modified_time > last_modified_time:
                    last_modified_time = file_modified_time

        if last_modified_time > self.last_modified_time:
            self.last_modified_time = last_modified_time


class AutoremoveSftpService:
    """
    Identifies services stored in an sftp directory
    and removes those that have not been updated/modified
    within a configurable window of days (14 by default)
    """

    def __init__(self, path=None, days=14):
        """Resolve the sftp folder and the age threshold.

        Args:
            path: Path to the sftp folder. When None the user is asked
                whether to use the default location (``data_path`` of the
                "global" configuration joined with ``sftp``) or to type one.
            days: Number of days without modifications after which a
                service is considered old.
        """
        # Parse input path
        if path is None:
            use_default = bu_isciii.utils.prompt_yn_question("Use default path?: ")
            if use_default:
                data_path = bu_isciii.config_json.ConfigJson().get_configuration(
                    "global"
                )["data_path"]
                self.path = os.path.join(data_path, "sftp")
            else:
                self.path = bu_isciii.utils.prompt_path(
                    msg="Directory where the sftp site is allocated:"
                )
        else:
            self.path = path

        # Define the margin threshold of days to mark old services
        self.days = timedelta(days=days)
        stderr.print(
            "Services older than "
            + str(self.days.days)
            + " days are going to be deleted from "
            + self.path
        )

    def _missing_path_message(self):
        """Build the error text shown when ``self.path`` does not exist."""
        # Plain concatenation: the previous version applied `%` to a string
        # without any conversion specifier, which raised TypeError.
        return (
            "[red]ERROR: It seems like finding the correct path is beneath me. I apologise. The path: "
            + str(self.path)
            + " does not exist. Exiting.."
        )

    # TODO: modify this. PR to make this method reusable outside the class
    def check_path_exists(self):
        """Return True when ``self.path`` exists; otherwise report and exit.

        Raises:
            SystemExit: with status 1 when the path is not found.
        """
        # if the folder path is not found, then bye
        if not os.path.exists(self.path):
            stderr.print(self._missing_path_message())
            sys.exit(1)
        return True

    # Uses regex to identify sftp-services & gets their latest modification
    def get_sftp_services(self):
        """Fill ``self.sftp_services`` with {service path: last modification}.

        Every directory below ``self.path`` whose name matches the service
        naming pattern is registered.

        Raises:
            SystemExit: when no service directory is found.
        """
        self.sftp_services = {}  # {sftp-service_path : last_update}
        # Compiled once, outside the directory loop
        service_pattern = re.compile(
            r"^[SRV][A-Z]+[0-9]+_\d{8}_[A-Z0-9]+_[a-zA-Z]+(?:\.[a-zA-Z]+)?_[a-zA-Z]$"
        )

        stderr.print("[blue]Scanning " + self.path + "...")
        for root, dirs, _files in os.walk(self.path):
            for dir_name in dirs:
                if not service_pattern.match(dir_name):
                    continue
                sftp_service_fullPath = os.path.join(root, dir_name)

                # Get sftp-service last modification
                service_finder = LastMofdificationFinder(sftp_service_fullPath)
                self.sftp_services[
                    sftp_service_fullPath
                ] = service_finder.find_last_modification()
        if not self.sftp_services:
            sys.exit(f"No services found in {self.path}")

    # Mark services older than $days
    def mark_toDelete(self):
        """Store in ``self.marked_services`` the services older than the window."""
        now = datetime.now()
        self.marked_services = [
            service
            for service, last_update in self.sftp_services.items()
            if now - last_update > self.days
        ]

    # Delete marked services
    def remove_oldservice(self):
        """Ask for confirmation and delete every marked service folder.

        Raises:
            SystemExit: when nothing is marked or the user declines.
        """
        if not self.marked_services:
            stderr.print(
                "[yellow]sftp-site up to date. There are no services older than "
                + str(self.days.days)
                + " days. Skiping autoclean-sftp... "
            )
            sys.exit()

        service_elements = "\n".join(self.marked_services)
        stderr.print(
            "The following services are going to be deleted from the sftp:\n"
            + service_elements
        )
        confirm_sftp_delete = bu_isciii.utils.prompt_yn_question("Are you sure?: ")
        if not confirm_sftp_delete:
            stderr.print("Aborting ...")
            sys.exit()

        for service in self.marked_services:
            try:
                stderr.print("Deleting service: " + service)
                shutil.rmtree(service)
            except OSError:
                # Best effort: report the failure and go on with the rest
                stderr.print("[red]ERROR: Cannot delete service folder:" + service)

    def handle_autoclean_sftp(self):
        """Run the whole workflow: check path, scan, mark and remove."""
        self.check_path_exists()
        self.get_sftp_services()
        self.mark_toDelete()
        self.remove_oldservice()
29 changes: 17 additions & 12 deletions bu_isciii/bioinfo_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ def __init__(
self.service_folder = os.path.join(
self.path, self.conf["services_path"], year, self.service_name
)
self.samples = self.resolution_info["samples"]
self.samples = self.resolution_info.get("samples", None)
self.handled_services = None
path_to_wkhtmltopdf = os.path.normpath(self.conf["wkhtmltopdf_path"])
self.config_pdfkit = pdfkit.configuration(wkhtmltopdf=path_to_wkhtmltopdf)
Expand Down Expand Up @@ -304,19 +304,24 @@ def create_markdown(self, file_path):
markdown_data["service"] = self.resolution_info
markdown_data["user_data"] = self.resolution_info["service_user_id"]
samples_in_service = {}
for sample_data in self.samples:
if sample_data["run_name"] not in samples_in_service:
samples_in_service[sample_data["run_name"]] = {}
if (
sample_data["project_name"]
not in samples_in_service[sample_data["run_name"]]
):

if self.samples is not None:
for sample_data in self.samples:
if sample_data["run_name"] not in samples_in_service:
samples_in_service[sample_data["run_name"]] = {}
if (
sample_data["project_name"]
not in samples_in_service[sample_data["run_name"]]
):
samples_in_service[sample_data["run_name"]][
sample_data["project_name"]
] = []
samples_in_service[sample_data["run_name"]][
sample_data["project_name"]
] = []
samples_in_service[sample_data["run_name"]][
sample_data["project_name"]
].append(sample_data["sample_name"])
].append(sample_data["sample_name"])
else:
samples_in_service = {" N/A": {" N/A": ["No recorded samples"]}}

markdown_data["samples"] = samples_in_service

# Resolution related information
Expand Down
24 changes: 14 additions & 10 deletions bu_isciii/clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ def __init__(
self.delete_files = self.get_clean_items(self.services_to_clean, type="files")
# self.delete_list = [item for item in self.delete_list if item]
self.nocopy = self.get_clean_items(self.services_to_clean, type="no_copy")
self.service_samples = self.resolution_info.get("Samples", None)

if option is None:
self.option = bu_isciii.utils.prompt_selection(
Expand Down Expand Up @@ -162,7 +163,7 @@ def show_removable(self, to_stdout=True):
"""
if to_stdout:
folders = ", ".join(self.delete_folders)
stderr.print(f"The following folders will be purge: {folders}")
stderr.print(f"The following folders will be purged: {folders}")
files = ", ".join(self.delete_files)
stderr.print(f"The following files will be deleted: {files}")
return
Expand Down Expand Up @@ -305,15 +306,18 @@ def purge_files(self):
Params:
"""
files_to_delete = []
for sample_info in self.service_samples:
for file in self.delete_files:
file_to_delete = file.replace("sample_name", sample_info["sample_name"])
files_to_delete.append(file_to_delete)
path_content = self.scan_dirs(to_find=files_to_delete)
for file in path_content:
os.remove(file)
stderr.print("[green]Successfully removed " + file)
if self.service_samples is not None:
files_to_delete = []
for sample_info in self.service_samples:
for file in self.delete_files:
file_to_delete = file.replace(
"sample_name", sample_info["sample_name"]
)
files_to_delete.append(file_to_delete)
path_content = self.scan_dirs(to_find=files_to_delete)
for file in path_content:
os.remove(file)
stderr.print("[green]Successfully removed " + file)
return

def purge_folders(self, sacredtexts=["lablog", "logs"], add="", verbose=True):
Expand Down
2 changes: 1 addition & 1 deletion bu_isciii/conf/configuration.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"options": ["-rlpv", "--update", "-L", "--inplace"],
"exclusions": [
"'*_NC'",
"'lablog'",
"'*lablog*'",
"'work'",
"'00-reads'",
"'*.sh'",
Expand Down
36 changes: 27 additions & 9 deletions bu_isciii/new_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def __init__(
self.services_requested = self.resolution_info["resolutions"][0][
"available_services"
]
self.service_samples = self.resolution_info["samples"]
self.service_samples = self.resolution_info.get("samples", None)

if ask_path and path is None:
stderr.print("Directory where you want to create the service folder.")
Expand Down Expand Up @@ -229,14 +229,32 @@ def samples_json(self):
f.close()

def create_new_service(self):
self.create_folder()
self.copy_template()
self.create_samples_id()
self.create_symbolic_links()
self.samples_json()
self.rest_api.put_request(
"update-state", "resolution", self.resolution_id, "state", "in_progress"
)
if self.service_samples is not None:
self.create_folder()
self.copy_template()
self.create_samples_id()
self.create_symbolic_links()
self.samples_json()
self.rest_api.put_request(
"update-state", "resolution", self.resolution_id, "state", "in_progress"
)
else:
stderr.print(
"[yellow]WARN: No samples recorded in service: " + self.resolution_id
)
if bu_isciii.utils.prompt_yn_question("Do you want to proceed?: "):
self.create_folder()
self.copy_template()
self.rest_api.put_request(
"update-state",
"resolution",
self.resolution_id,
"state",
"in_progress",
)
else:
stderr.print("Directory not created. Bye!")
sys.exit(1)

def get_resolution_id(self):
return self.resolution_id
Expand Down
Loading

0 comments on commit 44cdd27

Please sign in to comment.