Prepared package

LucaCerina · Feb 6, 2024 · af822f3 · af822f3
1 parent 159ea74
commit af822f3
Show file tree

Hide file tree

Showing 5 changed files with 55 additions and 7 deletions.
diff --git a/.gitignore b/.gitignore
@@ -160,4 +160,5 @@ cython_debug/
 #.idea/
 
 # Repo specific
-WSC_non_mapped_lines.txt
+WSC_non_mapped_lines.txt
+build.bat
diff --git a/README.md b/README.md
@@ -10,10 +10,18 @@ For example, obstructive apneas may be formatted as: 'Obs Apnea', 'OBS Apnea', '
 Other issues relate to missing information in the annotations' columns, inconsistent time formats (most are 24h, some am/pm) and other dirty bits that are not necessary and complicate automatic parsing.
 
 ## How to use this script
-Clone the repo or download the `wsc_clean.py` file and run it from command line as:
+Clone the repo or download the `wsc_clean.py` and `mappings.txt` files, then run the script from the command line as:
 
 `python wsc_clean.py <your_dataset_polysomnography_folder>`
 
+The package can also be installed from PyPI:
+
+`pip install wisconsinsc_cleaner`
+
+and executed as:
+
+`wsc_clean <your_dataset_polysomnography_folder>`
+
 ## Content of this repo
 A single Python script (no installation needed) parses all the annotation files and produces another set of annotation files with the suffix `.uniform.txt`.
 The mapping of annotations is available in the `mappings.txt` file in the form `A|B|C` (see [https://zzz.bwh.harvard.edu/luna/ref/annotations/#remap] for details), meaning that every instance of `B` or `C` will be mapped as `A`. If a mapping does not exist, the original value is returned with a prefix `misc:`.

diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,34 @@
+[build-system]
+requires = [
+    "setuptools"
+]
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools.package-data]
+wisconsinsc_cleaner = ["*.txt"]
+
+[project]
+name = "wisconsinsc_cleaner"
+version = "0.0.1"
+description = "A tool to clean and uniform annotation files from Wisconsin Sleep Cohort (WSC), distributed by NSRR"
+readme = "README.md"
+requires-python = ">=3.8"
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+    "Development Status :: 4 - Beta",
+]
+keywords = ['data storage', 'data cleaning', 'medical records']
+
+[project.scripts]
+wsc_clean = "wisconsinsc_cleaner.wsc_clean:main"
+
+[project.urls]
+"Homepage" = "https://github.com/LucaCerina/WisconsinSC_cleaner"
+"Bug Tracker" = "https://github.com/LucaCerina/WisconsinSC_cleaner/issues"
+
+[metadata]
+author = "LucaCerina"
+author_email = "[email protected]"
+license = "MIT"
diff --git a/mappings.txt → wisconsinsc_cleaner/mappings.txt b/mappings.txt → wisconsinsc_cleaner/mappings.txt
@@ -122,7 +122,7 @@ biocal:look_left|LOOK LEFT|eyes left|eyes left and back|move eyes left & center|
 biocal:look_right|LOOK RIGHT|eyes right|look r|eyes right and back|move eyes right & center|move eyes right, back
 biocal:look_up|LOOK UP|eyes up|eyes up and back|move eyes up, back|up
 biocal:look_down|LOOK DOWN|eyes down and back|move eyes down, back|eyes down
-biocal:look_left_righteyes left and right
+biocal:look_left_right|eyes left and right
 
 biocal:teeth_grit|GRIT TEETH
 biocal:clenc_jaw|clench jaw||clench teeth|clench

diff --git a/wsc_clean.py → wisconsinsc_cleaner/wsc_clean.py b/wsc_clean.py → wisconsinsc_cleaner/wsc_clean.py
@@ -6,6 +6,7 @@
 from csv import DictReader, DictWriter
 from datetime import datetime, timedelta
 from glob import glob
+from importlib import resources
 from itertools import zip_longest
 from time import perf_counter
 from typing import Tuple, Union
@@ -38,10 +39,12 @@ def datetime_sorter(start_time:datetime, input_time:datetime) -> int:
         delta += timedelta(days=1)
     return delta.total_seconds()
 
-def load_mappings(mapping_file:str) -> dict:
+def load_mappings() -> dict:
     """Convert maps in mappings.txt file to a dictionary to be used later.
     Raise ValueError in case of badly formatted map
     """
+    # TODO bit of a clunky path here
+    mapping_file = resources._get_package('wisconsinsc_cleaner').__path__._path[0]+'/mappings.txt'
     assert Path(mapping_file).exists(), f"Mapping file {mapping_file} not found."
     with open(mapping_file) as map_file:
         maps = map_file.read().splitlines()
@@ -414,7 +417,7 @@ def process_gamma_log(recording:str, input_filename:str, output_filename:str, ma
 
     return no_error, unmapped
 
-if __name__ == "__main__":
+def main():
     # Get data folder
     if len(sys.argv)<2:
         print("Error! Usage wsc_clean.py <wsc_polysomnography_folder>")
@@ -426,7 +429,7 @@ def process_gamma_log(recording:str, input_filename:str, output_filename:str, ma
 
     # Load annotations mappings
     print("Loading mappings")
-    mapping = load_mappings('./mappings.txt')
+    mapping = load_mappings()
 
     # Get all recordings
     print("Identifying recordings")
@@ -483,4 +486,6 @@ def process_gamma_log(recording:str, input_filename:str, output_filename:str, ma
     with open(f'./{non_mapped_filename}', 'w', encoding='utf-8') as nfile:
         for line in non_mapped_lines:
             nfile.write(line+'\n')
-
+
+if __name__ == "__main__":
+    main()