HEAL · mbkranz · Aug 9, 2023 · Aug 9, 2023 · Aug 9, 2023 · Aug 9, 2023
@@ -1,3 +1,4 @@
+.vscode
 /venv*/
 /.venv*/
 *.egg-info*

@@ -1,6 +1,6 @@
 {
-    "title": "Example Data Dictionary",
-    "description": "This is an example",
+    "description": "This is a proof of concept to demonstrate the healdata-utils functionality",
+    "title": "Healdata-utils Demonstration Data Dictionary",
     "data_dictionary": [
         {
             "name": "study_id",

@@ -0,0 +1,8 @@
+module,name,title,description,type,format,constraints.maxLength,constraints.enum,constraints.pattern,constraints.maximum,constraints.minimum,encodings,ordered,missingValues,trueValues,falseValues,repo_link,standardsMappings.type,standardsMappings.label,standardsMappings.url,standardsMappings.source,standardsMappings.id,relatedConcepts.type,relatedConcepts.label,relatedConcepts.url,relatedConcepts.source,relatedConcepts.id,univarStats.median,univarStats.mean,univarStats.std,univarStats.min,univarStats.max,univarStats.mode,univarStats.count,univarStats.twentyFifthPercentile,univarStats.seventyFifthPercentile,univarStats.categoricalMarginals.name,univarStats.categoricalMarginals.count
+,id,,Unique identifier for participant,integer,,,,,,,,,-99.0|-98.0,,,,,,,,,,,,,,,,,,,,,,,,
+,visit_dt,,Date of the interview,any,,,,,,,,,1582-10-13 23:58:21|1582-10-13 23:58:22,,,,,,,,,,,,,,,,,,,,,,,,
+,sex_at_birth,,The self-reported sex of the participant/subject at birth,integer,,,1.0|2.0|3.0|4.0,,,,1.0=Male|2.0=Female|3.0=Intersex|4.0=None of these describe me|-99.0=Not reported|-98.0=Prefer not to answer,,-99.0|-98.0,,,,,,,,,,,,,,,,,,,,,,,,
+,race,,Self-reported race,integer,,,1.0|2.0|3.0|4.0|5.0|6.0|7.0|8.0,,,,1.0=White|2.0=Black or African American|3.0=American Indian or Alaska Native|4.0=Native|5.0=Hawaiian or Other Pacific Islander|6.0=Asian|7.0=Some other race|8.0=Multiracial|-99.0=Not reported|-98.0=Prefer not to answer,,-99.0|-98.0,,,,,,,,,,,,,,,,,,,,,,,,
+,hispanic_ethnicity,,"Are you of Hispanic, Latino, or Spanish origin?",integer,,,1.0|0.0,,,,1.0=Yes|0.0=No|-99.0=Not reported|-98.0=Prefer not to answer,,-99.0|-98.0,,,,,,,,,,,,,,,,,,,,,,,,
+,SU4,,Heroin Days Used in days,integer,,,,,,,-99.0=Not reported|-98.0=Prefer not to answer,,-99.0|-98.0,,,,,,,,,,,,,,,,,,,,,,,,
+,age,,Age of participant in year,integer,,,,,,,-99.0=Not reported|-98.0=Prefer not to answer,,-99.0|-98.0,,,,,,,,,,,,,,,,,,,,,,,,
@@ -1,6 +1,6 @@
 {
-    "title": "Example Data Dictionary",
-    "description": "This is an example",
+    "description": "This is a proof of concept to demonstrate the healdata-utils functionality",
+    "title": "Healdata-utils Demonstration Data Dictionary",
     "data_dictionary": [
         {
             "name": "id",
@@ -13,7 +13,7 @@
         },
         {
             "name": "visit_dt",
-            "type": "datetime",
+            "type": "any",
             "missingValues": [
                 "1582-10-13 23:58:21",
                 "1582-10-13 23:58:22"
@@ -89,8 +89,8 @@
             },
             "constraints": {
                 "enum": [
-                    "0.0",
-                    "1.0"
+                    "1.0",
+                    "0.0"
                 ]
             },
             "missingValues": [

@@ -0,0 +1,8 @@
+module,name,title,description,type,format,constraints.maxLength,constraints.enum,constraints.pattern,constraints.maximum,constraints.minimum,encodings,ordered,missingValues,trueValues,falseValues,repo_link,standardsMappings.type,standardsMappings.label,standardsMappings.url,standardsMappings.source,standardsMappings.id,relatedConcepts.type,relatedConcepts.label,relatedConcepts.url,relatedConcepts.source,relatedConcepts.id,univarStats.median,univarStats.mean,univarStats.std,univarStats.min,univarStats.max,univarStats.mode,univarStats.count,univarStats.twentyFifthPercentile,univarStats.seventyFifthPercentile,univarStats.categoricalMarginals.name,univarStats.categoricalMarginals.count
+,id,,Unique identifier for participant,integer,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
+,visit_dt,,Date of the interview,any,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
+,sex_at_birth,,The self-reported sex of the participant/subject at birth,any,,,1|2|3|4,,,,1=Male|2=Female|3=Intersex|4=None of these describe me|a=Not reported|b=Prefer not to answer,,a|b,,,,,,,,,,,,,,,,,,,,,,,,
+,race,,Self-reported race,any,,,1|2|3|4|5|6|7|8,,,,1=White|2=Black or African American|3=American Indian or Alaska Native|4=Native|5=Hawaiian or Other Pacific Islander|6=Asian|7=Some other race|8=Multiracial|a=Not reported|b=Prefer not to answer,,a|b,,,,,,,,,,,,,,,,,,,,,,,,
+,hispanic_ethnicity,,"Are you of Hispanic, Latino, or Spanish origin?",boolean,,,0|1,,,,0=No|1=Yes|a=Not reported|b=Prefer not to answer,,a|b,,,,,,,,,,,,,,,,,,,,,,,,
+,SU4,,Heroin Days Used in days,any,,,,,,,a=Not reported|b=Prefer not to answer,,a|b,,,,,,,,,,,,,,,,,,,,,,,,
+,age,,Age of participant in year,integer,,,b|a,,,,a=Not reported|b=Prefer not to answer,,,,,,,,,,,,,,,,,,,,,,,,,,
@@ -1,6 +1,6 @@
 {
-    "title": "Example Data Dictionary",
-    "description": "This is an example",
+    "description": "This is a proof of concept to demonstrate the healdata-utils functionality",
+    "title": "Healdata-utils Demonstration Data Dictionary",
     "data_dictionary": [
         {
             "name": "id",
@@ -9,12 +9,12 @@
         },
         {
             "name": "visit_dt",
-            "type": "datetime",
+            "type": "any",
             "description": "Date of the interview"
         },
         {
             "name": "sex_at_birth",
-            "type": "integer",
+            "type": "any",
             "encodings": {
                 "1": "Male",
                 "2": "Female",
@@ -39,7 +39,7 @@
         },
         {
             "name": "race",
-            "type": "integer",
+            "type": "any",
             "encodings": {
                 "1": "White",
                 "2": "Black or African American",
@@ -72,7 +72,7 @@
         },
         {
             "name": "hispanic_ethnicity",
-            "type": "integer",
+            "type": "boolean",
             "encodings": {
                 "0": "No",
                 "1": "Yes",
@@ -93,7 +93,7 @@
         },
         {
             "name": "SU4",
-            "type": "integer",
+            "type": "any",
             "encodings": {
                 "a": "Not reported",
                 "b": "Prefer not to answer"
@@ -116,8 +116,8 @@
             },
             "constraints": {
                 "enum": [
-                    "b",
-                    "a"
+                    "a",
+                    "b"
                 ]
             },
             "description": "Age of participant in year"

@@ -0,0 +1,4 @@
+{
+    "valid": true,
+    "errors": []
+}
@@ -0,0 +1,4 @@
+{
+    "valid": true,
+    "errors": []
+}
@@ -94,7 +94,7 @@ def convert_readstat(file_path,
             field['encodings'] = value_labels
             #NOTE: enums are assumed if labels represent entire set of values
             # this avoids value labels that are, for example, partials such as top/bottom encodings
-            enums = set(value_labels.keys()).difference(set(missing_values))
+            enums = [val for val in value_labels.keys() if not val in missing_values]
             constraints_enums = {'constraints':{'enum':[str(v) for v in enums]}}
             field.update(constraints_enums)
 

@@ -0,0 +1,62 @@
+import pytest 
+from pathlib import Path
+from healdata_utils.cli import choice_fxn
+import json
+
+@pytest.fixture(scope="module")
+def fields_propname():
+    return "data_dictionary"  # top-level JSON key holding the list of field definitions
+
+@pytest.fixture(scope="module")
+def valid_input_params():
+    inputdir = Path("data/valid/input")
+    data_dictionary_props = {
+        "description": (
+            "This is a proof of concept to demonstrate"
+            " the healdata-utils functionality"
+        ),
+        "title": "Healdata-utils Demonstration Data Dictionary",
+    }
+    input_params = {
+        "dta":{"filepath":inputdir.joinpath("stata_dta_dataset1.dta"),"data_dictionary_props":data_dictionary_props},
+        "sav":{"filepath":inputdir.joinpath("spss_sav_dataset1.sav"),"data_dictionary_props":data_dictionary_props},
+        "redcap.csv":{"filepath":inputdir.joinpath("redcap_dd_export.redcap.csv"),"data_dictionary_props":data_dictionary_props},
+    }
+    return input_params
+
+@pytest.fixture(scope="module")
+def valid_output_json():
+    path = Path("data/valid/output")
+    filenames = {
+        "dta":"heal_dd_from_stata_dta_dataset1.json",
+        "sav":"heal_dd_from_spss_sav_dataset1.json",
+        "redcap.csv":"heal_dd_from_redcap_dd_export.json",
+    }
+    jsons = {}
+    for inputtype,name in filenames.items():
+        if inputtype in choice_fxn:
+            jsons[inputtype] = json.loads(path.joinpath(name).read_text())
+        else:
+            raise Exception("Inputtype not in registered fxns")
+
+    return jsons
+
+@pytest.fixture(scope="module")
+def valid_output_csv():
+    path = Path("data/valid/output")
+    filenames = {
+        "dta":"heal_dd_from_stata_dta_dataset1.csv",
+        "sav":"heal_dd_from_spss_sav_dataset1.csv",
+        "redcap.csv":"heal_dd_from_redcap_dd_export.csv",
+    }
+    csvs = {}
+    for inputtype,name in filenames.items():
+        if inputtype in choice_fxn:
+            csvs[inputtype] = path.joinpath(name).read_text().split("\n")
+        else:
+            raise Exception("Inputtype not in registered fxns")
+
+    return csvs
+
+
+