Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleanup tests and minor bug fixes #33

Merged
merged 6 commits into from
Aug 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
.vscode
/venv*/
/.venv*/
*.egg-info*
Expand Down
146 changes: 146 additions & 0 deletions data/valid/output/heal_dd_from_redcap_dd_export.csv

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"title": "Example Data Dictionary",
"description": "This is an example",
"description": "This is a proof of concept to demonstrate the healdata-utils functionality",
"title": "Healdata-utils Demonstration Data Dictionary",
"data_dictionary": [
{
"name": "study_id",
Expand Down
8 changes: 8 additions & 0 deletions data/valid/output/heal_dd_from_spss_sav_dataset1.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
module,name,title,description,type,format,constraints.maxLength,constraints.enum,constraints.pattern,constraints.maximum,constraints.minimum,encodings,ordered,missingValues,trueValues,falseValues,repo_link,standardsMappings.type,standardsMappings.label,standardsMappings.url,standardsMappings.source,standardsMappings.id,relatedConcepts.type,relatedConcepts.label,relatedConcepts.url,relatedConcepts.source,relatedConcepts.id,univarStats.median,univarStats.mean,univarStats.std,univarStats.min,univarStats.max,univarStats.mode,univarStats.count,univarStats.twentyFifthPercentile,univarStats.seventyFifthPercentile,univarStats.categoricalMarginals.name,univarStats.categoricalMarginals.count
,id,,Unique identifier for participant,integer,,,,,,,,,-99.0|-98.0,,,,,,,,,,,,,,,,,,,,,,,,
,visit_dt,,Date of the interview,any,,,,,,,,,1582-10-13 23:58:21|1582-10-13 23:58:22,,,,,,,,,,,,,,,,,,,,,,,,
,sex_at_birth,,The self-reported sex of the participant/subject at birth,integer,,,1.0|2.0|3.0|4.0,,,,1.0=Male|2.0=Female|3.0=Intersex|4.0=None of these describe me|-99.0=Not reported|-98.0=Prefer not to answer,,-99.0|-98.0,,,,,,,,,,,,,,,,,,,,,,,,
,race,,Self-reported race,integer,,,1.0|2.0|3.0|4.0|5.0|6.0|7.0|8.0,,,,1.0=White|2.0=Black or African American|3.0=American Indian or Alaska Native|4.0=Native|5.0=Hawaiian or Other Pacific Islander|6.0=Asian|7.0=Some other race|8.0=Multiracial|-99.0=Not reported|-98.0=Prefer not to answer,,-99.0|-98.0,,,,,,,,,,,,,,,,,,,,,,,,
,hispanic_ethnicity,,"Are you of Hispanic, Latino, or Spanish origin?",integer,,,1.0|0.0,,,,1.0=Yes|0.0=No|-99.0=Not reported|-98.0=Prefer not to answer,,-99.0|-98.0,,,,,,,,,,,,,,,,,,,,,,,,
,SU4,,Heroin Days Used in days,integer,,,,,,,-99.0=Not reported|-98.0=Prefer not to answer,,-99.0|-98.0,,,,,,,,,,,,,,,,,,,,,,,,
,age,,Age of participant in year,integer,,,,,,,-99.0=Not reported|-98.0=Prefer not to answer,,-99.0|-98.0,,,,,,,,,,,,,,,,,,,,,,,,
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"title": "Example Data Dictionary",
"description": "This is an example",
"description": "This is a proof of concept to demonstrate the healdata-utils functionality",
"title": "Healdata-utils Demonstration Data Dictionary",
"data_dictionary": [
{
"name": "id",
Expand All @@ -13,7 +13,7 @@
},
{
"name": "visit_dt",
"type": "datetime",
"type": "any",
"missingValues": [
"1582-10-13 23:58:21",
"1582-10-13 23:58:22"
Expand Down Expand Up @@ -89,8 +89,8 @@
},
"constraints": {
"enum": [
"0.0",
"1.0"
"1.0",
"0.0"
]
},
"missingValues": [
Expand Down
8 changes: 8 additions & 0 deletions data/valid/output/heal_dd_from_stata_dta_dataset1.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
module,name,title,description,type,format,constraints.maxLength,constraints.enum,constraints.pattern,constraints.maximum,constraints.minimum,encodings,ordered,missingValues,trueValues,falseValues,repo_link,standardsMappings.type,standardsMappings.label,standardsMappings.url,standardsMappings.source,standardsMappings.id,relatedConcepts.type,relatedConcepts.label,relatedConcepts.url,relatedConcepts.source,relatedConcepts.id,univarStats.median,univarStats.mean,univarStats.std,univarStats.min,univarStats.max,univarStats.mode,univarStats.count,univarStats.twentyFifthPercentile,univarStats.seventyFifthPercentile,univarStats.categoricalMarginals.name,univarStats.categoricalMarginals.count
,id,,Unique identifier for participant,integer,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,visit_dt,,Date of the interview,any,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,sex_at_birth,,The self-reported sex of the participant/subject at birth,any,,,1|2|3|4,,,,1=Male|2=Female|3=Intersex|4=None of these describe me|a=Not reported|b=Prefer not to answer,,a|b,,,,,,,,,,,,,,,,,,,,,,,,
,race,,Self-reported race,any,,,1|2|3|4|5|6|7|8,,,,1=White|2=Black or African American|3=American Indian or Alaska Native|4=Native|5=Hawaiian or Other Pacific Islander|6=Asian|7=Some other race|8=Multiracial|a=Not reported|b=Prefer not to answer,,a|b,,,,,,,,,,,,,,,,,,,,,,,,
,hispanic_ethnicity,,"Are you of Hispanic, Latino, or Spanish origin?",boolean,,,0|1,,,,0=No|1=Yes|a=Not reported|b=Prefer not to answer,,a|b,,,,,,,,,,,,,,,,,,,,,,,,
,SU4,,Heroin Days Used in days,any,,,,,,,a=Not reported|b=Prefer not to answer,,a|b,,,,,,,,,,,,,,,,,,,,,,,,
,age,,Age of participant in year,integer,,,b|a,,,,a=Not reported|b=Prefer not to answer,,,,,,,,,,,,,,,,,,,,,,,,,,
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"title": "Example Data Dictionary",
"description": "This is an example",
"description": "This is a proof of concept to demonstrate the healdata-utils functionality",
"title": "Healdata-utils Demonstration Data Dictionary",
"data_dictionary": [
{
"name": "id",
Expand All @@ -9,12 +9,12 @@
},
{
"name": "visit_dt",
"type": "datetime",
"type": "any",
"description": "Date of the interview"
},
{
"name": "sex_at_birth",
"type": "integer",
"type": "any",
"encodings": {
"1": "Male",
"2": "Female",
Expand All @@ -39,7 +39,7 @@
},
{
"name": "race",
"type": "integer",
"type": "any",
"encodings": {
"1": "White",
"2": "Black or African American",
Expand Down Expand Up @@ -72,7 +72,7 @@
},
{
"name": "hispanic_ethnicity",
"type": "integer",
"type": "boolean",
"encodings": {
"0": "No",
"1": "Yes",
Expand All @@ -93,7 +93,7 @@
},
{
"name": "SU4",
"type": "integer",
"type": "any",
"encodings": {
"a": "Not reported",
"b": "Prefer not to answer"
Expand All @@ -116,8 +116,8 @@
},
"constraints": {
"enum": [
"b",
"a"
"a",
"b"
]
},
"description": "Age of participant in year"
Expand Down
4 changes: 4 additions & 0 deletions errors/heal-csv-errors.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"valid": true,
"errors": []
}
4 changes: 4 additions & 0 deletions errors/heal-json-errors.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"valid": true,
"errors": []
}
2 changes: 1 addition & 1 deletion src/healdata_utils/transforms/readstat/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def convert_readstat(file_path,
field['encodings'] = value_labels
#NOTE: enums are assumed if labels represent entire set of values
# this avoids value labels that are, for example, partials such as top/bottom encodings
enums = set(value_labels.keys()).difference(set(missing_values))
enums = [val for val in value_labels.keys() if not val in missing_values]
constraints_enums = {'constraints':{'enum':[str(v) for v in enums]}}
field.update(constraints_enums)

Expand Down
137 changes: 0 additions & 137 deletions tests/compile_tests/compile_test_assertions.py

This file was deleted.

62 changes: 62 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import pytest
from pathlib import Path
from healdata_utils.cli import choice_fxn
import json

@pytest.fixture(scope="module")
def fields_propname():
    """Return the key under which a data dictionary stores its field records."""
    propname = "data_dictionary"
    return propname

@pytest.fixture(scope="module")
def valid_input_params():
    """Build the input parameters for each valid sample file.

    Returns:
        A dict keyed by input type ("dta", "sav", "redcap.csv"). Each value
        has a ``filepath`` pointing into ``data/valid/input`` and the
        ``data_dictionary_props`` (description and title) shared by all
        entries.
    """
    sample_dir = Path("data/valid/input")
    # One props dict is shared by every entry, as in the expected outputs.
    shared_props = {
        "description": (
            "This is a proof of concept to demonstrate"
            " the healdata-utils functionality"
        ),
        "title": "Healdata-utils Demonstration Data Dictionary",
    }
    sample_files = {
        "dta": "stata_dta_dataset1.dta",
        "sav": "spss_sav_dataset1.sav",
        "redcap.csv": "redcap_dd_export.redcap.csv",
    }
    return {
        inputtype: {
            "filepath": sample_dir.joinpath(filename),
            "data_dictionary_props": shared_props,
        }
        for inputtype, filename in sample_files.items()
    }

@pytest.fixture(scope="module")
def valid_output_json():
    """Load the expected JSON data dictionaries, keyed by input type.

    Returns:
        A dict mapping each input type ("dta", "sav", "redcap.csv") to the
        parsed contents of its expected JSON file in ``data/valid/output``.

    Raises:
        Exception: if an input type is not a member of ``choice_fxn``
            (presumably the CLI's registry of conversion functions).
    """
    expected_dir = Path("data/valid/output")
    expected_files = {
        "dta": "heal_dd_from_stata_dta_dataset1.json",
        "sav": "heal_dd_from_spss_sav_dataset1.json",
        "redcap.csv": "heal_dd_from_redcap_dd_export.json",
    }
    loaded = {}
    for inputtype, filename in expected_files.items():
        # Fail fast if the expected-output table drifts from choice_fxn.
        if inputtype not in choice_fxn:
            raise Exception("Inputtype not in registered fxns")
        contents = expected_dir.joinpath(filename).read_text()
        loaded[inputtype] = json.loads(contents)

    return loaded

@pytest.fixture(scope="module")
def valid_output_csv():
    """Load the expected CSV data dictionaries as lists of lines.

    Returns:
        A dict mapping each input type ("dta", "sav", "redcap.csv") to the
        text of its expected CSV file in ``data/valid/output``, split on
        ``"\\n"``. A file ending in a newline therefore yields a trailing
        empty string.

    Raises:
        Exception: if an input type is not a member of ``choice_fxn``
            (presumably the CLI's registry of conversion functions).
    """
    expected_dir = Path("data/valid/output")
    expected_files = {
        "dta": "heal_dd_from_stata_dta_dataset1.csv",
        "sav": "heal_dd_from_spss_sav_dataset1.csv",
        "redcap.csv": "heal_dd_from_redcap_dd_export.csv",
    }
    loaded = {}
    for inputtype, filename in expected_files.items():
        # Fail fast if the expected-output table drifts from choice_fxn.
        if inputtype not in choice_fxn:
            raise Exception("Inputtype not in registered fxns")
        contents = expected_dir.joinpath(filename).read_text()
        loaded[inputtype] = contents.split("\n")

    return loaded



Loading