-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
e923d62
commit 7cbeafc
Showing
1 changed file
with
161 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
# Import libraries | ||
from __future__ import print_function | ||
|
||
import pandas as pd | ||
import psycopg2 | ||
import getpass | ||
import argparse | ||
|
||
from collections import OrderedDict | ||
|
||
# define the queries used to get neuromuscular blocks | ||
queries = {"eicu": """ | ||
set search_path to public,eicu_crd; | ||
with has_vent as | ||
( | ||
select | ||
distinct p.patientunitstayid | ||
FROM vent_unpivot_rc p | ||
-- only include settings before 24 hours | ||
WHERE p.chartoffset <= 24*60 | ||
AND p.peakpressure IS NOT NULL | ||
) | ||
, pt_in_hosp as | ||
( | ||
select pt.hospitalid | ||
, sum(neuroblock_day1) as num_neurblock | ||
, sum(case when v.patientunitstayid is not null then 1 else 0 end) as num_vent | ||
, sum(case when v.patientunitstayid is not null and neuroblock_day1 = 1 then 1 else 0 end) as num_vent_and_nb | ||
, count(pt.patientunitstayid) as num_pat_total | ||
from patient pt | ||
inner join mp_neuroblock nb | ||
on pt.patientunitstayid = nb.patientunitstayid | ||
left join has_vent v | ||
on pt.patientunitstayid = v.patientunitstayid | ||
group by pt.hospitalid | ||
) | ||
, hosp_in_cohort as | ||
( | ||
select distinct hospitalid | ||
from mp_cohort | ||
where exclusion_no_peak_pressure = 0 | ||
) | ||
-- define what hospitals we keep: | ||
-- 1) at least 10 patients admitted | ||
-- 2) have good vent data (defined by hosp_in_cohort) | ||
-- 3) at least 3% of patients are on neuromuscular blockade | ||
, hosp_keep as | ||
( | ||
select | ||
h.hospitalid | ||
, ROUND(100.0*num_neurblock::NUMERIC/num_pat_total,2) as percent_nb | ||
from pt_in_hosp h | ||
LEFT JOIN hosp_in_cohort co | ||
ON h.hospitalid = co.hospitalid | ||
WHERE num_pat_total >= 10 | ||
AND co.hospitalid is not null | ||
AND ROUND(100.0*num_neurblock::NUMERIC/num_pat_total,2) >= 3 | ||
) | ||
select | ||
d.patientunitstayid | ||
, d.hospitalid | ||
, d.neuroblock_day1 | ||
, d.neuroblock_day2 | ||
FROM mp_data d | ||
INNER JOIN hosp_keep h | ||
ON d.hospitalid = h.hospitalid | ||
WHERE patientunitstayid in | ||
( | ||
select patientunitstayid | ||
from mp_cohort | ||
where GREATEST(exclusion_non_adult, exclusion_secondary_hospital_stay, exclusion_secondary_icu_stay, exclusion_by_apache, exclusion_no_rc_data, exclusion_trach, exclusion_not_vent_48hr, exclusion_no_peak_pressure) = 0 | ||
) | ||
ORDER BY patientunitstayid | ||
""", | ||
'mimic': """ | ||
set search_path to public,mimiciii; | ||
with vw1 as | ||
( | ||
select co.icustay_id | ||
, max(CASE WHEN coalesce(nb1.icustay_id, nb2.icustay_id) IS NOT NULL THEN 1 ELSE 0 END) as neuroblock_day1 | ||
from mpwr_cohort co | ||
LEFT JOIN neuroblock_dose nb1 | ||
ON nb1.icustay_id = co.icustay_id | ||
and nb1.starttime > co.starttime_first_vent - interval '1' hour | ||
and nb1.starttime <= co.starttime_first_vent + interval '24' hour | ||
LEFT JOIN neuroblock_dose nb2 | ||
ON nb2.icustay_id = co.icustay_id | ||
and nb2.starttime < co.starttime_first_vent - interval '1' hour | ||
and nb2.endtime >= co.starttime_first_vent - interval '1' hour | ||
GROUP BY co.icustay_id | ||
) | ||
-- day 2 | ||
, vw2 as | ||
( | ||
select co.icustay_id | ||
, max(CASE WHEN coalesce(nb1.icustay_id, nb2.icustay_id) IS NOT NULL THEN 1 ELSE 0 END) as neuroblock_day2 | ||
from mpwr_cohort co | ||
LEFT JOIN neuroblock_dose nb1 | ||
ON nb1.icustay_id = co.icustay_id | ||
and nb1.starttime > co.starttime_first_vent + interval '24' hour | ||
and nb1.starttime <= co.starttime_first_vent + interval '48' hour | ||
LEFT JOIN neuroblock_dose nb2 | ||
ON nb2.icustay_id = co.icustay_id | ||
and nb2.starttime < co.starttime_first_vent + interval '24' hour | ||
and nb2.endtime >= co.starttime_first_vent + interval '24' hour | ||
GROUP BY co.icustay_id | ||
) | ||
-- data has exclusions applied | ||
select | ||
d.icustay_id | ||
, neuroblock_day1 | ||
, neuroblock_day2 | ||
from mpwr_data d | ||
LEFT JOIN vw1 | ||
on d.icustay_id = vw1.icustay_id | ||
LEFT JOIN vw2 | ||
on d.icustay_id = vw2.icustay_id | ||
""" | ||
} | ||
|
||
parser = argparse.ArgumentParser(description='Add year to a dataset') | ||
parser.add_argument('data', default='mimic', | ||
help='filename of data that needs to have year') | ||
|
||
args = parser.parse_args() | ||
|
||
if args.data in ('mimic', 'data_mimic', 'data_mimic.csv'): | ||
join_id = 'icustay_id' | ||
dbname = 'mimic' | ||
fn = '../data/pin_mimic.csv' | ||
fn_out = '../data/mimic_neuroblock.csv' | ||
query = queries['mimic'] | ||
|
||
elif args.data in ('eicu', 'pin_eicu', 'pin_eicu.csv', 'data_eicu', 'data_eicu.csv'): | ||
join_id = 'patientunitstayid' | ||
dbname = 'eicu' | ||
fn = '../data/pin_eicu.csv' | ||
fn_out = '../data/eicu_neuroblock.csv' | ||
query = queries['eicu'] | ||
|
||
# Connect to local postgres version of mimic | ||
sqluser = getpass.getuser() | ||
con = psycopg2.connect(dbname=dbname, user=sqluser) | ||
print('Connected to postgres {}.{}.{}!'.format(int(con.server_version/10000), | ||
int((con.server_version - int(con.server_version/10000)*10000)/100), | ||
int(con.server_version - int(con.server_version/100)*100))) | ||
|
||
df = pd.read_csv(fn, header=0) | ||
nb = pd.read_sql_query(query, con) | ||
|
||
print('{} patients in original cohort.'.format(df.shape[0])) | ||
|
||
df = df.merge(nb, how='inner', on=join_id) | ||
for c in df.columns: | ||
if c[-3:] == '_nb': | ||
df.drop(c, axis=1, inplace=True) | ||
|
||
print('{} patients kept after exclusions.'.format(df.shape[0])) | ||
print('{} patients have neuroblock on day1.'.format(df['neuroblock_day1'].sum())) | ||
print('{} patients have neuroblock on day2.'.format(df['neuroblock_day2'].sum())) | ||
df.to_csv(fn_out, index=False) |