From 88bad4bfc1f3b4e235d16a6f50376fc442782eb0 Mon Sep 17 00:00:00 2001
From: Ryan Hays <ryan.hays28@gmail.com>
Date: Fri, 19 Feb 2021 14:44:11 +0000
Subject: [PATCH] Cortex code deleted

---
 LAMP/analysis/__init__.py        |   4 -
 LAMP/analysis/participant_ext.py | 540 -------------------------------
 LAMP/analysis/study_ext.py       | 256 ---------------
 3 files changed, 800 deletions(-)
 delete mode 100644 LAMP/analysis/__init__.py
 delete mode 100644 LAMP/analysis/participant_ext.py
 delete mode 100644 LAMP/analysis/study_ext.py

diff --git a/LAMP/analysis/__init__.py b/LAMP/analysis/__init__.py
deleted file mode 100644
index 7847982..0000000
--- a/LAMP/analysis/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from __future__ import absolute_import
-
-from LAMP.analysis.participant_ext import ParticipantExt
-from LAMP.analysis.study_ext import StudyExt 
\ No newline at end of file
diff --git a/LAMP/analysis/participant_ext.py b/LAMP/analysis/participant_ext.py
deleted file mode 100644
index 708eaca..0000000
--- a/LAMP/analysis/participant_ext.py
+++ /dev/null
@@ -1,540 +0,0 @@
-import pandas as pd 
-import numpy as np
-import datetime
-import os
-import math
-import LAMP
-import itertools
-from functools import reduce
-
-
-class ParticipantExt():
-    """
-    Create participant dataframe
-    """
-    def __init__(self, 
-                 id, 
-                 domains=None, 
-                 age=None, 
-                 race=None, 
-                 sex=None, 
-                 df_props={}):
-
-        self.id = id
-        self.domains = domains
-        self.age = age
-        self.race = race
-        self.sex = sex
-
-        self.df = self.create_df(**df_props)
-
-        self.impute_status = False
-        self.bin_status = False
-        self.normalize_status = False	
-
-    @property
-    def id(self):
-        return self._id
-
-    @property
-    def age(self):
-        return self._age
-
-    @property
-    def race(self):
-        return self._race
-
-    @property
-    def sex(self):
-        return self._sex
-
-    @id.setter
-    def id(self, value):
-        self._id = value
-
-    @age.setter
-    def age(self, value):
-        self._age = value
-
-    @race.setter
-    def race(self, value):
-        self._race = value
-
-    @sex.setter
-    def sex(self, value):
-        self._sex = value
-
-    def reset(self):
-        """
-        Resets the participant's df to be the original version
-        """
-        self.df = self.create_df()
-        self.impute_status, self.bin_status, self.normalize_status = False, False, False
-
-    def domain_check(self, domains):
-        """
-        If domains is passed in, just return it.
-        Else, see if domains is set as object attribute
-        """
-        if domains is None:
-            if not hasattr(self, 'domains'):
-                raise AttributeError('Domains were not set for cohort and were not provided.')
-            domains = self.domains
-        return domains
-
-    def attachment_results(self):
-        ATTACHMENTS = {"beta_a", "beta_b", 
-                      "screen_features.screen_time", 'screen_features.sessions', 'screen_features.checks', 'screen_features.session_time'}
-        
-        attachment_dict = {}
-        for feature in ATTACHMENTS:
-            try:
-                attachment_events = LAMP.Type.get_attachment(type_id=self.id, attachment_key="lamp." + feature)["data"]
-            except Exception as e: #unable to find resource
-                continue
-            
-            if len(attachment_events) > 0:
-                attachment_dict[feature] = [(event['value'], event['timestamp']) for event in attachment_events]
-        
-        #Sort
-        for feature in attachment_dict:
-            attachment_dict[feature] = sorted(attachment_dict[feature], key=lambda x: x[1])
-        
-        return attachment_dict
-    
-    def passive_feature_results(self, resolution):
-        """
-        """
-        #Find beiwe id
-        RESOLUTION_KEY = {'day':'daily', 'week':'weekly', 'month':'monthly'}
-        PASSIVE_FEATURES = ['Hometime', 'DistTravelled', 'RoG', 'MaxDiam',
-                           'MaxHomeDist', 'SigLocsVisited', 'AvgFlightLen', 'StdFlightLen',
-                           'AvgFlightDur', 'StdFlightDur', 'ProbPause', 'SigLocEntropy',
-                           'MinsMissing', 'CircdnRtn', 'WkEndDayRtn', 'outgoing_texts',
-                           'outgoing_textlengths', 'text_outdegree', 'incoming_texts',
-                           'incoming_textlengths', 'text_indegree', 'text_reciprocity',
-                           'text_responsiveness', 'outgoing_calls', 'outgoing_calllengths',
-                           'call_outdegree', 'incoming_calls', 'incoming_calllengths',
-                           'call_indegree', 'call_reciprocity', 'call_responsiveness']
-
-        #Get all passive feature events
-        passive_feature_dict = {}
-        for feature in PASSIVE_FEATURES:            
-            feature_query, feature_query_2 = 'beiwe.' + feature + '.' + RESOLUTION_KEY[resolution], 'beiwe.passive_features.' + feature + '.' + RESOLUTION_KEY[resolution] 
-
-            feature_events, feature_events_2 = LAMP.SensorEvent.all_by_participant(participant_id=self.id, origin=feature_query), LAMP.SensorEvent.all_by_participant(participant_id=self.id, origin=feature_query_2)
-
-            if len(feature_events['data']) > 0:
-                passive_feature_dict[feature] = []
-                for event in feature_events['data']:
-                    passive_feature_dict[feature].append((event['data']['value'], event['timestamp']))
-                    
-            elif len(feature_events_2['data']) > 0:
-                passive_feature_dict[feature] = []
-                for event in feature_events_2['data']:
-                    passive_feature_dict[feature].append((event['data']['value'], event['timestamp']))
-
-        #Sort
-        for feature in passive_feature_dict:
-            passive_feature_dict[feature] = sorted(passive_feature_dict[feature], key=lambda x: x[1])
-        
-        return passive_feature_dict
-
-    def survey_results(self, participant=None, question_categories=None):
-        """
-        Get survey events for participant
-        :param participant (str): the LAMP ID for participant. If not provided, then take participant id
-        :param question_categories (bool): indicates whether to use custom question mappings as defined in params file
-        """
-
-        if participant is None:
-            participant = self.id
-
-        participant_activities = LAMP.Activity.all_by_participant(participant)['data']
-        participant_activities_surveys = [activity for activity in participant_activities if activity['spec'] == 'lamp.survey']
-        participant_activities_surveys_ids = [survey['id'] for survey in participant_activities_surveys]        
-
-        participant_results = [result for result in LAMP.ActivityEvent.all_by_participant(participant)['data'] if result['activity'] in participant_activities_surveys_ids and len(result['temporal_slices']) > 0]
-        participant_surveys = {} #maps survey_type to occurence of scores 
-        for result in participant_results:
-            #Check if it's a survey event
-            if result['activity'] not in participant_activities_surveys_ids or len(result['temporal_slices']) == 0: 
-                continue
-
-            activity = LAMP.Activity.view(result['activity'])['data'][0]
-            result_settings = activity['settings']
-
-            survey_time = result['timestamp']
-            survey_result = {} #maps question domains to scores
-            for event in result['temporal_slices']: #individual questions in a survey
-                question = event['item']
-                
-                for i in range(len(result_settings)) : #match question info to question
-                    if result_settings[i]['text'] == question: 
-                        current_question_info=result_settings[i]
-                        break
-
-                #score based on question type:
-                score=None
-                event_value=event.get('value') #safely get event['value'] to protect from missing keys
-                
-                if current_question_info['type'] =='likert' and event_value!=None :
-                    score = float(event_value)
-                        
-                elif current_question_info['type']=='boolean':
-                    if event_value == 'no': score = 0.0 #no is healthy in standard scoring
-                    elif event_value =='yes' : score = 3.0 # yes is healthy in reverse scoring
-
-                elif current_question_info['type'] == 'list' :
-                    for option_index in range(len(current_question_info['options'])) :
-                        if event_value == current_question_info['options'][option_index] :
-                            score = option_index * 3 / (len(current_question_info['options'])-1)
-
-                elif current_question_info['type'] == 'text':  #skip
-                    continue
-                
-                if score==None : continue
-                #add event to a category, either user-defined or default activity
-                if question_categories:
-                    #See if there is an extra space in the string
-                    if question not in question_categories:
-                        if question[:-1] in question_categories:
-                            question = question[:-1]
-                        else:
-                            continue
-
-                    event_category = question_categories[question]['category']
-                    #flip score if necessary
-                    if question_categories[question]['reverse_scoring']: 
-                        score = 3.0 - score
-
-                    if event_category in survey_result: survey_result[event_category].append(score) 
-                    else: survey_result[event_category] = [score]
-
-                else:
-                    if activity['name'] not in survey_result:
-                        survey_result[activity['name']] = []
-                    survey_result[activity['name']].append(score)
-                    
-
-            #add mean to each cat to master dictionary           
-            for category in survey_result: 
-                survey_result[category] = np.mean(survey_result[category])
-                if category not in participant_surveys: 
-                    participant_surveys[category] = [(survey_result[category], survey_time)]
-                else: 
-                    participant_surveys[category].append((survey_result[category], survey_time))
-
-        #Sort surveys by time
-        for activity_category in participant_surveys:
-            participant_surveys[activity_category] = sorted(participant_surveys[activity_category], key=lambda x: x[1])
-
-        return participant_surveys
-        
-
-    def create_df(self, days_cap=120, day_first=None, day_last=None, resolution='day', start_monday=False, start_morning=True, time_centered=False, question_categories=None):
-        """
-        Create participant dataframe
-        :param day_first (datetime.Date)
-        """
-
-        FIFTEEN_MIN_PER_UNIT = {'15 min': 1, 'day': 4*24, 'week': 4*24*7, 'month': 4*24*30}
-        UNITS_PER_DAY = {'15 min': 4*24, 'day': 1, 'week': 1/7, 'month': 1/30}
-
-  
-        assert resolution in ['15 min', 'day', 'week', 'month']
-
-        surveys = self.survey_results(question_categories=question_categories) #survey ActivityEvents
-        passive_features = self.passive_feature_results(resolution=resolution) #beiewe.passive_features
-        attachment_features = self.attachment_results() #static attachment features
-        
-        surveys = {**surveys, **passive_features, **attachment_features}
-        #surveys.update(passive_features).update(attachment_features)
-
-        if len(surveys) == 0:
-            return None
-
-        #Find the first, last date
-        if day_first is None: day_first = datetime.datetime.utcfromtimestamp(sorted([surveys[dom][0][1]/1000 for dom in surveys])[0])
-        else: day_first = datetime.datetime.combine(day_first, datetime.time.min) #convert to datetime
-
-        if day_last is None: day_last = datetime.datetime.utcfromtimestamp(sorted([surveys[dom][-1][1]/1000 for dom in surveys])[-1])
-        else: day_last = datetime.datetime.combine(day_last, datetime.time.min) #convert to datetime
-
-        #Clip days based on morning and weekday parameters
-        if start_monday:
-            if day_first.weekday() > 0: 
-                day_first += datetime.timedelta(days = - day_first.weekday())
-
-        if start_morning: 
-            day_first, day_last = day_first.replace(hour=9, minute=0, second=0), day_last.replace(hour=9, minute=0, second=0)
-        days_elapsed = (day_last - day_first).days 
-        date_list = [day_first + datetime.timedelta(minutes=15*FIFTEEN_MIN_PER_UNIT[resolution]*x) for x in range(0, math.ceil(min(days_elapsed, days_cap) * UNITS_PER_DAY[resolution]))]
-
-        #Create dateframe for the number of time units that have data; limited by days; cap at 'days_cap' if this number is large
-        df = pd.DataFrame({'Date': date_list, 'id':self.id})
-
-        domains = [dom for dom in surveys]
-        for dom in domains: 
-            df[dom] = np.nan
-
-        #Parse surveys
-        for dom in surveys:
-            if dom not in domains and domains is not None:
-                continue
-
-            #Based on resolution, match each survey event to its closest date
-            dates = [datetime.datetime.utcfromtimestamp(event_time/1000) for _, event_time in surveys[dom] if day_first <= datetime.datetime.utcfromtimestamp(event_time/1000) <= day_last] 
-
-            #Choose closest date if "time centered"; else, choose preceding date
-            if time_centered: rounded_dates = [df.loc[df.index[(date - df['Date']).abs().sort_values().index[0]], 'Date'] for date in dates]
-            else: rounded_dates = [df.loc[df.index[(date - df['Date'])[(date - df['Date']) >= datetime.timedelta(0)].sort_values().index[0]], 'Date'] for date in dates]
-
-            results = [event_val for event_val, event_time in surveys[dom] if day_first <= datetime.datetime.utcfromtimestamp(event_time/1000) <= day_last]
-            dom_results = pd.DataFrame({'Date':dates, 'Rounded Date':rounded_dates, 'Result':results})
-            for date, date_df in dom_results.groupby('Rounded Date'):                    
-                df.loc[df['Date'] == date.to_pydatetime(), dom] = np.mean(date_df['Result']) 
-
-        #Convert to date to actual date objects if resolution is day or greater
-        if resolution != '15 min':
-            df['Date'] = df['Date'].apply(lambda d: d.date())
-            
-        #Trim columns if there are predetermined domains
-        if self.domains is not None: 
-            df = df.loc[:, ['id', 'Date'] + [d for d in self.domains if d in df.columns.values]]
-
-        return df
-
- 
-    def impute(self, domains):
-        """
-        Get value for each column for each window
-        """
-        if self.impute_status:
-            print('Dataframe already imputed.')
-            return
-
-        weighted_dict = [0.05, 0.20, 0.40, 1.5, 0.4, 0.20, 0.05]
-
-        #Get indices of all middle bin values; add them to new df
-        for dom in domains:
-            if dom not in self.df:
-                continue
-
-            dom_values = []
-
-            for ind in range(len(self.df.index)):
-
-                #Get indices
-                middle_weight_index = 3
-                starting_index = max(ind -3, 0)
-                ending_index = min(ind + 4, 90)
-
-                #Get slice values
-                subj_slice = self.df.iloc[starting_index:ending_index]
-
-                #Remove na
-                subj_slice_no_nan = subj_slice[dom].dropna()
-                slice_indices = subj_slice_no_nan.index
-
-                if len(slice_indices) == 0:
-                    dom_values.append(np.nan)
-                    continue
-
-                #Match slice index with weight index
-                weighted_dict_vals = [weighted_dict[middle_weight_index - (ind - slice_i)] for slice_i in slice_indices]
-
-                #Find total in bin
-                slice_val = sum(subj_slice_no_nan * [val / sum(weighted_dict_vals) for val in weighted_dict_vals])
-                dom_values.append(slice_val)
-
-            self.df[dom] = dom_values
-
-        self.impute_status = True
-
-
-    def bin(self, domains, window_size=3, shift=0):
-        """
-        Bin dataframe
-        :param domains (list): the domains to bin 
-        :window_size (int): the size of the bins (in days)
-        :shift (int): the day of the week to start the binning on (Monday == 0)
-        """
-
-        #domains = self.domain_check(domains)
-        domains = self.df.columns.drop(['Date', 'id'])
-        
-        #Shift until Monday
-        df_copy = self.df.copy()
-        if shift is not None:
-            try:
-                dow = df_copy.iloc[0]['Date'].weekday()
-                if dow > 0 and len(df_copy) > dow:
-                    df_copy = df_copy.shift(shift - dow)
-            except:
-                print(self.id, df_copy)
-        df_copy['bin'] = np.floor(df_copy.index / window_size )
-        bins = df_copy.groupby('bin')
-        subj_bin_df = pd.DataFrame(columns=['Bin Start Date', 'Bin End Date'] + domains.values.tolist())
-        for b in bins:
-            bin_values = []
-            #Add bin start/end dates
-            
-            start_date, end_date = b[1].iloc[0]['Date'], b[1].iloc[-1]['Date']
-            bin_values.extend((start_date, end_date))
-            for dom in domains:
-                if dom in b[1].columns:
-                    bin_dom_value = b[1][dom].mean()
-                    bin_values.append(bin_dom_value)
-                else:
-                    bin_values.append(np.nan)
-
-            #Add date range
-            subj_bin_df.loc[b[0]] = bin_values    
-
-        subj_bin_df['id'] = self.id
-        
-        self.bins = subj_bin_df
-        
-    def impute_bins(self, domains):
-        """
-        Try to impute bin objects
-        """
-        assert self.bins is not None
-        
-        for d in domains:
-            for index, row in self.bins.iterrows():
-                if 0 < index < len(self.bins.index) - 1:
-                    index = int(index)
-                    if pd.isnull(self.bins.iloc[index][d]) and not pd.isnull(self.bins.iloc[index-1][d]) and not pd.isnull(self.bins.iloc[index+1][d]):                        
-                        self.bins.at[index,d] = np.mean([self.bins.iloc[index-1][d], self.bins.iloc[index+1][d]])
-
-
-    def normalize(self, domains, domain_means={}, domain_vars={}):
-        """
-        Normalize columns values to 0 mean/ unit variance
-        :param domain_means (dict): the mean for each column value
-        :param domain_vars (dict): the variance for each column value
-        If mean/var not provided, resort to in-sample normalization
-        """
-        if self.normalize_status: return
- 
-        domains = self.domain_check(domains)
-        if domain_means == {} and domain_vars == {}:
-            for dom in domains:
-                if dom in self.df.columns:
-                    domain_means[dom] = self.df[dom].mean()
-                    domain_vars[dom] = self.df[dom].std()
-
-        for dom in domains:
-            if dom in self.df.columns and dom in domain_means and dom in domain_vars:
-                self.df[dom] = (self.df[dom] - domain_means[dom]) / domain_vars[dom]
-
-            self.normalize_status = True
-
-    def create_transition_dict(self, level):
-        """
-        Create nested dictionary structure 
-        :param level (int): the level dictionary structure. Must be >= 0
-        """
-        trans_dict = {}
-        for comb in itertools.product(('out', 'in'), repeat=level):
-            trans_dict[comb] = {comb2:0 for comb2 in itertools.product(('out', 'in'), repeat=level)}
-        return trans_dict
-
-
-    def assign_transition_dict(self, trans_dict, row, row2):
-        """
-        Increment transition dict
-        """
-        label1 = tuple(['in' if col < 1.0 else 'out' for col in row])
-        label2 = tuple(['in' if col < 1.0 else 'out' for col in row2])
-        trans_dict[label1][label2] += 1
-
-    def get_transitions(self, domains=None, joint_size=1):
-        """
-        Count transition events for each col in subj_df
-        """
-        domains = self.domain_check(domains)
-        all_trans_dict = {}
-        for dom_group in itertools.combinations(domains, r=joint_size):
-
-            #Create trans dictionary
-            group_dict = self.create_transition_dict(level=joint_size)
-
-            #Find bins with values for each group
-            good_bins = self.bins[list(dom_group)].dropna()
-
-            #Assign
-            row_iterator = good_bins.iterrows()
-            try:
-                last_i, last = next(row_iterator)
-            except StopIteration:
-                continue
-            for index, row in row_iterator:
-                if int(index) - int(last_i) <= 3:
-                    self.assign_transition_dict(group_dict, last, row)
-                last_i, last = index, row
-
-            all_trans_dict[dom_group] = group_dict
-
-        return all_trans_dict
-
-    def domain_bouts(self, domains=None):
-        """
-        """
-        def parse_bout_list(bout_list, state, low_bouts, high_bouts):
-            """
-            Helper function to parse bout list at end of bout
-            """
-            if len(bout_list) == 1: bout_list.append(bout_list[-1] + 3) #edge case where last domain event is only one in its bout
-
-            if state: low_bouts.append(float(bout_list[-1]) - float(bout_list[0]))
-            else: high_bouts.append(float(bout_list[-1]) - float(bout_list[0]))
-            return low_bouts, high_bouts
-
-        domains = self.domain_check(domains)
-        bout_dict = {}
-        for dom in domains:
-            if dom not in self.df:
-                continue
-
-            bout_list = [] #temporary list that contains times of current bout
-            subj_dom = self.df.loc[self.df[dom].notnull(), dom]
-            row_iterator = subj_dom.iteritems()
-            try:
-                last_day, last_val = next(row_iterator)
-                bout_list.append(last_day)
-                if last_val < 1.0: last_state = True #set this back on first val
-                else: last_state = False
-            except StopIteration:
-                continue
-
-            bout_dict[dom] = {}
-            low_bouts, high_bouts = [], [] #duration of all in-range bouts
-            low_bouts_end, high_bouts_end = 0, 0 #counter the keep track of # of ended bout things
-            for day, val in row_iterator:
-                if val < 1.0: state = True
-                else: state = False
-
-                if last_state == state and day - last_day <= 6: #continue bout
-                    bout_list.append(day)
-
-                else: #discontinue bout
-                    if day - last_day > 8: 
-                        bout_list.append(last_day + 3) #If adjacent rows are day outside threshold, discontinue bout;cap last bout at 3 days past last activity	
-                        if last_state: low_bouts_end += 1
-                        else: high_bouts_end += 1
-                    else: bout_list.append(day) #then normal switch
-
-                    low_bouts, high_bouts = parse_bout_list(bout_list, last_state, low_bouts, high_bouts)
-                    bout_list = [day]
-
-                last_day, last_val = day, val
-                last_state = state
-
-            low_bouts, high_bouts = parse_bout_list(bout_list, last_state, low_bouts, high_bouts) #parse last bout
-            bout_dict[dom]['low'], bout_dict[dom]['high'] = [float(b) for b in low_bouts], [float(b) for b in high_bouts]
-            bout_dict[dom]['low ends'], bout_dict[dom]['high ends'] = low_bouts_end, high_bouts_end
-
-        return bout_dict
\ No newline at end of file
diff --git a/LAMP/analysis/study_ext.py b/LAMP/analysis/study_ext.py
deleted file mode 100644
index 126f7a2..0000000
--- a/LAMP/analysis/study_ext.py
+++ /dev/null
@@ -1,256 +0,0 @@
-import LAMP
-import numpy as np
-from functools import reduce
-
-class StudyExt():
-    """
-    """
-    def __init__(self, 
-                 participants, 
-                 domains=None, 
-                 df_props={}):
-
-        self.domains = domains
-        self.df_props = df_props
-        self.init_participants(participants)
-
-    def __iter__(self):
-        for participant in self.participants:
-            yield participant
-
-    def __len__(self):
-        return len(self.participants)
-
-    def __getitem__(self, key):
-        return self.participants[key]
-
-    @property
-    def participants(self):
-        return self._participants
-
-    @property
-    def domains(self):
-        return self._domains
-
-    @participants.setter
-    def participants(self, value):
-        self._participants = value
-
-    @domains.setter
-    def domains(self, value):
-        self._domains = value
-
-    def domain_check(self, domains):
-        """
-        If domains is passed in, just return it.
-
-        Else, see if domains is set as object attribute
-        """
-        if domains is None:
-            if not hasattr(self, 'domains'):
-                raise AttributeError('Domains were not set for cohort and were not provided.')
-            domains = self.domains
-        return domains
-
-    def init_participants(self, participants):
-        """
-        Initialize participants in cohorts. Can take either participant objects or participant ids
-        """
-        self.participants = []
-        for participant in participants:
-
-            if isinstance(participant, LAMP.analysis.ParticipantExt):
-                #NEED TO CHECK participant MATCHES PROPS
-                self.participants.append(participant)
-
-            else:
-                self.add_participant(participant)
-
-    def get_participant(self, participant_id):
-        """
-        Get participant object by id.
-
-        If it doesn't exist, will return None
-        """
-        for participant in self.participants:
-            if participant.id == participant_id:
-                return participant
-
-        print('participant not found!')
-        return None
-
-    def add_participant(self, participant):
-        self.participants.append(LAMP.analysis.ParticipantExt(id = participant, 
-                                          domains=self.domains, 
-                                          df_props=self.df_props))
-
-
-    def mean_age(self):
-        """
-        Return mean, std age of participants in cohort
-        """
-        participant_ages = [participant.age for participant in self.participants if participant.age is not None]
-        if len(participant_ages) == 0:
-            return None
-
-        return np.mean(participant_ages)
-
-
-    def std_age(self):
-        """
-        Return std age of participants in cohort
-        """
-        participant_ages = [participant.age for participant in self.participants if participant.age is not None]
-        if len(participant_ages) == 0:
-            return None
-
-        return np.std(participant_ages)
-
-    def domain_mean(self, domain):
-        """
-        Find the mean value for particular domain in cohort
-        
-        :param domain (str): the specified domain
-        """
-        dom_values = [participant.df[domain].values for participant in self.participants if domain in participant.df.columns]
-        if len(dom_values) == 0: return None
-        return np.nanmean(np.concatenate(dom_values))
-
-    def domain_stdev(self, domain):
-        """
-        Find the std value for particular domain in cohort
-        
-        :param domain (str): the specified domain
-        """
-        dom_values = [participant.df[domain].values for participant in self.participants if domain in participant.df.columns]
-        if len(dom_values) == 0: return None
-        return np.nanstd(np.concatenate(dom_values))
-
-    def normalize(self, domains=None, dom_means={}, dom_vars={}, in_sample=False):
-        """
-        Normalize each domain in cohort so that values have 0 mean/unit variance
-
-        If in_sample is true, then performs within-sample normalization
-        
-        :param domains (list): domains to use. Default None, in which all availble domains are used
-        :param dom_means (dict): the predetermined means of specified domains
-        :param dom_vars (dict): the predetermined standard deviations of specified domains
-        :param in_sample (bool): Whether to perform within-sample normalization. Default False
-        """
-        if domains is None and self.domains is None: #Get all features from all particpants
-            domains = set(np.concatenate([participant.df.columns.drop(['Date', 'id']) for participant in participants]))
-
-        elif domains is None:
-            domains = self.domains
-            
-        #
-        if not in_sample:
-            dom_means = {dom: self.domain_mean(dom) for dom in domains if self.domain_mean(dom) is not None and dom not in dom_means}
-            dom_vars = {dom: self.domain_stdev(dom) for dom in domains if self.domain_stdev(dom) is not None and dom not in dom_vars}
-
-        for participant in self.participants: participant.normalize(domains=domains, domain_means=dom_means, domain_vars=dom_vars)
-
-    def impute(self, domains=None):
-        """
-        Impute every participant in cohort.
-        
-        :param domains (list): domains to use. Default None, in which all availble domains are used
-        """
-        if domains is None and self.domains is None: #Get all features from all particpants
-            domains = set(np.concatenate([participant.df.columns.drop(['Date', 'id']) for participant in participants]))
-        elif domains is None:
-            domains = self.domains
-
-        for participant in self: participant.impute(domains=domains)
-
-    def bin(self, domains=None, window_size=3):
-        """
-        Bins all participants in cohort.
-        
-        :param domains (list): domains to use. Default None, in which all availble domains are used
-        :param window_size (int): the number of days to use per bin
-        """
-        if domains is None and self.domains is None: #Get all features from all particpants
-            domains = set(np.concatenate([participant.df.columns.drop(['Date', 'id']) for participant in participants]))
-        elif domains is None:
-            domains = self.domains
-
-        for participant in self: participant.bin(domains=domains, window_size=window_size)
-            
-    def impute_bins(self, domains=None):
-        """
-        Impute bins
-        
-        :param domains (list): domains to use. Default None, in which all availble domains are used
-        """
-        if domains is None and self.domains is None: #Get all features from all particpants
-            domains = set(np.concatenate([participant.df.columns.drop(['Date', 'id']) for participant in participants]))
-        elif domains is None:
-            domains = self.domains
-            
-        for participant in self: participant.impute_bins(domains=domains)
-        
-
-    def transition_probabilities(self, domains=None, joint_size=1):
-        """
-        Get cohort_wide transistion probabilities.
-
-        :param domains (list): domains to use. Default None, in which all availble domains are used
-        :param joint_size (int): the number of variables used when calculating the joint probabilities for transistion event. Defaults to 1. 
-        """
-        if domains is None and self.domains is None: #Get all features from all particpants
-            domains = set(np.concatenate([participant.df.columns.drop(['Date', 'id']) for participant in participants]))
-        elif domains is None:
-            domains = self.domains
-
-        samples_tp = [pro.get_transitions(domains = domains, joint_size = joint_size) for pro in self]
-
-        master_dict = {}
-
-        for sample in samples_tp:
-            for cat in sample:
-                if cat not in master_dict:
-                    master_dict[cat] = {state: sample[cat][state] for state in sample[cat]}
-                else: #merge
-                    for state in sample[cat]:
-                        master_dict[cat][state] = {state2: master_dict[cat][state][state2] + sample[cat][state][state2] for state2 in 
-                                                                                                                        master_dict[cat][state]}
-        #Convert to probabilities
-        trans_dict = {}
-        for cat in master_dict:
-            trans_dict[cat] = {}
-            for state in master_dict[cat]:
-                trans_dict[cat][state] = {}
-                for state2 in master_dict[cat][state]:
-                    if sum(master_dict[cat][state].values()) == 0:
-                        trans_dict[cat][state][state2] = None
-                    else:
-                        trans_dict[cat][state][state2] = float(master_dict[cat][state][state2]) / float(sum(master_dict[cat][state].values()))
-        return trans_dict, master_dict
-
-    def domain_bouts(self, domains=None):
-        """
-        Get elevated/sedated domain bouts in each domain
-        
-        :param domains (list): domains to use. Default None, in which all availble domains are used
-        """
-        if domains is None and self.domains is None: #Get all features from all particpants
-            domains = set(np.concatenate([participant.df.columns.drop(['Date', 'id']) for participant in participants]))
-        elif domains is None:
-            domains = self.domains
-            
-        bout_dict = {}
-        for participant in self: 
-            participant_bout_dict = participant.domain_bouts(domains=domains)
-            for dom in participant_bout_dict:
-                if dom not in bout_dict: bout_dict[dom] = participant_bout_dict[dom]
-                else: 
-                    bout_dict[dom]['low'] += participant_bout_dict[dom]['low']
-                    bout_dict[dom]['high'] += participant_bout_dict[dom]['high']
-                    bout_dict[dom]['low ends'] += participant_bout_dict[dom]['low ends']
-                    bout_dict[dom]['high ends'] += participant_bout_dict[dom]['high ends']
-
-        return bout_dict
-
-
-