-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathUnlabeled_Data_Creating.py
148 lines (124 loc) · 5.16 KB
/
Unlabeled_Data_Creating.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import itertools
import os
import pandas as pd
# Switch into the project folder first: the output CSV at the end of this
# script is written with a relative file name, so it lands in this directory.
wrk_path = r"/Users/calvin/Documents/MastersProject"
os.chdir(path=wrk_path)
# Component 1 - phospholipids.
# Candidate lipids:
#   1. HSPC
#   2. POPC
#   3. DOPC
list_component_lipids = ['hspc', 'popc', 'dopc']
component_1_df = pd.DataFrame({'component_1': list_component_lipids})
# Component 2 - drugs / vitamins.
# Current candidates:
#   Vitamin E
#   Vitamin D
#   Cholesterol
# The list is meant to be expanded later (a wider variant that also had
# 'vita' and 'vitb12' was tried and disabled). Encapsulating two of these
# components at a time is also under discussion.
list_component_drug = ['vite', 'vitd', 'chol']
component_2_df = pd.DataFrame({'component_2': list_component_drug})
# Component 3 - stealth polymer.
# Only PEG-2000 for now; the list may be expanded at a later date.
list_component_polymer = ['peg2000']
component_3_df = pd.DataFrame({'component_3': list_component_polymer})
# Volume-by-volume percentages, one frame per component. All three frames
# draw from the same pool of candidate percentages.
vol_vol_pcnt = [90, 80, 70, 60, 50, 45, 40, 35, 30, 25, 20, 15, 10, 5]

# Component 1: kept at 50% or more of the formulation. This is a working
# convention rather than a scientific rule.
vol_vol_pcnt_1_df = pd.DataFrame(
    {'vol_vol_pcnt_1': [pcnt for pcnt in vol_vol_pcnt if pcnt >= 50]}
)

# Component 2: must be 50% or less.
vol_vol_pcnt_2_df = pd.DataFrame(
    {'vol_vol_pcnt_2': [pcnt for pcnt in vol_vol_pcnt if pcnt <= 50]}
)

# Component 3: must be 5% or less.
vol_vol_pcnt_3_df = pd.DataFrame(
    {'vol_vol_pcnt_3': [pcnt for pcnt in vol_vol_pcnt if pcnt <= 5]}
)
# Concentration frames, one per component.
# Every component is screened at the same concentrations (mM):
#   75, 50, 30, 25, 20, 10, 5
conc = [75, 50, 30, 25, 20, 10, 5]
conc_1_df = pd.DataFrame({'conc_1': conc})
conc_2_df = pd.DataFrame({'conc_2': conc})
conc_3_df = pd.DataFrame({'conc_3': conc})
# Dispense speed frame.
# The speed can be anywhere between 1-400 ul/s, but only three speeds are
# used to keep the number of required experiments down. A finer grid
# ([400, 300, 200, 120, 80, 30]) was tried and disabled.
speed = [400, 120, 30]
speed_disp_df = pd.DataFrame({'speed_disp': speed})
# Formulation combination frame: the Cartesian product of every factor level,
# one row per candidate combination. The dict keeps each output column name
# next to the series that feeds it; insertion order fixes the column order.
factor_levels = {
    'component_1': component_1_df['component_1'],
    'component_2': component_2_df['component_2'],
    'component_3': component_3_df['component_3'],
    'vol_vol_pcnt_1': vol_vol_pcnt_1_df['vol_vol_pcnt_1'],
    'vol_vol_pcnt_2': vol_vol_pcnt_2_df['vol_vol_pcnt_2'],
    'vol_vol_pcnt_3': vol_vol_pcnt_3_df['vol_vol_pcnt_3'],
    'conc_1': conc_1_df['conc_1'],
    'conc_2': conc_2_df['conc_2'],
    'conc_3': conc_3_df['conc_3'],
    'speed_disp': speed_disp_df['speed_disp'],
}
formulation_combination_df = pd.DataFrame(
    list(itertools.product(*factor_levels.values())),
    columns=list(factor_levels),
)
# Total vol-by-vol percent column.
# The three vol_vol_pcnt_* columns are summed per row. They are percentages of
# a single formulation, so rows whose total is above or below 100 are dropped.
#
# The percent columns are selected by their shared name prefix rather than by
# removing every other column from the full column list: a factor column added
# later can then never be summed into the total by accident.
col_list = [
    column
    for column in formulation_combination_df.columns
    if column.startswith('vol_vol_pcnt_')
]
formulation_combination_df['total_pcnt'] = formulation_combination_df[col_list].sum(axis=1)

# Keep only valid formulations and renumber the rows from 0.
formulation_combination_df = (
    formulation_combination_df
    .loc[formulation_combination_df['total_pcnt'] == 100]
    .reset_index(drop=True)
)
# Summarise the filtered frame. DataFrame.info() writes its report to stdout
# itself and returns None, so it is called directly; wrapping it in print()
# would emit an extra "None" line.
formulation_combination_df.info()

# Write the CSV before announcing completion, so "DONE" only appears once the
# file has actually been written.
formulation_combination_df.to_csv('full_unlabeled_combination_experiment.csv', index=False)
print("DONE")