-
Notifications
You must be signed in to change notification settings - Fork 0
/
Mastersizer_Plotter.py
322 lines (212 loc) · 9.88 KB
/
Mastersizer_Plotter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
# -*- coding: utf-8 -*-
"""
Code to fit a multimodal lognormal size distribution
Created: 21/02/24
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import minimize
import statistics as stats
from scipy.stats import norm
def mode_check(mode_sizes, mode_means, mode_stds):
'''Checks that the modes of a distribution match'''
if len(mode_sizes) < 1:
raise ValueError("No mode sizes given")
elif len(mode_sizes) != len(mode_means) or len(mode_sizes) != len(mode_stds):
raise ValueError("Mode parameter lengths differ")
return
def log_trap_int(x_array, y_array, x1=0, x2=float('inf')):
""" Performs a numerical integration in logspace using the trapezium rule bewteen the limits x1 and x2 """
x_array = np.log10(x_array) # Normalising in logspace
if x1 == 0:
x1 = min(x_array)
if x2 == float('inf'):
x2 = max(x_array)
def linear_interp(x, x1, x2, y1, y2):
""" Performas a linear interpolation """
y = y1 + (x - x1) * (y2 - y1) / (x2 - x1)
return y
total_integral = 0
for i in range(1, len(x_array)):
x = x_array[i]
if x > x1 and x < x2:
total_integral += (x - x_array[i - 1]) * \
(y_array[i] + y_array[i - 1]) / 2
if x_array[i - 1] > x: # correction at beginning of trapezium
y_x1 = linear_interp(
x_array[i - 1], x1, x, y_array[i - 1], y_array[i])
total_integral -= (x1 - x_array[i - 1]) * \
(y_x1 + y_array[i - 1]) / 2
return total_integral
def get_den(den_option, num_den, sa_den, vol_den):
'''Uses the den_option to return the required density distribution and performs a normalisation'''
Total = np.sum(num_den)
if den_option == 'N':
den = num_den
elif den_option == 'SA':
den = sa_den * Total / np.sum(sa_den)
elif den_option == 'V':
den = vol_den * Total / np.sum(vol_den)
else:
raise ValueError(f'den_option should be number (N), surface area (SA), or volume (V), rather than: {den_option}')
return den
def get_label(den_option):
'''returns the correct y_label for the correcsponding den_option'''
if den_option == 'N':
y_label = 'Number Density / %'
elif den_option == 'SA':
y_label = 'Surface Area Density / %'
elif den_option == 'V':
y_label = 'Volume Density / %'
else:
raise ValueError(f'den_option should be number (N), surface area (SA), or volume (V), instead: {den_option}')
return y_label
def calc_size_freq(d, mode_sizes, mode_means, mode_stds, get_modes=False):
'''Function to calulate the frequency denisity for a given particle size'''
frequency_density, mode_fd = 0, []
for number, mode_size in enumerate(mode_sizes):
fd = mode_size * norm.pdf(np.log10(d), np.log10(mode_means[number]), mode_stds[number])
frequency_density += fd
if get_modes:
mode_fd.append(fd)
if get_modes:
return frequency_density, mode_fd
return frequency_density
def multimodal_dist(dp, mode_sizes, mode_means, mode_stds, get_modes=False):
'''Calculates a multimodal distribution'''
mode_check(mode_sizes, mode_means, mode_stds)
frequency_density, mode_fds = [] , []
for n in range(len(mode_sizes)):
mode_fds.append([])
for d in dp:
if get_modes:
output = calc_size_freq(d, mode_sizes, mode_means, mode_stds, get_modes=True)
frequency_density.append(output[0])
mode_fd =output[1]
for mode, fd in enumerate(mode_fd):
mode_fds[mode].append(fd)
else:
frequency_density.append(calc_size_freq(d, mode_sizes, mode_means, mode_stds))
if get_modes:
return frequency_density, mode_fds
return frequency_density
def plot_multimodal(dp, mode_sizes, mode_means, mode_stds, den_option, dp_fit, exp_data=[], dpi=600, min_dp=0.01):
'''Plots a multimodal lognormal distribution and has the option to plot the experimental data'''
mode_check(mode_sizes, mode_means, mode_stds)
plt.figure(dpi=dpi)
frequency_density, mode_fds = multimodal_dist(dp_fit, mode_sizes, mode_means, mode_stds, get_modes=True)
for mode, fds in enumerate(mode_fds):
plt.plot(dp_fit, fds, '-', label=f'Mode {mode + 1}')
plt.plot(dp_fit, frequency_density, '--', color='black', label='fit')
if len(exp_data) != 0:
plt.plot(dp, exp_data, '-', color='black', label='Real Data')
plt.xscale('log')
plt.xlabel('d$_p$ / $\mu$m')
plt.ylabel(get_label(den_option))
plt.xlim([min_dp, max(dp)])
plt.legend(loc='upper left') # loc='upper left'
# plt.savefig('plot.pdf')
plt.show()
return frequency_density
def calc_charge_res(x_list, y_list, y_fit):
'''Calculates the total residual and R² value between a set of values and a fit'''
TR, SSE, SST = 0, 0, 0
y_av = stats.mean(y_list)
for n, y in enumerate(y_list):
R = np.abs(y_fit[n] - y_list[n])
TR += R
SSE += R ** 2
SST += (y - y_av) ** 2
R2 = 1 - SSE / SST
return TR, R2
def objective_function(variables, *args):
'''Used for the function minimisation with scipy.minimize'''
dp, num_den, n, minimise_op, show_steps = args
v = variables
mode_sizes, mode_means, mode_stds = v[:n], v[n:(n * 2)], v[(n * 2):]
y_fit = multimodal_dist(dp, mode_sizes, mode_means, mode_stds, get_modes=False)
TR, R2 = calc_charge_res(dp, num_den, y_fit)
if minimise_op == "TR":
result = TR
elif minimise_op == "R2":
result = 1 - R2
else:
raise ValueError(f"minimise_op should be either total residual (TR), or R² value (R2). Not: {minimise_op}.")
if show_steps:
# print(f'mode_sizes = {", ".join(map(str, mode_sizes))}')
# print(f'mode_means = {", ".join(map(str, mode_means))}')
# print(f'mode_stds = {", ".join(map(str, mode_stds))}')
print(f'Total residual: {round(TR, 3)}, R² value: {round(R2, 4)}')
return result
def positivity_constraint(variables):
return variables
def get_all_dists(dp_fit, freq_den, den_option, dpi=200):
'''Gets all the relevant distributions and plots them'''
# Calculating the conversion factors at the fitted particle sizes
vp_fit = np.pi * np.power(dp_fit, 3) / 6
sa_fit = np.pi * np.power(dp_fit, 2)
if den_option == 'V':
v = freq_den
n = v / vp_fit
sa = n * sa_fit
elif den_option == 'SA':
sa = freq_den
n = sa / sa_fit
v = n * vp_fit
elif den_option == 'N':
n = freq_den
sa = n * sa_fit
v = n * vp_fit
# Normalisations
n /= log_trap_int(dp_fit, n)
sa /= log_trap_int(dp_fit, sa)
v /= log_trap_int(dp_fit, v)
# Plotting
plt.figure(dpi=dpi)
plt.plot(dp_fit, v, label='Volume')
plt.plot(dp_fit, sa, label='Surface Area')
plt.plot(dp_fit, n, label='Number')
plt.xscale('log')
plt.xlabel('d$_p$ / $\mu$m')
plt.ylabel('Normalised Frequency Density')
plt.xlim([min(dp_fit), max(dp_fit)])
plt.legend()
# plt.savefig('plot.pdf')
plt.show()
return n, sa, v
if __name__ == "__main__":
file_name = 'Volcano_Volume_Density.csv'
# file_name = 'Mastersizer G90.csv'
df = pd.read_csv(file_name, sep=',')
# print(df.columns)
dp = df['Size'] # Diameters in um
vol_den = df['Grimsvotn'] # Asumes the data is given ans volume distribution
den_option = 'V' # Distribution type to convert to before fitting: number (N), surface area (SA), or volume (V)
minimise_op = 'R2' # Minimisation option, either total residual (TR), or R² value (R2)
fit_option = True # If True will fit the data. False is useful for finding an initial guess
post_conversion = True # If true will convert the fit to all distributions
dpi = 600
mode_sizes = 0.008379052, 0.635798575, 6.150637405 # relative sizes of modes
mode_means = 0.828113968, 32.99460371, 96.6914689 # means od modes / um
mode_stds = 0.083381234, 0.348839354, 0.226022998 # modes standard distributions (logspace)
vp = np.pi * np.power(np.array(dp), 3) / 6 # volumes at given sizes
sa = np.pi * np.power(np.array(dp), 2) # surface area at given sizes
num_den = vol_den / vp # Assumes all particels are spherical and is converting to number distribution
sa_den = num_den * sa
den = get_den(den_option, num_den, sa_den, vol_den)
den /= log_trap_int(dp, den) # Normalisation
dp_fit = np.logspace(np.log10(min(dp)), np.log10(max(dp)), 1000, base=10)
freq_den = plot_multimodal(dp, mode_sizes, mode_means, mode_stds, den_option, dp_fit, exp_data=den, dpi=dpi, min_dp=0.1)
if fit_option:
initial_guess = mode_sizes + mode_means + mode_stds
n = int(len(initial_guess) / 3)
result = minimize(objective_function, initial_guess, args=(dp, den, n, minimise_op, True), constraints={'type': 'ineq', 'fun': positivity_constraint})
val = result.x
mode_sizes, mode_means, mode_stds = val[:n], val[n:(n * 2)], val[(n * 2):]
fds = plot_multimodal(dp, mode_sizes, mode_means, mode_stds, den_option, dp_fit, exp_data=den, dpi=dpi, min_dp=0.1)
print(f'mode_sizes = [{", ".join(map(str, mode_sizes))}]')
print(f'mode_means = [{", ".join(map(str, mode_means))}]')
print(f'mode_stds = [{", ".join(map(str, mode_stds))}]')
if post_conversion: # This is for the giess not the fit! Rerun wit the fit result as the guess to get it for the fit!
n, sa, v = get_all_dists(dp_fit, freq_den, den_option, dpi=dpi)