-
Notifications
You must be signed in to change notification settings - Fork 0
/
MSRESOLVE.py
6876 lines (6229 loc) · 599 KB
/
MSRESOLVE.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import bisect
import copy
import ParsingFunctions as parse
import numpy
import csv
import time
import timeit
import math
import sys
import pandas
import XYYYDataFunctionsSG as DataFunctions
import os
import shutil
import importlib
from numpy import genfromtxt
import export_import as ei
#G stands for Global, and is used to draw data from the UserInput File, and to store data during processing.
import UserInput as G; importlib.reload(G) #import the user input and reload the module to get rid of any unwanted variables in the namespace
debuggingExportIndividualItem = False #setting the default value for this global variable.
############################################################################################################################################
#########################################################Best Mass Fragment Chooser#########################################################
############################################################################################################################################
#This function is made to conduct preliminary checks. The checks are designed to
#fail only if there is no chance of the chosen reference data passing the
#SLS method. There are 2 cases where this occurs: 1) a molecule does not
#contain reference intensities for any mass fragments in the combination or 2)
#the reference data for the mass fragments does not contain any zeros.
def passesRowsSumChecks(rowSumsList, massFragCombination, allOverlappingPatterns, numberOfMassFragments=None, numberOfRows=None):
    """Run preliminary feasibility checks on a mass fragment combination's row sums.

    rowSumsList : per-row counts of non-zero reference intensities for the
        chosen mass fragments (one entry per molecule row).
    massFragCombination : the combination of mass fragments being screened.
    allOverlappingPatterns : running list of combinations whose reference data
        contain no zeros at all; appended to in place when that is detected.
    numberOfMassFragments : optional precomputed len(massFragCombination)
        (passing it in is slightly faster).
    numberOfRows : accepted for interface compatibility; not used here.

    Returns (passesRowsSumChecks, allOverlappingPatterns). The check fails when
    1) some molecule has no reference intensity for any chosen fragment (a row
    sum of zero), or 2) every row is completely full (no zeros anywhere), since
    in both cases the SLS method cannot possibly succeed.
    """
    if numberOfMassFragments is None:
        numberOfMassFragments = len(massFragCombination)  ##It is slightly faster to pass in the length variable to make it shorter
    passesRowsSumChecks = True  #Initialize return variable as true
    if 0 in rowSumsList:  #Check if any row is entirely full of zeros
        passesRowsSumChecks = False
    #BUGFIX: the original condition was `numpy.array(rowSumsList).all() == numberOfMassFragments`,
    #which compares a boolean (.all() of the truthiness of the sums) to an integer and
    #is therefore only ever True when numberOfMassFragments == 1. The intended check is
    #whether *every* row sum equals the number of mass fragments, i.e. the sub-reference
    #array is completely full (contains no zeros at all).
    elif numpy.all(numpy.array(rowSumsList) == numberOfMassFragments):  #Check if the array passed to it is full. If it is full, it appends to the allOverlapping patterns list
        allOverlappingPatterns.append(massFragCombination)  ###May be an error with the use of allOverlapping patterns
        #FIXME: #TODO: should be printed to a file.
        #print(allOverlappingPatterns)
        passesRowsSumChecks = False
    return passesRowsSumChecks, allOverlappingPatterns  #Return true if the rowsSumsList passes the check
#The function maintains two lists: 1)that contains the objective function values that
#need to be kept in a sorted order 2) a parallel list where a value needs to be inserted
#according to the sorting of the first one. It also takes in an integer value, N, that limits
#the length of the two lists to N values. Using a binary search method, it will
#find the location where the value to insert will be inserted(if possible). The
#value will be inserted there and the last value removed from the list (if
#applicable). The bisect method used requires the list be presorted in ascending order.
#Thus, the algorithm here inserts values in a way to create an ascending ordered list.
#By default storeAndPop is used to keep the best N values based on minimizing
#the objective function values. If instead it is desirable to retain values
#with the objective function maximized, the optional argument 'optimumType'
#should be set to `Maximum`.
#Alternatively, the objective function values could be multiplied by -1.
#The function supports multidimensional objective functions in nested objects
#such as tuples or lists, e.g., (A,B,C) in which case it will be sorted
#by A, then B, then C. For multidimensional objective functions, it is
#necessary to have the dimensions as either all "maximum" or all "minimum" type.
#Using a -1 factor can be helpful in this regard (e.g., passing in (A,-B,C) etc.
#If the values in the sample space for parallelList are not unique it is
#possible that repeated calls to this function could lead to
#a parallelList with a particular value repeated many times. If repeated values
#are desired, then excludeDuplicates can be set to False.
def storeAndPop(objectiveFunctionValuesList, objectiveFunctionValueToInsert,
                parallelList, valueToInsertInParallelList, maxItemsAllowed,
                optimumType="Minimum", excludeDuplicates=True):
    """Insert a candidate into a sorted top-N list and its parallel companion list.

    objectiveFunctionValuesList : list kept sorted in ascending order.
    objectiveFunctionValueToInsert : candidate objective function value.
    parallelList : list kept index-aligned with objectiveFunctionValuesList.
    valueToInsertInParallelList : companion value inserted at the same index.
    maxItemsAllowed : maximum length (N) allowed for both lists.
    optimumType : 'Minimum' keeps the N smallest objective values (worst element
        popped from the end); 'Maximum' keeps the N largest (popped from the front).
    excludeDuplicates : when True, a candidate whose companion value equals the
        companion immediately left of the insertion point is rejected.

    Returns (objectiveFunctionValuesList, parallelList, valueStoredInList) where
    valueStoredInList reports whether an insertion actually happened.
    Raises ValueError for any other optimumType once the lists are full.
    """
    #Find the insertion index where the value will be inserted by using a binary
    #search
    insertionIndex=bisect.bisect(objectiveFunctionValuesList,
                                 objectiveFunctionValueToInsert)
    #Initialize a variable to keep track of if a value was inserted into the
    #list.
    valueStoredInList=False
    #If it is a duplicate exit now without checking everything else
    #Note that we only check the value in parallel list to the left of
    #the insertion index. This is because bisect.bisect() will specify
    #and insertion index such that a duplicate is inserted
    #to the right of the original.
    #NOTE(review): when insertionIndex is 0, insertionIndex-1 is -1, so this
    #compares against parallelList[-1] (the *last* element) via Python's
    #negative indexing — confirm that wraparound is intended for candidates
    #smaller than every stored objective value.
    if (excludeDuplicates and len(parallelList) and
            (parallelList[insertionIndex-1] == valueToInsertInParallelList)):
        #This value is a duplicate. Return the original lists.
        return objectiveFunctionValuesList, parallelList, valueStoredInList
    #If the list isn't yet filled, the value will inherently be in the top N
    #value in the list. This value can just be inserted at the insertionIndex.
    if len(objectiveFunctionValuesList)<maxItemsAllowed:
        objectiveFunctionValuesList.insert(insertionIndex,
                                           objectiveFunctionValueToInsert)
        parallelList.insert(insertionIndex, valueToInsertInParallelList)
        valueStoredInList=True
    #If the list already contains N elements, a new element could either be
    #inserted in the list or at the end of the list. For optimumType == Minimum,
    #an item at the end would be worse and thus nothing should be added.
    #However, for optimumType == Maximum an object at the end would be the best
    #and thus should be added.
    elif (len(objectiveFunctionValuesList) == maxItemsAllowed and
            (insertionIndex<maxItemsAllowed or optimumType=="Maximum")):
        #insert the value to insert in the location found through the binary
        #search
        objectiveFunctionValuesList.insert(insertionIndex,
                                           objectiveFunctionValueToInsert)
        parallelList.insert(insertionIndex, valueToInsertInParallelList)
        valueStoredInList=True
        if optimumType == 'Minimum':
            #delete the last element since something was added to the list
            del objectiveFunctionValuesList[-1]
            del parallelList[-1]
        elif optimumType == 'Maximum':
            #delete the first element since something was added to the list
            del objectiveFunctionValuesList[0]
            del parallelList[0]
        else:
            raise ValueError("optimumType must be either 'Maximum' " +
                             "or 'Minimum'")
    return objectiveFunctionValuesList, parallelList, valueStoredInList
#The rough uniqueness check is a limiting check that takes the mass fragment
#combinations that pass the row sums check and builds a lists of
#keep_N_ValuesInRoughUniquenessCheck that contain the largest number of zeros
#and the corresponding mass fragment combination for each of the values in the
#first list.
#The largest number of zeros would be most likely to pass the SLS method.
#It calculates a sum that roughly expresses how unique the molecular mass
#fragments are to the different molecules, but this is a quick and not-rigorous
#method. Then, the value is stored *only* if it is in the top N of the values
#so far.
def roughUniquenessCheck(rowSumsList, smallestRowsSumsList,topMassFragCombinationsList, keep_N_ValuesInRoughUniquenessCheck, massFragCombination):
    """Retain the mass fragment combinations whose row sums add up to the smallest totals.

    A smaller total of the row sums loosely corresponds to more zeros in the
    sub-reference pattern, which makes the combination more likely to pass the
    SLS method. Delegates the sorted top-N bookkeeping to storeAndPop, so at
    most keep_N_ValuesInRoughUniquenessCheck combinations are retained.
    Returns the updated sums list, the updated combinations list, and a flag
    reporting whether this combination made the cut.
    """
    # The quick "uniqueness" score is simply the grand total of the row sums;
    # we want to keep the smallest totals.
    uniquenessScore = numpy.sum(rowSumsList)
    # storeAndPop keeps both lists sorted ascending and capped at N entries.
    smallestRowsSumsList, topMassFragCombinationsList, valueStoredInRUTopList = storeAndPop(
        smallestRowsSumsList, uniquenessScore,
        topMassFragCombinationsList, massFragCombination,
        keep_N_ValuesInRoughUniquenessCheck)
    return smallestRowsSumsList,topMassFragCombinationsList, valueStoredInRUTopList
#The significance factor check is a limiting check that selects the mass
#fragment combinations having the largest sum of significance factors. It
#calculates the significance factors for each element in the sub-reference
#array (reference array with zeros for all the data for mass fragments that
#aren't needed). It then takes the sum of all of the significance factors.
#The top N mass fragment combinations are then stored in a list in descending
#order according to the magnitude of their significance sum.
#Basically, it calculates the significance factor for each element in the
#chosen reference array and sums all of the significance values for the whole
#array. It keeps the mass fragments that have the largest magnitude of
#significance sum.
def significanceFactorCheck(chosenReferenceIntensities,largestMagnitudeSigFactorSumsList,topMassFragCombinationsList, massFragCombination, keep_N_ValuesInSignificanceFactorCheck, moleculesLikelihood):
    """Retain the mass fragment combinations with the largest summed significance factors.

    Sums the element significance factors over every molecule column of
    chosenReferenceIntensities (via ElemSignificanceCalculator), negates the
    total so that the ascending-ordered storeAndPop keeps the most significant
    (most negative) sums, and caps the retained combinations at
    keep_N_ValuesInSignificanceFactorCheck.
    Returns the updated sums list, the updated combinations list, and a flag
    reporting whether this combination was stored.
    """
    # Accumulate significance one molecule column at a time.
    totalSignificance = 0
    numberOfColumns = len(chosenReferenceIntensities[0])
    for columnIndex in range(numberOfColumns):
        # ElemSignificanceCalculator returns the per-element significance
        # factors for this column (mass fragment numbers column excluded).
        columnSignificances = ElemSignificanceCalculator(chosenReferenceIntensities, columnIndex, moleculesLikelihood)
        totalSignificance = totalSignificance + sum(columnSignificances)
    # Negate so the binary-search (lowest-first) ordering in storeAndPop keeps
    # the largest-magnitude sums; the wording "largest magnitude" stays valid
    # for the negated values.
    negatedSignificanceTotal = -1*totalSignificance
    largestMagnitudeSigFactorSumsList, topMassFragCombinationsList, valueStoredInSFTopList = storeAndPop(
        largestMagnitudeSigFactorSumsList, negatedSignificanceTotal,
        topMassFragCombinationsList, massFragCombination,
        keep_N_ValuesInSignificanceFactorCheck)
    return largestMagnitudeSigFactorSumsList, topMassFragCombinationsList, valueStoredInSFTopList
############################################################################################################################################
################################################Algorithm Part 1: Pre-Processing the data###################################################
############################################################################################################################################
# this function will take in the mass fragments that are getting changed, the slope of the line
#of which they are being changed by and finally the intercept of that line which is being subtracted
#from all of their values, so much of this function is going to be prompting and getting values from
#the user
def SlopeEliminator (ExperimentData,backgroundMassFragment,backgroundSlopes,backgroundIntercepts):
    """Subtract a linear background signal (slope*time + intercept) in place.

    ExperimentData : object with .mass_fragment_numbers, .times, and
        .workingData (rows = times, columns = mass fragments).
    backgroundMassFragment : mass fragment numbers whose columns get corrected.
    backgroundSlopes / backgroundIntercepts : per-fragment line parameters,
        index-aligned with backgroundMassFragment.

    Modifies ExperimentData.workingData directly; no return value.
    """
    #Hoist the time axis out of all loops; the original recomputed the matching
    #condition and the float() conversions once per time point, making an
    #accidental O(fragments*masses*times) pure-Python loop.
    times = numpy.asarray(ExperimentData.times)
    for mf_counter in range(len(backgroundMassFragment)):#array-indexed for loop
        slope = float(backgroundSlopes[mf_counter])
        intercept = float(backgroundIntercepts[mf_counter])
        for mass_fragment_numbers_counter in range(len(ExperimentData.mass_fragment_numbers)):#array-indexed for loop
            if ExperimentData.mass_fragment_numbers[mass_fragment_numbers_counter] == backgroundMassFragment[mf_counter]:#if the right mass fragment number is selected then its slope is eliminated
                #Vectorized over the time axis: same per-element arithmetic as the
                #original per-time loop, executed in one numpy operation.
                ExperimentData.workingData[:,mass_fragment_numbers_counter] = (
                    ExperimentData.workingData[:,mass_fragment_numbers_counter]
                    - (times * slope + intercept))
    #no return is necessary. this is implied. the function takes a "pointer/reference" to the collected array and then modifies it directly.
#this function works by drawing the input values from the input file which includes a baselineType, a time range, and the mass fragment
#that is being altered. the time range is taken, and using a for loop, all the values between those two times (including those two times)
#are used in finding the average or polyfit, the collected signals are also picked up here, in order to use them for finding the polyfit.
#So then, depending on the baselineType, the mass fragment's row will be found using a nested for loop with an if statement, which then
# subtracts whatever was found - the polyfit or the average - from each data point in the collected data set.
def LinearBaselineCorrectorSemiAutomatic(ExperimentData,baselineType,massesToBackgroundCorrect,earlyBaselineTimes,lateBaselineTimes):
    """Subtract a baseline ('flat' average or 'linear' fit) from selected mass fragment signals, in place.

    ExperimentData : object with .mass_fragment_numbers, .times, and
        .workingData (rows = times, columns = mass fragments).
    baselineType : list of 'flat' or 'linear' choices; a single entry is
        broadcast to all corrected masses.
    massesToBackgroundCorrect : mass fragments to correct; empty means all.
    earlyBaselineTimes / lateBaselineTimes : per-mass [start, end] time windows
        over which the baseline average / line fit is computed; a single window
        is broadcast to all corrected masses.

    Modifies ExperimentData.workingData directly; no return value. Exits the
    program (sys.exit) if a listed mass has no matching baseline data.
    """
    #This section of code is a patch to permit easier usage of the Baseline Correction
    # it Applies background correction to all fragments if none are listed,
    # but corrector is turned on
    if len(massesToBackgroundCorrect) == 0:
        massesToBackgroundCorrect = ExperimentData.mass_fragment_numbers
    # Applies timeRange to all fragments if only one time is given
    if (len(earlyBaselineTimes) == 1 and len(massesToBackgroundCorrect) != 1):
        newTimeList = earlyBaselineTimes
        for i in range(len(massesToBackgroundCorrect)-1):
            newTimeList = numpy.vstack((newTimeList,earlyBaselineTimes))
        earlyBaselineTimes = newTimeList
    # Applies timeRange2 to all fragments if only one time is given
    if (len(lateBaselineTimes) == 1 and len(massesToBackgroundCorrect) != 1):
        newTimeList = lateBaselineTimes
        for i in range(len(massesToBackgroundCorrect)-1):
            newTimeList = numpy.vstack((newTimeList,lateBaselineTimes))
        lateBaselineTimes = newTimeList
    #the mass_fragment_numbers list has the list of all mass fragments that were measured.
    #massesToBackgroundCorrect is the list of mass fragments to do the correction on (which can be a smaller set of masses).
    selectedtimes = []
    selectedsignals = []
    slopelist = []
    interceptlist = []
    averagelist = []
    starttimelist = []
    endtimelist = []
    starttimelist2 = []
    endtimelist2 = []
    #since the format of the user input ranges change, this loop puts that information back into the form
    #originally used (so that the function will work), with this loop here
    for earlyBaselineIndex in range(len(earlyBaselineTimes)):#array-indexed for loop
        starttimelist.append(earlyBaselineTimes[earlyBaselineIndex][0])
        endtimelist.append(earlyBaselineTimes[earlyBaselineIndex][1])
        if len(lateBaselineTimes) != 0:#if there is only one time range, this part of the loop doesn't happen
            #NOTE(review): lateBaselineTimes is indexed with earlyBaselineIndex;
            #if it is non-empty but shorter than earlyBaselineTimes this raises
            #an IndexError — confirm the two lists are always the same length.
            starttimelist2.append(lateBaselineTimes[earlyBaselineIndex][0])
            endtimelist2.append(lateBaselineTimes[earlyBaselineIndex][1])
    #this for loop goes through all the data, getting lists of the times and signals, which are then used
    #to make lists of slopes, intercepts and averages, which can be used to alter the collected data
    for massesToCorrectIndex in range(len(massesToBackgroundCorrect)):#array-indexed for loop
        for measured_masses_counter in range(len(ExperimentData.mass_fragment_numbers)):#array-indexed for loop
            if massesToBackgroundCorrect[massesToCorrectIndex] == ExperimentData.mass_fragment_numbers[measured_masses_counter]:#if the right mass fragment is found, we now have that index
                for timescounter in range(len(ExperimentData.times)):#array-indexed for loop
                    if ExperimentData.times[timescounter] >= starttimelist[massesToCorrectIndex]:# once the loop gets past the start time it starts appending all the times and signals
                        selectedtimes.append(ExperimentData.times[timescounter])
                        selectedsignals.append(ExperimentData.workingData[timescounter,measured_masses_counter])
                    if ExperimentData.times[timescounter] > endtimelist[massesToCorrectIndex]:# once it gets past the end time, it deletes all the values that are being appended
                        selectedtimes.pop()
                        selectedsignals.pop()
                    if len(starttimelist2) != 0:# same as above, it might not exist
                        if ExperimentData.times[timescounter] >= starttimelist2[massesToCorrectIndex]:
                            selectedtimes.append(ExperimentData.times[timescounter])
                            selectedsignals.append(ExperimentData.workingData[timescounter,measured_masses_counter])
                        if ExperimentData.times[timescounter] > endtimelist2[massesToCorrectIndex]:
                            selectedtimes.pop()
                            selectedsignals.pop()
                    if timescounter == len(ExperimentData.times)-1:#once the loop is finished getting all that, the loop saves all the data for each molecules before going to the next molecule
                        #Fit a line (degree-1 polyfit) and take the mean of the
                        #signals selected inside the baseline window(s).
                        [slope,intercept] = numpy.polyfit(numpy.array(selectedtimes),numpy.array(selectedsignals),1)
                        average = numpy.average(selectedsignals)
                        slopelist.append(slope)
                        interceptlist.append(intercept)
                        averagelist.append(average)
                        #Reset the accumulators for the next corrected mass.
                        selectedtimes = []
                        selectedsignals = []
    #these are the if statements that choose what happens based on user baselineType in the input data file
    if len(baselineType) == 1:
        #Broadcast a single baselineType choice to every corrected mass.
        baselineTypeholder = []
        for length in range(len(massesToBackgroundCorrect)):
            baselineTypeholder.append(baselineType[0])
        baselineType = baselineTypeholder
    for MassFragmentIndex in range(len(massesToBackgroundCorrect)):#array-indexed for loop
        if baselineType[MassFragmentIndex] == 'flat': #the different baselineTypes subtract different things
            for measured_masses_counter in range(len(ExperimentData.mass_fragment_numbers)):#array-indexed for loop
                if massesToBackgroundCorrect[MassFragmentIndex] == ExperimentData.mass_fragment_numbers[measured_masses_counter]:#when the index for the mass fragment is obtained
                    try:
                        #'flat': subtract the window average from the whole column.
                        ExperimentData.workingData[:,measured_masses_counter] = ExperimentData.workingData[:,measured_masses_counter] - averagelist[MassFragmentIndex]
                    except IndexError:
                        print("Warning: LinearBaselineCorrectorSemiAutomatic has failed.\
It is possible that you have entered an observed mass fragement in massesToBackgroundCorrect that \
does not appear in the reference data. If this is the case, \
remove that mass fragment and rerun the program.")
                        sys.exit()
        if baselineType[MassFragmentIndex] == 'linear':#other option
            for measured_masses_counter in range(len(ExperimentData.mass_fragment_numbers)):#array-indexed for loop
                if massesToBackgroundCorrect[MassFragmentIndex] == ExperimentData.mass_fragment_numbers[measured_masses_counter]:#same as above
                    try:
                        #'linear': subtract the fitted line evaluated at every time.
                        ExperimentData.workingData[:,measured_masses_counter] = ExperimentData.workingData[:,measured_masses_counter] - (slopelist[MassFragmentIndex]*ExperimentData.times + interceptlist[MassFragmentIndex])
                    except IndexError:
                        print("Warning: LinearBaselineCorrectorSemiAutomatic has failed.\
It is possible that you have entered an observed mass fragement in massesToBackgroundCorrect that \
does not appear in the reference data. If this is the case, \
remove that mass fragment and rerun the program.")
                        sys.exit()
    #no return is necessary. this is implied. the function takes a "pointer/reference" to the collected array and then modifies it directly.
#this function is going to set a certain threshold based on the data, and eliminate any values below this,
#setting them to 0, this serves to eliminate negatives and insignificant values - this means that the rest of the
#script can solve for the signals more easily and eliminate a good range of possibilities. There are two tests:
#either an absolute threshold, or one that is a percentage of the max signal. The signal should pass both checks.
def LowerBoundThresholdFilter (ExperimentData,massesToLowerBoundThresholdFilter,lowerBoundThresholdPercentage,lowerBoundThresholdAbsolute):
    """Zero out signals below an absolute and/or percentage-of-maximum threshold, in place.

    ExperimentData : object with .mass_fragment_numbers and .workingData
        (rows = times, columns = mass fragments).
    massesToLowerBoundThresholdFilter : mass fragments to filter; empty means all.
    lowerBoundThresholdPercentage : per-mass fractions of each filtered column's
        maximum (a single value is broadcast to all filtered masses).
    lowerBoundThresholdAbsolute : per-mass absolute thresholds (a single value
        is broadcast to all filtered masses).

    A point is set to 0.0 if it falls below either applicable threshold, which
    eliminates negatives and insignificant values. Modifies
    ExperimentData.workingData directly; no return value.
    """
    #test case for whether all masses should be filtered
    #if no masses are listed
    if len(massesToLowerBoundThresholdFilter) == 0:
        #set all mass to be filtered
        massesToLowerBoundThresholdFilter = ExperimentData.mass_fragment_numbers
    #For lower bound absolute value, if somebody has set only one value, then all masses will receive that. Otherwise, only the chosen masses will.
    #We also need to convert this into a float if it's not already a float.
    if len(lowerBoundThresholdAbsolute) == 1:
        ThresholdTemporaryVariable = lowerBoundThresholdAbsolute[0]
        lowerBoundThresholdAbsolute = []#making list blank again so I can append to it as many times as needed.
        for mass in massesToLowerBoundThresholdFilter:
            #NOTE(review): unlike the percentage branch below, this does not
            #actually call float() despite the comment — confirm callers always
            #pass numeric thresholds here.
            lowerBoundThresholdAbsolute.append(ThresholdTemporaryVariable) #making float and appending.
    if len(lowerBoundThresholdAbsolute)!=0:
        #This makes a numpy array at the end.
        #NOTE(review): indexing this with mtbc below assumes the threshold list
        #is index-aligned with massesToLowerBoundThresholdFilter.
        lowerboundByAbsoluteArray = numpy.array(lowerBoundThresholdAbsolute)
    #if somebody has set only one value for the percentage or the absolute threshold, then all masses will receive that. Otherwise, only the chosen masses will.
    if len(lowerBoundThresholdPercentage) ==1:
        percentageTemporaryVariable = lowerBoundThresholdPercentage[0]
        lowerBoundThresholdPercentage = []#making list blank again so I can append to it as many times as needed.
        for mass in massesToLowerBoundThresholdFilter:
            lowerBoundThresholdPercentage.append(float(percentageTemporaryVariable)) #making float and appending.
    if len(lowerBoundThresholdPercentage)!=0:
        #finding maximum of each column so that I can do the percentage case:
        MaximumIntensitiesOfMassesToLowerBoundThresholdFilter = [ ]
        for measured_masses_counter in range(len(ExperimentData.mass_fragment_numbers)): #array-indexed for loop to loop across all masses
            for mtbc in range(len(massesToLowerBoundThresholdFilter)):#array-indexed for loop that loops acrosses masses to be filtered
                if ExperimentData.mass_fragment_numbers[measured_masses_counter] == massesToLowerBoundThresholdFilter[mtbc]: #This passes *only* for the indices of masses to be filtered.
                    MaximumIntensitiesOfMassesToLowerBoundThresholdFilter.append(max(ExperimentData.workingData[:,measured_masses_counter]))
        #This converts the percentages to absolute values and makes a numpy arry also.
        lowerboundByPercentageArray = numpy.array(lowerBoundThresholdPercentage) * numpy.array(MaximumIntensitiesOfMassesToLowerBoundThresholdFilter)
    #Now we will apply the actual filter to each point for each mass that needs to be applied to.
    #We use the two loops again.
    for measured_masses_counter in range(len(ExperimentData.mass_fragment_numbers)): #array-indexed for loop to loop across all masses
        for mtbc in range(len(massesToLowerBoundThresholdFilter)):#array-indexed for loop that loops acrosses masses to be filtered
            if ExperimentData.mass_fragment_numbers[measured_masses_counter] == massesToLowerBoundThresholdFilter[mtbc]: #This passes *only* for the indices of masses to be filtered.
                #Below looks at each point for a given mass, and then applies the appropriate filter.
                for point in enumerate(ExperimentData.workingData[:,measured_masses_counter]): #note that "point" is actually the a tuple with both the value and the index in form (index,value).
                    if len(lowerBoundThresholdAbsolute)!=0: #Check if length of lowerbound threshold list is not equal to zero before applying it
                        if point[1] < lowerboundByAbsoluteArray[mtbc]: #check if the value of the point is lower than the absolute threshold.
                            ExperimentData.workingData[:,measured_masses_counter][point[0]] = 0.0 #this changes the value to 0.0 at index given by point[0] within the array of masses.
                    if len(lowerBoundThresholdPercentage)!=0: #Check if length of lowerbound threshold by percentage list is not equal to zero before applying it
                        if point[1] < lowerboundByPercentageArray[mtbc]: #check if the value of the point is lower than the absolute threshold.
                            ExperimentData.workingData[:,measured_masses_counter][point[0]] = 0.0 #this changes the value to 0.0 at index given by point[0] within the array of masses.
    #return collected #no return is necessary. this is implied. the function takes a "pointer/reference" to the collected array and then modifies it directly.
'''
This function simply searches two arrays and returns an array with any and all
overlapping numbers
'''
def MatchingNumbers(Array1, Array2):
    """Return a list of the values that appear in both input iterables."""
    commonValues = set(Array1).intersection(set(Array2))
    return list(commonValues)
'''
This function determines and returns the ABC correction coefficients for Tuning Correction based off
of a literature (NIST) reference pattern and a measured (Experimental) reference pattern
'''
def ABCDetermination(ReferencePatternExistingTuning_FileNameAndForm, ReferencePatternDesiredTuning_FileNameAndForm, exportCoefficients=True):
#The two arguments are actually lists containing strings of the name and form type: (["FileNameOne.csv","xyyy"],["FileNameTwo.csv", "xyyy"])
'''
Step 1: Read in all necessary information from fragment patterns
Step 2: populate all variables and standardize signals by 100
'''
if G.applyReferenceMassFragmentsThresholds !='yes':
print("Warning: The ABCDetermination will occur without threshold filtering, since that setting is off.")
if G.extractReferencePatternFromDataOption == 'yes':
print("Warning: MSRESOLVE is set to do a tuning correction and is also set to extract the reference pattern from the data. The tuning correction will be applied to the extracted pattern. This is not a typical procedure. Typically, the extract pattern from data feature is off when the tuning corrector is on.")
#For simplicity, we will put the items into temporary items, then into dictionaries that we can then access.
ReferencePatternExistingTuningDict = {}
[provided_reference_patterns, electronnumbers, molecules, molecularWeights, SourceOfFragmentationPatterns, sourceOfIonizationData, relativeIonizationEfficiencies, moleculeIonizationType, mass_fragment_numbers_monitored, referenceFileName, form] = readReferenceFile(*ReferencePatternExistingTuning_FileNameAndForm)
ReferencePatternExistingTuningDict['molecules']=molecules
ReferencePatternExistingTuningDict['provided_reference_patterns'] = provided_reference_patterns
ReferencePatternExistingTuningDict['provided_reference_patterns'] = StandardizeReferencePattern(ReferencePatternExistingTuningDict['provided_reference_patterns'],len(molecules)) #this does have the molecular weight as the first column.
if G.applyReferenceMassFragmentsThresholds =='yes':
ReferencePatternExistingTuningDict['provided_reference_patterns'] = ReferenceThresholdFilter(ReferencePatternExistingTuningDict['provided_reference_patterns'],G.referenceMassFragmentFilterThreshold)
ReferencePatternDesiredTuningDict = {}
[provided_reference_patterns, electronnumbers, molecules, molecularWeights, SourceOfFragmentationPatterns, sourceOfIonizationData, relativeIonizationEfficiencies, moleculeIonizationType, mass_fragment_numbers_monitored, referenceFileName, form] = readReferenceFile(*ReferencePatternDesiredTuning_FileNameAndForm)
ReferencePatternDesiredTuningDict['molecules']=molecules
ReferencePatternDesiredTuningDict['provided_reference_patterns'] = provided_reference_patterns
ReferencePatternDesiredTuningDict['provided_reference_patterns'] = StandardizeReferencePattern(ReferencePatternDesiredTuningDict['provided_reference_patterns'],len(molecules)) #this does have the molecular weight as the first column.
if G.applyReferenceMassFragmentsThresholds =='yes':
ReferencePatternDesiredTuningDict['provided_reference_patterns'] = ReferenceThresholdFilter(ReferencePatternDesiredTuningDict['provided_reference_patterns'],G.referenceMassFragmentFilterThreshold)
'''
Step 3a: Truncate to the molecules which match.
'''
OverlappingMolecules = numpy.intersect1d(ReferencePatternExistingTuningDict['molecules'],ReferencePatternDesiredTuningDict['molecules'] )
if len(OverlappingMolecules) == 0:
return [0,0,1], [[0,0,0],[0,0,0],[0,0,0]]
OverlappingFragments = numpy.intersect1d(ReferencePatternExistingTuningDict['provided_reference_patterns'][:,0],ReferencePatternDesiredTuningDict['provided_reference_patterns'][:,0])
if len(OverlappingFragments) == 0:
return [0,0,1], [[0,0,0],[0,0,0],[0,0,0]]
[OverlappingMolecules,ReferencePatternExistingTuningDict['OverlappingIndices'], ReferencePatternDesiredTuningDict['OverlappingIndices']] = numpy.intersect1d(ReferencePatternExistingTuningDict['molecules'],ReferencePatternDesiredTuningDict['molecules'], return_indices=True)
ReferencePatternExistingTuningDict['OverlappingMolecules'] = OverlappingMolecules
ReferencePatternDesiredTuningDict['OverlappingMolecules'] = OverlappingMolecules
ReferencePatternExistingTuningDict['overlapping_provided_reference_patterns'] = []
#Now need to get only the overlapping indices values. The numpy take function will work if we do it one row at a time:
for rowIndex,row in enumerate(ReferencePatternExistingTuningDict['provided_reference_patterns']):
massFragment = row[0]
truncatedIntensities = numpy.take(row[1:],ReferencePatternExistingTuningDict['OverlappingIndices'])
rowTruncated = numpy.insert(truncatedIntensities, 0, massFragment)
ReferencePatternExistingTuningDict['overlapping_provided_reference_patterns'].append(rowTruncated)
ReferencePatternDesiredTuningDict['overlapping_provided_reference_patterns'] = []
#Now need to get only the overlapping indices values. The numpy take function will work if we do it one row at a time:
for rowIndex,row in enumerate(ReferencePatternDesiredTuningDict['provided_reference_patterns']):
massFragment = row[0]
truncatedIntensities = numpy.take(row[1:],ReferencePatternDesiredTuningDict['OverlappingIndices'])
rowTruncated = numpy.insert(truncatedIntensities, 0, massFragment)
ReferencePatternDesiredTuningDict['overlapping_provided_reference_patterns'].append(rowTruncated)
#Note that the columns of overlapping_provided_reference_patterns are not just truncated but also rearranged.
#Thus, going forward, OverlappingMolecules must be used for indices rather than using molecules.
'''
Step 3b: Determine Matching Mass fragments
'''
#Only append rows that have an overlapping mass fragment.
matchingFragmentsOnlyPatterns = []
for row in ReferencePatternExistingTuningDict['overlapping_provided_reference_patterns']:
if row[0] in OverlappingFragments:
matchingFragmentsOnlyPatterns.append(row)
#now swap in the new list.
ReferencePatternExistingTuningDict['overlapping_provided_reference_patterns'] = matchingFragmentsOnlyPatterns
#Only append rows that have an overlapping mass fragment.
matchingFragmentsOnlyPatterns = []
for row in ReferencePatternDesiredTuningDict['overlapping_provided_reference_patterns']:
if row[0] in OverlappingFragments:
matchingFragmentsOnlyPatterns.append(row)
#now swap in the new list.
ReferencePatternDesiredTuningDict['overlapping_provided_reference_patterns'] = matchingFragmentsOnlyPatterns
#Convert the two lists to numpy arrays so that they can be divided....
ReferencePatternExistingTuningDict['overlapping_provided_reference_patterns'] = numpy.array(ReferencePatternExistingTuningDict['overlapping_provided_reference_patterns'])
ReferencePatternDesiredTuningDict['overlapping_provided_reference_patterns'] = numpy.array(ReferencePatternDesiredTuningDict['overlapping_provided_reference_patterns'])
#Need to standardize again, because the ratios during division below will be wrong if the largest peak was not among the matching fragments.
ReferencePatternExistingTuningDict['overlapping_provided_reference_patterns'] = StandardizeReferencePattern(ReferencePatternExistingTuningDict['overlapping_provided_reference_patterns'],len(OverlappingMolecules)) #this does have the molecular weight as the first column.
ReferencePatternDesiredTuningDict['overlapping_provided_reference_patterns'] = StandardizeReferencePattern(ReferencePatternDesiredTuningDict['overlapping_provided_reference_patterns'],len(OverlappingMolecules)) #this does have the molecular weight as the first column.
'''
Find a,b,c:
'''
numpy.seterr(divide='ignore', invalid='ignore') #This is to prevent some unexpected (but not at the moment concerning) warnings in the below lines from the nan values.
RatioOfPatterns = ReferencePatternDesiredTuningDict['overlapping_provided_reference_patterns'][:,1:]/ReferencePatternExistingTuningDict['overlapping_provided_reference_patterns'][:,1:]
#For each row, take the mean excluding nan values.
meanRatioPerMassFragment = numpy.nanmean(RatioOfPatterns,1) #the 1 is for over axis 1, not over axis 0.
numpy.seterr(divide='warn', invalid='warn') #This is to put the warnings back to how they normally are.
from numpy import inf
#Due to divide by zero errors, it is useful to get rid of any infinity type values.
#Below 3 lines were first attempt to deal with infinite values, but were not necessary. Can skip to the fitting.
meanRatioPerMassFragment[meanRatioPerMassFragment==0] = 'nan' #zeros are not useful.
# meanRatioPerMassFragment[meanRatioPerMassFragment==inf] = 'nan'
# meanRatioPerMassFragment[meanRatioPerMassFragment==-inf] = 'nan'
#Following this post.. https://stackoverflow.com/questions/28647172/numpy-polyfit-doesnt-handle-nan-values
FiniteValueIndices = numpy.isfinite(OverlappingFragments) & numpy.isfinite(meanRatioPerMassFragment)
#But changing the next lines to add in temporary variables.
FiniteOverlappingFragments = OverlappingFragments[FiniteValueIndices]
if len(FiniteOverlappingFragments) == 0:
return [0,0,1], [[0,0,0],[0,0,0],[0,0,0]]
FiniteMeanRatioPerMassFragment = meanRatioPerMassFragment[FiniteValueIndices]
abcCoefficients, abcCoefficients_cov =numpy.polyfit(FiniteOverlappingFragments,FiniteMeanRatioPerMassFragment,2, cov=True) #The two is for 2nd degree polynomial.
FiniteMeanReverseRatioPerMassFragment = 1/FiniteMeanRatioPerMassFragment
abcCoefficients_reverse, abcCoefficients_reverse_cov =numpy.polyfit(FiniteOverlappingFragments,FiniteMeanReverseRatioPerMassFragment,2, cov=True) #The two is for 2nd degree polynomial.
if exportCoefficients == True:
numpy.savetxt('TuningCorrectorPattern_FiniteOverlappingFragments.csv', FiniteOverlappingFragments, delimiter=",")
numpy.savetxt('TuningCorrectorPattern_FiniteMeanRatioPerMassFragment.csv', FiniteMeanRatioPerMassFragment, delimiter=",")
#Export the TuningCorrectorCoefficients as a list.
with open('TuningCorrectorCoefficients.txt', 'w') as the_file:
the_file.write(str(list(abcCoefficients)))
try: #Export the TuningCorrectorCoefficients_cov to a csv.
numpy.savetxt('TuningCorrectorCoefficients_cov.csv', abcCoefficients_cov, delimiter=",")
except:
print("Could not save to file TuningCorrectorCoefficients_cov")
#Do the same for the reverse relation.
numpy.savetxt('TuningCorrectorPattern_FiniteMeanReverseRatioPerMassFragment.csv', FiniteMeanReverseRatioPerMassFragment, delimiter=",")
with open('TuningCorrectorCoefficientsReverse.txt', 'w') as the_file:
the_file.write(str(list(abcCoefficients_reverse)))
#Export the TuningCorrectorCoefficients_cov to a csv.
try:
numpy.savetxt('TuningCorrectorCoefficientsReverse_cov.csv', abcCoefficients_reverse_cov, delimiter=",")
except:
print("Could not save to file TuningCorrectorCoefficientsReverse_cov")
#Factor = A*X^2 + B*X + C, so C=1.0 means the factor is 1.0 and independent of molecular weight.
#To use this with mixed patterns (meaning, some patterns taken from Literature reference like NIST) you will need to first determine the A,B,C coefficients, then you'll have to divide the Literature reference pattern by this factor. This will compensate for when the code multiplies by the factor, thus putting the mixed patterns into the same tuning.
return abcCoefficients, abcCoefficients_cov
#this function either creates or gets the three coefficients for the polynomial correction (Tuning Correction) and calculates
#the correction factor for the relative intensities of each mass fragment, outputting a corrected set
#of relative intensities
def TuningCorrector(referenceDataArrayWithAbscissa, referenceCorrectionCoefficients, referenceCorrectionCoefficients_cov, referenceFileExistingTuningAndForm=None, referenceFileDesiredTuningAndForm=None, tuningCorrection="no"):
    """Apply a mass-dependent polynomial (Tuning Correction) to a reference pattern.

    Each non-zero intensity is multiplied by factor = A*m**2 + B*m + C, where m is
    the mass-fragment (abscissa) value and [A, B, C] = referenceCorrectionCoefficients.

    Parameters:
        referenceDataArrayWithAbscissa: 2D numpy array; column 0 is the mass-fragment
            abscissa, the remaining columns are per-molecule relative intensities.
        referenceCorrectionCoefficients: list [A, B, C], or a dict with keys 'A','B','C'
            (converted to a list here). [0, 0, 1] means factor == 1.0, i.e. no correction.
        referenceCorrectionCoefficients_cov: covariance of the coefficients (matrix, 1D
            list of variances, None, or zeros); used to propagate tuning uncertainties.
        referenceFileExistingTuningAndForm / referenceFileDesiredTuningAndForm:
            [filename, form] pairs; used only when tuningCorrection == 'yes' to
            determine the coefficients via ABCDetermination.
        tuningCorrection: 'yes' to (re)determine the coefficients from the two
            reference files (also stored on G); otherwise the passed coefficients are used.

    Returns:
        tuple (correctedArrayWithAbscissa, tuningUncertaintiesArrayWithAbscissa),
        both retaining the abscissa as column 0. The uncertainties are absolute.
        Note: the returned pattern is no longer standardized; callers re-standardize.
    """
    # Tuning corrector is designed to work with standardized_reference_patterns,
    # so first we make sure the data is standardized.
    referenceDataArrayWithAbscissa = StandardizeReferencePattern(referenceDataArrayWithAbscissa)
    if isinstance(referenceCorrectionCoefficients, dict):  # if it's a dictionary, we need to make it an ordered [A, B, C] list.
        referenceCorrectionCoefficients = [referenceCorrectionCoefficients['A'], referenceCorrectionCoefficients['B'], referenceCorrectionCoefficients['C']]
    if tuningCorrection == 'yes':
        if len(referenceFileDesiredTuningAndForm) == 0:  # TODO: this isn't very good logic, but it allows automatic population of referenceFileDesiredTuningAndForm. The problem is it is reading from file again instead of using the already made ReferenceData object. ABCDetermination and possibly TuningCorrector should be changed to take *either* a ReferenceData object *or* a filename (check for a string argument).
            if '.csv' in G.referencePatternsFileNamesList[0]:
                referenceFileDesiredTuningAndForm = ["ExportedDesiredTuningReferencePattern.csv", "xyyy"]  # Take the first item from G.referencePatternsFileNamesList and from G.referencePatternsFormsList.
            if '.tsv' in G.referencePatternsFileNamesList[0]:
                referenceFileDesiredTuningAndForm = ["ExportedDesiredTuningReferencePattern.tsv", "xyyy"]  # Take the first item from G.referencePatternsFileNamesList and from G.referencePatternsFormsList.
        abcCoefficients, abcCoefficients_cov = ABCDetermination(referenceFileExistingTuningAndForm, referenceFileDesiredTuningAndForm)
        referenceCorrectionCoefficients[0], referenceCorrectionCoefficients[1], referenceCorrectionCoefficients[2] = abcCoefficients
        G.referenceCorrectionCoefficients = referenceCorrectionCoefficients  # TODO: Maybe this logic should be changed, since it will result in an exporting of the last coefficients used, whether a person is doing forward tuning or reverse tuning.
        referenceCorrectionCoefficients_cov = abcCoefficients_cov
        G.referenceCorrectionCoefficients_cov = referenceCorrectionCoefficients_cov
    referenceabscissa = referenceDataArrayWithAbscissa[:, 0]  # gets arrays of just data and abscissa
    referenceDataArray = referenceDataArrayWithAbscissa[:, 1:]
    nonZeroValueLocations = referenceDataArray > 0  # booleans "True"/"False", which act as 1 and 0 during multiplication.
    referenceDataArray_tuning_uncertainties = referenceDataArray*0.0  # just initializing.
    if list(referenceCorrectionCoefficients) != [0, 0, 1]:  # [0,0,1] makes the factor 1.0 for every mass, so nothing to do.
        for massfrag_counter in range(len(referenceabscissa)):  # array-indexed for loop; only the data is altered, based on the abscissa (mass-dependent correction factors)
            # obtains the factor from the molecular weight (abscissa value) of this row.
            factor = referenceCorrectionCoefficients[0]*(referenceabscissa[massfrag_counter]**2) + referenceCorrectionCoefficients[1]*referenceabscissa[massfrag_counter] + referenceCorrectionCoefficients[2]
            referenceDataArray[massfrag_counter, :] = referenceDataArray[massfrag_counter, :]*factor
            # Bug fix: original code had `type(referenceCorrectionCoefficients_cov) != None`, which
            # compares a type object to None and is therefore always True; when the covariance was
            # actually None, the line below then crashed on numpy.array(None). Check the value itself.
            if referenceCorrectionCoefficients_cov is not None:
                if sum(numpy.array(referenceCorrectionCoefficients_cov)).all() != 0:  # skip uncertainty propagation when the covariance is effectively all zeros.
                    if len(numpy.shape(referenceCorrectionCoefficients_cov)) == 1 and (len(referenceCorrectionCoefficients_cov) > 0):
                        # A filled 1D array/list of variances gets diagonalized into a covariance matrix.
                        # Bug fix: was np.diagflat, but this module uses the `numpy` name (np was undefined here).
                        referenceCorrectionCoefficients_cov = numpy.diagflat(referenceCorrectionCoefficients_cov)
                    # Now we need to use a function from XYYYDataFunctionsSG.
                    # NOTE(review): the covariance is passed as both the 2nd and 3rd argument; verify
                    # against returnPolyvalEstimatedUncertainties' signature that this is intended.
                    factor_uncertainty = DataFunctions.returnPolyvalEstimatedUncertainties(referenceabscissa[massfrag_counter], referenceCorrectionCoefficients_cov, referenceCorrectionCoefficients_cov)
                    referenceDataArray_tuning_uncertainties[massfrag_counter, :] = referenceDataArray[massfrag_counter, :]*factor_uncertainty
    # Set intensities back to zero where they were originally zero (multiplies nonzero locations
    # by 1 and everything else by 0), then clip any negative corrected values to zero.
    referenceDataArrayWithAbscissa[:, 1:] = nonZeroValueLocations*referenceDataArray
    referenceDataArrayWithAbscissa[:, 1:] = (referenceDataArrayWithAbscissa[:, 1:] > 0)*referenceDataArrayWithAbscissa[:, 1:]
    referenceDataArrayWithAbscissa_tuning_uncertainties = referenceDataArrayWithAbscissa*1.0  # creating a copy with abscissa, then will fill.
    # The referenceDataArray_tuning_uncertainties are absolute uncertainties (note that above the
    # factor_uncertainty is multiplied by the actual value).
    referenceDataArrayWithAbscissa_tuning_uncertainties[:, 1:] = referenceDataArray_tuning_uncertainties
    return referenceDataArrayWithAbscissa, referenceDataArrayWithAbscissa_tuning_uncertainties
'''Makes a mixed reference pattern from two reference patterns, including tuning correction.'''
def createReferencePatternWithTuningCorrection(ReferenceData, verbose=True, returnMixedPattern=False):
#If tuning corrector is off (as of Nov 2021, variable tuningCorrection), then we return ReferenceData unchanged.
if G.tuningCorrection =='no':
return ReferenceData
# standardize the reference data columns such that the maximum intensity value per molecule is 100 and everything else is scaled to that maximum value.
ReferenceData.ExportCollector('StandardizedReferencePattern', use_provided_reference_patterns=False)
if ReferenceData.referenceFileNameExtension == 'csv':
ReferenceData.exportReferencePattern('ExportedReferencePatternOriginalAnalysis.csv')
if ReferenceData.referenceFileNameExtension == 'tsv':
ReferenceData.exportReferencePattern('ExportedReferencePatternOriginalAnalysis.tsv')
if G.calculateUncertaintiesInConcentrations == True:
if type(G.referencePatterns_uncertainties) != type(None):
ReferenceData.update_relative_standard_uncertainties()
if verbose:
print('beginning TuningCorrector')
if type(G.referenceCorrectionCoefficients) == type({}):#check if it's a dictionary.
G.referenceCorrectionCoefficients = [G.referenceCorrectionCoefficients['A'],G.referenceCorrectionCoefficients['B'],G.referenceCorrectionCoefficients['C']]
#only apply the tuning correction if the list is not 0 0 1.
try:
type(G.referenceCorrectionCoefficients_cov)#If it doesn't exist, we'll make it a None type.
except:
G.referenceCorrectionCoefficients_cov = None
#Wanted to do something like "if list(G.referenceCorrectionCoefficients) != [0,0,1]:" but can't do it out here. Can do it inside function.
if G.tuningCorrection =='no':
G.createMixedTuningPattern = False #override the mixed tuning pattern choice if there is no measured reference.
resetReferenceFileDesiredTuningAndForm = False #initializing. At end, will reset G.referenceFileDesiredTuningAndForm if it was originally blank.
if hasattr(G, 'tuningCorrectPatternInternalVsExternal') == False: #Check if the tuningCorrectPatternInternalVsExternal attribute has been filled. If not, then fill it with the default which is 'External'
G.tuningCorrectPatternInternalVsExternal = 'External'
if ((G.createMixedTuningPattern == False) or (G.tuningCorrectPatternInternalVsExternal == 'Internal')):
if G.tuningCorrectPatternInternalVsExternal == 'Internal':
if ((len(G.referenceFileExistingTuningAndForm) == 0) and (len(G.referenceFileStandardTuningAndForm) == 0)): #if no external pattern provided, we will use the coefficients provided directly.
G.referenceCorrectionCoefficients = G.referenceCorrectionCoefficients
G.referenceCorrectionCoefficients_cov = G.referenceCorrectionCoefficients_cov
elif ((G.referenceFileExistingTuningAndForm !=[]) or (G.referenceFileStandardTuningAndForm!=[])) : #in this case, we are going to overwrite any coefficients provided in order to apply the desired tuning to the external pattern.
referenceFileDesiredTuningAndForm = G.referenceFileDesiredTuningAndForm
referenceFileExistingTuningAndForm = G.referenceFileExistingTuningAndForm
if len(referenceFileExistingTuningAndForm) == 0: #Use the standard tuning file if blank.
referenceFileExistingTuningAndForm = G.referenceFileStandardTuningAndForm
ReferenceDataExistingTuning = createReferenceDataObject ( referenceFileExistingTuningAndForm[0],referenceFileExistingTuningAndForm[1], AllMID_ObjectsDict=G.AllMID_ObjectsDict)
if ReferenceDataExistingTuning.referenceFileNameExtension =='csv':
ReferenceDataExistingTuning.exportReferencePattern('ExportedReferencePatternExistingOriginal.csv')
if ReferenceDataExistingTuning.referenceFileNameExtension =='tsv':
ReferenceDataExistingTuning.exportReferencePattern('ExportedReferencePatternExistingOriginal.tsv')
if len(referenceFileDesiredTuningAndForm) == 0:#TODO: this isn't very good logic, but it allows automatic population of referenceFileDesiredTuningAndForm. The problem is it is reading from file again instead of using the already made ReferenceData object. ABCDetermination and possibly TuningCorrector should be changed so that it can take *either* a ReferenceData object **or** a ReferenceData filename. The function can check if it is receiving a string, and if it's not receiving a string it can assume it's receiving an object.
resetReferenceFileDesiredTuningAndForm = True
if ReferenceDataExistingTuning.referenceFileNameExtension =='csv':
referenceFileDesiredTuningAndForm = [ "ExportedReferencePatternOriginalAnalysis.csv","xyyy" ] #Take the first item from G.referencePatternsFileNamesList and from G.referencePatternsFormsList.
if ReferenceDataExistingTuning.referenceFileNameExtension =='tsv':
referenceFileDesiredTuningAndForm = [ "ExportedReferencePatternOriginalAnalysis.tsv","xyyy" ]
abcCoefficients, abcCoefficients_cov = ABCDetermination(referenceFileExistingTuningAndForm,referenceFileDesiredTuningAndForm)
referenceCorrectionCoefficients = numpy.zeros(3)
referenceCorrectionCoefficients[0],referenceCorrectionCoefficients[1],referenceCorrectionCoefficients[2]= abcCoefficients
G.referenceCorrectionCoefficients = referenceCorrectionCoefficients #TODO: Maybe this logic should be changed, since it will result in an exporting of the last coefficients used, whether a person is doing forward tuning or reverse tuning.
referenceCorrectionCoefficients_cov = abcCoefficients_cov
G.referenceCorrectionCoefficients_cov = referenceCorrectionCoefficients_cov
#if we are not returning a mixed pattern, we are applying the tuning correction directly to the original ReferenceData object.
ReferenceData.standardized_reference_patterns_tuning_corrected, ReferenceData.standardized_reference_patterns_tuning_uncertainties = TuningCorrector(ReferenceData.standardized_reference_patterns,
G.referenceCorrectionCoefficients,G.referenceCorrectionCoefficients_cov)
#TuningCorrector un-standardizes the patterns, so the patterns have to be standardized again.
ReferenceData.standardized_reference_patterns=StandardizeReferencePattern(ReferenceData.standardized_reference_patterns_tuning_corrected,len(ReferenceData.molecules))
#add "TuningCorrected" to SourceOfFragmentationPatterns
ReferenceData.SourceOfFragmentationPatterns = list(ReferenceData.SourceOfFragmentationPatterns)
for sourceIndex in range(len(ReferenceData.SourceOfFragmentationPatterns)):
ReferenceData.SourceOfFragmentationPatterns[sourceIndex] = ReferenceData.SourceOfFragmentationPatterns[sourceIndex] + '-TuningCorrected'
ReferenceData.ExportCollector('TuningCorrector') #export the pattern.
#Now check if uncertainties already exist, and if they do then the two uncertainties need to be combined. Else, made equal.
try:
ReferenceData.absolute_standard_uncertainties[:,1:] = (ReferenceData.absolute_standard_uncertainties[:,1:]**2 + ReferenceData.standardized_reference_patterns_tuning_uncertainties[:,1:]**2)**0.5
except:
ReferenceData.absolute_standard_uncertainties = ReferenceData.standardized_reference_patterns_tuning_uncertainties
ReferenceData.ExportCollector('TuningCorrector_absolute_standard_uncertainties', export_tuning_uncertainties= True) #These are not yet actually standardized. Happens below.
ReferenceData.ExportCollector('StandardizeReferencePattern_absolute_standard_uncertainties', export_standard_uncertainties= True)
if G.calculateUncertaintiesInConcentrations == True:
if type(G.referencePatterns_uncertainties) != type(None):
ReferenceData.update_relative_standard_uncertainties()
ReferenceData.ExportCollector('StandardizeReferencePattern_relative_standard_uncertainties', export_relative_uncertainties= True)
elif G.tuningCorrectPatternInternalVsExternal =='External':
if ((len(G.referenceFileExistingTuningAndForm) == 0) and (len(G.referenceFileStandardTuningAndForm) == 0)): #if no external pattern provided, we will use the coefficients provided directly.
G.referenceCorrectionCoefficients = G.referenceCorrectionCoefficients
G.referenceCorrectionCoefficients_cov = G.referenceCorrectionCoefficients_cov
elif ((G.referenceFileExistingTuningAndForm !=[]) or (G.referenceFileStandardTuningAndForm!=[])) : #in this case, we are going to overwrite any coefficients provided in order to apply the desired tuning to the external pattern.
referenceFileDesiredTuningAndForm = G.referenceFileDesiredTuningAndForm
referenceFileExistingTuningAndForm = G.referenceFileExistingTuningAndForm
if len(referenceFileExistingTuningAndForm) == 0: #Use the standard tuning file if blank.
referenceFileExistingTuningAndForm = G.referenceFileStandardTuningAndForm
if len(referenceFileDesiredTuningAndForm) == 0: #Use the original reference pattern if blank.
resetReferenceFileDesiredTuningAndForm = True
referenceFileDesiredTuningAndForm = [G.referencePatternsFileNamesList[0], G.referencePatternsFormsList[0]] #Take the first item from G.referencePatternsFileNamesList and from G.referencePatternsFormsList.
#We don't use the function GenerateReferenceDataList because that function does more than just making a reference object.
ReferenceDataExistingTuning = createReferenceDataObject ( referenceFileExistingTuningAndForm[0],referenceFileExistingTuningAndForm[1], AllMID_ObjectsDict=G.AllMID_ObjectsDict)
if ReferenceDataExistingTuning.referenceFileNameExtension == 'csv':
ReferenceDataExistingTuning.exportReferencePattern('ExportedReferencePatternExistingOriginal.csv')
if ReferenceDataExistingTuning.referenceFileNameExtension == 'tsv':
ReferenceDataExistingTuning.exportReferencePattern('ExportedReferencePatternExistingOriginal.tsv')
ReferenceDataDesiredTuning = createReferenceDataObject ( referenceFileDesiredTuningAndForm[0],referenceFileDesiredTuningAndForm[1], AllMID_ObjectsDict=G.AllMID_ObjectsDict)
if ReferenceDataDesiredTuning.referenceFileNameExtension == 'csv':
ReferenceDataDesiredTuning.exportReferencePattern('ExportedReferencePatternDesiredOriginal.csv')
if ReferenceDataDesiredTuning.referenceFileNameExtension == 'tsv':
ReferenceDataDesiredTuning.exportReferencePattern('ExportedReferencePatternDesiredOriginal.tsv')
abcCoefficients, abcCoefficients_cov = ABCDetermination(referenceFileExistingTuningAndForm,referenceFileDesiredTuningAndForm)
referenceCorrectionCoefficients = numpy.zeros(3)
referenceCorrectionCoefficients[0],referenceCorrectionCoefficients[1],referenceCorrectionCoefficients[2]= abcCoefficients
G.referenceCorrectionCoefficients = referenceCorrectionCoefficients #TODO: Maybe this logic should be changed, since it will result in an exporting of the last coefficients used, whether a person is doing forward tuning or reverse tuning.
referenceCorrectionCoefficients_cov = abcCoefficients_cov
G.referenceCorrectionCoefficients_cov = referenceCorrectionCoefficients_cov
#Now that the coefficients are obtained, we need to to a tuning correction on the external reference pattern.
ReferenceDataExternalTuningCorrected = copy.deepcopy(ReferenceDataExistingTuning)
ReferenceDataExternalTuningCorrected.standardized_reference_patterns_tuning_corrected, ReferenceDataExternalTuningCorrected.standardized_reference_patterns_tuning_uncertainties = TuningCorrector(
ReferenceDataExternalTuningCorrected.standardized_reference_patterns, G.referenceCorrectionCoefficients,G.referenceCorrectionCoefficients_cov)
#add "TuningCorrected" to SourceOfFragmentationPatterns
ReferenceDataExternalTuningCorrected.SourceOfFragmentationPatterns = list(ReferenceDataExternalTuningCorrected.SourceOfFragmentationPatterns)
for sourceIndex in range(len(ReferenceDataExternalTuningCorrected.SourceOfFragmentationPatterns)):
ReferenceDataExternalTuningCorrected.SourceOfFragmentationPatterns[sourceIndex] = ReferenceDataExternalTuningCorrected.SourceOfFragmentationPatterns[sourceIndex] + '-TuningCorrected'
#TuningCorrector un-standardizes the patterns, so the patterns have to be standardized again.
ReferenceDataExternalTuningCorrected.standardized_reference_patterns=StandardizeReferencePattern(ReferenceDataExternalTuningCorrected.standardized_reference_patterns_tuning_corrected)
if ReferenceDataExternalTuningCorrected.referenceFileNameExtension == 'csv':
ReferenceDataExternalTuningCorrected.exportReferencePattern('ExportedReferencePatternExternalTuningCorrected.csv')
if ReferenceDataExternalTuningCorrected.referenceFileNameExtension == 'tsv':
ReferenceDataExternalTuningCorrected.exportReferencePattern('ExportedReferencePatternExternalTuningCorrected.tsv')
#Now check if uncertainties already exist, and if they do then the two uncertainties need to be combined. Else, made equal.
#We don't export these uncertainties since this is just an intermediate step.
try:
ReferenceDataExternalTuningCorrected.absolute_standard_uncertainties = (ReferenceDataExternalTuningCorrected.absolute_standard_uncertainties**2 + ReferenceDataExternalTuningCorrected.standardized_reference_patterns_tuning_uncertainties**2)**0.5
except:
ReferenceDataExternalTuningCorrected.absolute_standard_uncertainties = ReferenceDataExternalTuningCorrected.standardized_reference_patterns_tuning_uncertainties
if G.calculateUncertaintiesInConcentrations == True:
if type(G.referencePatterns_uncertainties) != type(None):
ReferenceDataExternalTuningCorrected.update_relative_standard_uncertainties()
ReferenceData.ExportCollector('StandardizeReferencePattern_relative_standard_uncertainties', export_relative_uncertainties= True)
elif ((G.createMixedTuningPattern== True) and (G.tuningCorrectPatternInternalVsExternal =='External')): #in this case, we are going to apply the current tuning to the external pattern, and also create a mixed pattern. So the ReferenceData pointer will point to a mixed pattern by the end of this if statement.
if G.tuningCorrection =='yes':
#First read in the existing tuning patterns.
referenceFileDesiredTuningAndForm = G.referenceFileDesiredTuningAndForm
referenceFileExistingTuningAndForm = G.referenceFileExistingTuningAndForm
if len(referenceFileExistingTuningAndForm) == 0: #Use the standard tuning file if blank.
referenceFileExistingTuningAndForm = G.referenceFileStandardTuningAndForm
if len(referenceFileDesiredTuningAndForm) == 0: #Use the original reference pattern if blank.
resetReferenceFileDesiredTuningAndForm = True
referenceFileDesiredTuningAndForm = [G.referencePatternsFileNamesList[0], G.referencePatternsFormsList[0]] #Take the first item from G.referencePatternsFileNamesList and from G.referencePatternsFormsList.
#We don't use the function GenerateReferenceDataList because that function does more than just making a reference object.
ReferenceDataExistingTuning = createReferenceDataObject ( referenceFileExistingTuningAndForm[0],referenceFileExistingTuningAndForm[1], AllMID_ObjectsDict=G.AllMID_ObjectsDict)
if ReferenceDataExistingTuning.referenceFileNameExtension == 'csv':
ReferenceDataExistingTuning.exportReferencePattern('ExportedReferencePatternExistingOriginal.csv')
if ReferenceDataExistingTuning.referenceFileNameExtension == 'tsv':
ReferenceDataExistingTuning.exportReferencePattern('ExportedReferencePatternExistingOriginal.tsv')
ReferenceDataDesiredTuning = createReferenceDataObject ( referenceFileDesiredTuningAndForm[0],referenceFileDesiredTuningAndForm[1], AllMID_ObjectsDict=G.AllMID_ObjectsDict)
if ReferenceDataDesiredTuning.referenceFileNameExtension == 'csv':
ReferenceDataDesiredTuning.exportReferencePattern('ExportedReferencePatternDesiredOriginal.csv')
if ReferenceDataDesiredTuning.referenceFileNameExtension == 'tsv':
ReferenceDataDesiredTuning.exportReferencePattern('ExportedReferencePatternDesiredOriginal.tsv')
abcCoefficients, abcCoefficients_cov = ABCDetermination(referenceFileExistingTuningAndForm,referenceFileDesiredTuningAndForm)
referenceCorrectionCoefficients = numpy.zeros(3)
referenceCorrectionCoefficients[0],referenceCorrectionCoefficients[1],referenceCorrectionCoefficients[2]= abcCoefficients
G.referenceCorrectionCoefficients = referenceCorrectionCoefficients #TODO: Maybe this logic should be changed, since it will result in an exporting of the last coefficients used, whether a person is doing forward tuning or reverse tuning.
referenceCorrectionCoefficients_cov = abcCoefficients_cov
G.referenceCorrectionCoefficients_cov = referenceCorrectionCoefficients_cov
#Now that the coefficients are obtained, we need to to a tuning correction on the external reference pattern.
ReferenceDataExternalTuningCorrected = copy.deepcopy(ReferenceDataExistingTuning)
ReferenceDataExternalTuningCorrected.standardized_reference_patterns_tuning_corrected, ReferenceDataExternalTuningCorrected.standardized_reference_patterns_tuning_uncertainties = TuningCorrector(
ReferenceDataExternalTuningCorrected.standardized_reference_patterns, G.referenceCorrectionCoefficients,G.referenceCorrectionCoefficients_cov)
#add "TuningCorrected" to SourceOfFragmentationPatterns
ReferenceDataExternalTuningCorrected.SourceOfFragmentationPatterns = list(ReferenceDataExternalTuningCorrected.SourceOfFragmentationPatterns)
for sourceIndex in range(len(ReferenceDataExternalTuningCorrected.SourceOfFragmentationPatterns)):
ReferenceDataExternalTuningCorrected.SourceOfFragmentationPatterns[sourceIndex] = ReferenceDataExternalTuningCorrected.SourceOfFragmentationPatterns[sourceIndex] + '-TuningCorrected'
#TuningCorrector un-standardizes the patterns, so the patterns have to be standardized again.
ReferenceDataExternalTuningCorrected.standardized_reference_patterns=StandardizeReferencePattern(ReferenceDataExternalTuningCorrected.standardized_reference_patterns_tuning_corrected)
if ReferenceDataExternalTuningCorrected.referenceFileNameExtension == 'csv':
ReferenceDataExternalTuningCorrected.exportReferencePattern('ExportedReferencePatternExternalTuningCorrected.csv')
if ReferenceDataExternalTuningCorrected.referenceFileNameExtension == 'tsv':
ReferenceDataExternalTuningCorrected.exportReferencePattern('ExportedReferencePatternExternalTuningCorrected.tsv')
#Now check if uncertainties already exist, and if they do then the two uncertainties need to be combined. Else, made equal.
#We don't export these uncertainties since this is just an intermediate step.
try:
ReferenceDataExternalTuningCorrected.absolute_standard_uncertainties = (ReferenceDataExternalTuningCorrected.absolute_standard_uncertainties**2 + ReferenceDataExternalTuningCorrected.standardized_reference_patterns_tuning_uncertainties**2)**0.5
except:
ReferenceDataExternalTuningCorrected.absolute_standard_uncertainties = ReferenceDataExternalTuningCorrected.standardized_reference_patterns_tuning_uncertainties
if G.calculateUncertaintiesInConcentrations == True:
if type(G.referencePatterns_uncertainties) != type(None):
ReferenceDataExternalTuningCorrected.update_relative_standard_uncertainties()
ReferenceData.ExportCollector('StandardizeReferencePattern_relative_standard_uncertainties', export_relative_uncertainties= True)
#Now to make the mixed reference pattern using a function that extends one reference pattern by another.
#The extendReferencePattern function uses existing add molecule and remove molecules functions.
#The function *automatically* does not extend by molecules that already exist.
ReferenceData, addedReferenceSlice = extendReferencePattern(OriginalReferenceData=ReferenceData,ReferenceDataToExtendBy=ReferenceDataExternalTuningCorrected)
#extendReferencePattern can't currently add the uncertainties sub-objects, so we do that in the lines below.
ReferenceData.ExportAtEachStep = G.ExportAtEachStep
if ReferenceData.referenceFileNameExtension == 'csv':
ReferenceData.exportReferencePattern('ExportedReferencePatternMixed.csv')
if ReferenceData.referenceFileNameExtension == 'tsv':
ReferenceData.exportReferencePattern('ExportedReferencePatternMixed.tsv')
#If specific molecules is on, then we need to potentially remove molecules from the mixed reference pattern, unless PreparingAllMoleculesReferenceDataList.
if PreparingAllMoleculesReferenceDataList == False: #THIS IS A GLOBAL FLAG. IT IS AN IMPLIED ARGUMENT.
if G.specificMolecules.lower() == "yes":
moleculesToRemove = []
for moleculeName in ReferenceData.molecules:
if moleculeName not in G.chosenMoleculesNames:
moleculesToRemove.append(moleculeName)
if len(moleculesToRemove) > 0:
ReferenceData = ReferenceData.removeMolecules(moleculesToRemove)
if ReferenceData.referenceFileNameExtension == 'csv':
ReferenceData.exportReferencePattern("ExportedReferencePatternMixedChosenMolecules.csv")
if ReferenceData.referenceFileNameExtension == 'tsv':
ReferenceData.exportReferencePattern("ExportedReferencePatternMixedChosenMolecules.tsv")
# else:
# if ReferenceDataList[ReferenceDataIndex].ExportAtEachStep == 'yes':
# ReferenceDataList[ReferenceDataIndex].ExportCollector('StandardizedReferencePattern' + str(ReferenceDataIndex), use_provided_reference_patterns=False)
# ReferenceDataList[ReferenceDataIndex].exportReferencePattern("TuningCorrectorMixedPattern" + str(ReferenceDataIndex) + ".csv")
# #Note: it is assumed that the relative_standard_uncertainties correspond to original reference, so before tuning corrector, thus we do not recalculate that factor.
# ReferenceData.ExportCollector('StandardizeReferencePattern_absolute_standard_uncertainties', export_standard_uncertainties= True)
# ReferenceData.update_relative_standard_uncertainties()
# ReferenceData.ExportCollector('StandardizeReferencePattern_absolute_standard_uncertainties', export_standard_uncertainties= True)
# ReferenceData.ExportCollector('StandardizeReferencePattern_relative_standard_uncertainties', export_relative_uncertainties= True)
else:
print("Error: Incompatible Tuning Choices Detected. Mixed Tuning Pattern is currently only compatible with changing the external pattern.")
if resetReferenceFileDesiredTuningAndForm == True: #Reset this variable if needed.
G.referenceFileDesiredTuningAndForm = []
return ReferenceData
#tuningCorrectorGasMixture will correct all items in the ReferenceDataList, but uses a single tuning correction that it applies to each of the ReferenceData objects in the list.
#This function takes a measured gas mixture set of signals, then creates simulated signals (from NIST patterns, for example) to get a tuning correction for the spectrometer.
def tuningCorrectorGasMixture(ReferenceDataList, G, ExperimentData=None): #making it clear that there are UserInput choices that are needed for this feature.
if ReferenceDataList[0].referenceFileNameExtension == 'csv':
ReferenceDataList[0].exportReferencePattern('ExportedReferencePatternOriginalAnalysis.csv')
if ReferenceDataList[0].referenceFileNameExtension == 'tsv':
ReferenceDataList[0].exportReferencePattern('ExportedReferencePatternOriginalAnalysis.tsv')
#First read in the existing tuning patterns.
#We don't use the function GenerateReferenceDataList because that function does more than just making a reference object.
ReferenceDataExistingTuning = createReferenceDataObject ( G.referenceFileExistingTuningAndForm[0],G.referenceFileExistingTuningAndForm[1], AllMID_ObjectsDict=G.AllMID_ObjectsDict)
ReferenceDataExistingTuning.ExportAtEachStep = G.ExportAtEachStep
#Copying what is in GenerateReferenceDataList and ReferenceInputPreProcessing, we need to add the following block of code in if we want to allow uncertainties.
if G.calculateUncertaintiesInConcentrations == True:
if type(G.referencePatterns_uncertainties) != type(None):
if type(G.referencePatterns_uncertainties) == type(float(5)) or type(G.referencePatterns_uncertainties) == type(int(5)) :
#TODO: Low priority. The below results in "nan" values. It could be better to change it to make zeros using a numpy "where" statement.
G.referencePatterns_uncertainties = float(G.referencePatterns_uncertainties) #Make sure we have a float.
#Get what we need.
provided_reference_patterns = ReferenceDataExistingTuning.provided_reference_patterns
provided_reference_patterns_without_masses = ReferenceDataExistingTuning.provided_reference_patterns[:,1:] #[:,0] is mass fragments, so we slice to remove those.
#Make our variables ready.
absolute_standard_uncertainties = provided_reference_patterns*1.0 #First we make a copy.
absolute_standard_uncertainties_without_masses = (provided_reference_patterns_without_masses/provided_reference_patterns_without_masses)*G.referencePatterns_uncertainties #We do the division to get an array of ones. Then we multiply to get an array of the same size.
#now we populate our copy's non-mass area.
absolute_standard_uncertainties[:,1:] = absolute_standard_uncertainties_without_masses
ReferenceDataExistingTuning.absolute_standard_uncertainties = absolute_standard_uncertainties
#We can't convert to relative uncertainties yet because the file may not be standardized yet.
if type(G.referencePatterns_uncertainties) == type('string'):
if '.csv' in G.referenceFileExistingTuningAndForm[0]:
provided_reference_patterns_absolute_uncertainties, electronnumbers, molecules, molecularWeights, SourceOfFragmentationPatterns, sourceOfIonizationData, relativeIonizationEfficiencies, moleculeIonizationType, mass_fragment_numbers_monitored, referenceFileName, form = readReferenceFile(G.referenceFileExistingTuningAndForm[0][:-4]+"_absolute_uncertainties.csv",G.referenceFileExistingTuningAndForm[1])
if '.tsv' in G.referenceFileExistingTuningAndForm[0]:
provided_reference_patterns_absolute_uncertainties, electronnumbers, molecules, molecularWeights, SourceOfFragmentationPatterns, sourceOfIonizationData, relativeIonizationEfficiencies, moleculeIonizationType, mass_fragment_numbers_monitored, referenceFileName, form = readReferenceFile(G.referenceFileExistingTuningAndForm[0][:-4]+"_absolute_uncertainties.tsv",G.referenceFileExistingTuningAndForm[1])
ReferenceDataExistingTuning.absolute_standard_uncertainties = provided_reference_patterns_absolute_uncertainties #Just initializing the variable before filling it properly.
maximum_absolute_intensities = numpy.amax(ReferenceDataExistingTuning.provided_reference_patterns[:,1:], axis = 0) #Find the maximum intensity for each molecule.
ReferenceDataExistingTuning.absolute_standard_uncertainties[:,1:] = 100*ReferenceDataExistingTuning.absolute_standard_uncertainties[:,1:]/maximum_absolute_intensities
if G.calculateUncertaintiesInConcentrations == True:
if type(G.referencePatterns_uncertainties) != type(None):
ReferenceDataExistingTuning.update_relative_standard_uncertainties()
#TODO: For above function call, Still need to put the last argument in later which is the ionization information: AllMID_ObjectsDict={})
#Currently, in a "regular" MSRESOLVE run, the calculation of the correction values normally occurs inside the ReferenceInputPreProcessing. That is a problem because
    #the function ReferenceInputPreProcessing applies a tuning correction. So we need to get the CorrectionValues directly.
ReferenceDataExistingTuning.correction_values, ReferenceDataExistingTuning.correction_values_relative_uncertainties = CorrectionValuesObtain(ReferenceDataExistingTuning)
#We don't need a desired tuning data object right now, but we will make one since we'll need it for creating the mixed reference pattern later.
#By default, the regular reference data object will be considered the desired tuning, if not specified.
if len(G.referenceFileDesiredTuningAndForm) == 0:
#Take the first item from ReferenceDataList (which may have been extracted from the experiment file)
ReferenceDataDesiredTuning = ReferenceDataList[0]
if ReferenceDataDesiredTuning.referenceFileNameExtension == 'csv':
ReferenceDataDesiredTuning.exportReferencePattern("ExportedReferencePatternOriginal.csv") #Exported for during TuningCorrection.
referenceFileDesiredTuningAndForm = ["ExportedReferencePatternOriginal.csv", "xxxy"]
if ReferenceDataDesiredTuning.referenceFileNameExtension == 'tsv':
ReferenceDataDesiredTuning.exportReferencePattern("ExportedReferencePatternOriginal.tsv") #Exported for during TuningCorrection.
referenceFileDesiredTuningAndForm = ["ExportedReferencePatternOriginal.tsv", "xxxy"]
else:
ReferenceDataDesiredTuning = createReferenceDataObject ( G.referenceFileDesiredTuningAndForm[0],G.referenceFileDesiredTuningAndForm[1], AllMID_ObjectsDict=G.AllMID_ObjectsDict)
ReferenceDataDesiredTuning.ExportAtEachStep = G.ExportAtEachStep
#We are going to make the simulated gas mixture first, because that turns out to be easier (for data structure reasons) when we want to extract the gas mixture signals from the data.
    #We need to prepare an appropriately made list for the concentrations to use in simulating data using the ExistingTuningReferencePattern
#The concentrations must match the molecular order of the literature file.
# Firstly, the literature file can have molecules not part of the concentrations, so we should add concentrations of zero for those. (It's easier than removing those molecules from the literature object).
    # Secondly, we need to re-order the concentrations to match those of the existing reference pattern.
knownConcentrationsArray = list(numpy.zeros(len(ReferenceDataExistingTuning.molecules))) #initializing to number of molecules. Making it a list so we can insert later.
#The desired molecules list is the one in ReferenceDataExistingTuning.molecules, so we will loop across those.
for moleculeIndex,moleculeName in enumerate(ReferenceDataExistingTuning.molecules):
desiredConcentrationIndex = moleculeIndex
#Take the concentration if it's named:
if moleculeName in G.UserChoices['tuningCorrection']['tuningCorrectorGasMixtureMoleculeNames']:
currentConcentrationIndex = list(G.UserChoices['tuningCorrection']['tuningCorrectorGasMixtureMoleculeNames']).index(moleculeName)
knownConcentrationsArray[desiredConcentrationIndex] = G.UserChoices['tuningCorrection']['tuningCorrectorGasMixtureConcentrations'][currentConcentrationIndex]
else: #else set it to zero.
knownConcentrationsArray[desiredConcentrationIndex] = 0
#The first item in the concentrations array is supposed to be the time. We will simply put the integer "1" there, since we will have one point.
knownConcentrationsArray.insert(0, 1)
#knownConcentrationsArray needs to be nested in a numpy array for expected dimensionality when using RawSignalsSimulation
knownConcentrationsArray = numpy.array([knownConcentrationsArray])
#Before simulation, we also need the reciprocal_matching_correction_values array. In order to make the reciprocal_matching_correction_values array, we need to know which masses we need. We can actually just simulate all of the masses from the existingReferencePattern, and then let tuning corrector use whichever masses are useful
#Below we directly call Populate_reciprocal_matching_correction_values because PrepareReferenceObjectsAndCorrectionValues could potentially apply an undesired "before comparisons" tuning factor correction. We will need this done before we simulate any signals.
ReferenceDataExistingTuningMassFragments = ReferenceDataExistingTuning.standardized_reference_patterns[:,0]
ReferenceDataExistingTuning = Populate_reciprocal_matching_correction_values(ReferenceDataExistingTuningMassFragments, ReferenceDataExistingTuning)
#Now need to make the inputs for simulating raw signals of the gas mixture. A properly ordered and formatted concentration array, as well as properly formatted reciprocal_matching_correction_values.
#reciprocal_matching_correction_values needs to be nested in a numpy array for expected dimensionality when using RawSignalsSimulation
reciprocal_matching_correction_values_array = numpy.array([ReferenceDataExistingTuning.reciprocal_matching_correction_values])
#now need to do the actual simulation of the gas mixture signals.
simulateddata = RawSignalsSimulation(knownConcentrationsArray, reciprocal_matching_correction_values_array)
ExportXYYYData("TuningCorrectorGasMixtureHypotheticalSimulatedSignals.csv", simulateddata, ReferenceDataExistingTuningMassFragments, abscissaHeader = "Time", fileSuffix = G.iterationSuffix, dataType = 'simulated')
#now need to make a fake reference file for that. We'll make an MSReferenceObject and then use the internally built exportReferencePattern class function.
    simulateddata_intensities = simulateddata[0][1:] #The "0" is because it's a nested object. The slicing is because the abscissa (time) value needs to be removed.
#Now to transpose the simulateddata to make the simulated_reference_pattern
simulated_reference_pattern = numpy.vstack((ReferenceDataExistingTuningMassFragments, simulateddata_intensities)).transpose()
ReferenceDataTuningCorrectorGasMixtureSimulatedHypothetical = MSReference(simulated_reference_pattern, electronnumbers=[1], molecules=["GasMixture"], molecularWeights=[1], SourceOfFragmentationPatterns=["simulated"], sourceOfIonizationData=["referenceFileExistingTuningAndForm"], relativeIonizationEfficiencies=['GasMixture'], moleculeIonizationType=["GasMixture"]) #We fill some variables with the word "GasMixture" because there is no single ionization factor for the gas mixture.
ReferenceDataTuningCorrectorGasMixtureSimulatedHypothetical.referenceFileNameExtension = ReferenceDataList[0].referenceFileNameExtension #This needs to be populated since there was no filename initially.
if ReferenceDataTuningCorrectorGasMixtureSimulatedHypothetical.referenceFileNameExtension == 'csv':
ReferenceDataTuningCorrectorGasMixtureSimulatedHypothetical.exportReferencePattern("ExportedReferencePatternGasMixtureSimulatedHypothetical.csv")
if ReferenceDataTuningCorrectorGasMixtureSimulatedHypothetical.referenceFileNameExtension == 'tsv':
ReferenceDataTuningCorrectorGasMixtureSimulatedHypothetical.exportReferencePattern("ExportedReferencePatternGasMixtureSimulatedHypothetical.tsv")
if len(G.UserChoices['tuningCorrection']['tuningCorrectorGasMixtureSignals']) == 0: #if the length is zero, we will populate this with the max and min of the times.
G.UserChoices['tuningCorrection']['tuningCorrectorGasMixtureSignals'] = [ min(ExperimentData.times), max(ExperimentData.times)]
if len(G.UserChoices['tuningCorrection']['tuningCorrectorGasMixtureSignals']) == 2: #We assume in this case that we have a pair of times.
ReferenceDataTuningCorrectorGasMixtureMeasuredHypothetical = copy.deepcopy(ReferenceDataTuningCorrectorGasMixtureSimulatedHypothetical) #just initializing.
ReferenceDataTuningCorrectorGasMixtureMeasuredHypothetical.SourceOfFragmentationPatterns = ["measured"]
#Get the mass fragments we need.
expectedMassFragments = ReferenceDataTuningCorrectorGasMixtureSimulatedHypothetical.standardized_reference_patterns[:,0]
#need to get the masses in common between the experiment data and ReferenceDataExistingTuningMassFragments
commonMF = []
for massValue in numpy.array(ExperimentData.mass_fragment_numbers, dtype=float):
if massValue in numpy.array(expectedMassFragments, dtype=float): #the 0 is because this object is nested.
commonMF.append(massValue)
#Fill all of the fragments intensities with ones, this will make the extraction easier.
ReferenceDataTuningCorrectorGasMixtureMeasuredHypothetical.provided_reference_patterns[:,1:] = numpy.ones(numpy.shape(ReferenceDataTuningCorrectorGasMixtureMeasuredHypothetical.provided_reference_patterns[:,1:]))
ReferenceDataTuningCorrectorGasMixtureMeasuredHypothetical.provided_mass_fragments = ReferenceDataTuningCorrectorGasMixtureMeasuredHypothetical.provided_reference_patterns[:,0]
#note that the last two arguments need to be further nested.
ReferenceDataTuningCorrectorGasMixtureMeasuredHypothetical.standardized_reference_patterns,ReferenceDataTuningCorrectorGasMixtureMeasuredHypothetical_uncertainties = ExtractReferencePatternFromData(ExperimentData=ExperimentData, referenceData=ReferenceDataTuningCorrectorGasMixtureMeasuredHypothetical,rpcChosenMolecules=["GasMixture"],rpcChosenMoleculesMF=[commonMF],rpcTimeRanges=[G.UserChoices['tuningCorrection']['tuningCorrectorGasMixtureSignals']])
if ReferenceDataTuningCorrectorGasMixtureMeasuredHypothetical.referenceFileNameExtension == 'csv':
ReferenceDataTuningCorrectorGasMixtureMeasuredHypothetical.exportReferencePattern("ExportedReferencePatternGasMixtureMeasuredHypothetical.csv")
if ReferenceDataTuningCorrectorGasMixtureMeasuredHypothetical.referenceFileNameExtension == 'tsv':
ReferenceDataTuningCorrectorGasMixtureMeasuredHypothetical.exportReferencePattern("ExportedReferencePatternGasMixtureMeasuredHypothetical.tsv")
if G.tuningCorrection =='yes':
#we are using specific files for obtaining the tuning correction coefficients in the case of GasMixtureTuningCorrector
if ReferenceDataTuningCorrectorGasMixtureMeasuredHypothetical.referenceFileNameExtension == 'csv':
referenceFileExistingTuningAndForm=["ExportedReferencePatternGasMixtureSimulatedHypothetical.csv","XYYY"]
referenceFileDesiredTuningAndForm=["ExportedReferencePatternGasMixtureMeasuredHypothetical.csv", "XYYY"]
if ReferenceDataTuningCorrectorGasMixtureMeasuredHypothetical.referenceFileNameExtension == 'tsv':
referenceFileExistingTuningAndForm=["ExportedReferencePatternGasMixtureSimulatedHypothetical.tsv","XYYY"]
referenceFileDesiredTuningAndForm=["ExportedReferencePatternGasMixtureMeasuredHypothetical.tsv", "XYYY"]
abcCoefficients, abcCoefficients_cov = ABCDetermination(referenceFileExistingTuningAndForm,referenceFileDesiredTuningAndForm)
referenceCorrectionCoefficients = numpy.zeros(3)
referenceCorrectionCoefficients[0],referenceCorrectionCoefficients[1],referenceCorrectionCoefficients[2]= abcCoefficients
G.referenceCorrectionCoefficients = referenceCorrectionCoefficients #TODO: Maybe this logic should be changed, since it will result in an exporting of the last coefficients used, whether a person is doing forward tuning or reverse tuning.
referenceCorrectionCoefficients_cov = abcCoefficients_cov
G.referenceCorrectionCoefficients_cov = referenceCorrectionCoefficients_cov
ReferenceDataExistingTuningAfterCorrection = copy.deepcopy(ReferenceDataExistingTuning) #just initializing here, then actual tuning correction occurs in next lines.
#Tuning corrector does not operate on a ReferenceData object, it operates on standardized reference patterns.
ReferenceDataExistingTuningAfterCorrection.standardized_reference_patterns, ReferenceDataExistingTuningAfterCorrection.standardized_reference_patterns_tuning_uncertainties = TuningCorrector(ReferenceDataExistingTuning.standardized_reference_patterns, G.referenceCorrectionCoefficients, G.referenceCorrectionCoefficients_cov)
# TuningCorrectorGasMixtureCorrectedReferenceDataObject = MSReference(ReferenceDataExistingTuning.standardized_reference_patterns, electronnumbers=ReferenceDataExistingTuning.electronnumbers, molecules=ReferenceDataExistingTuning.molecules, molecularWeights=ReferenceDataExistingTuning.molecularWeights, SourceOfFragmentationPatterns=ReferenceDataExistingTuning.SourceOfFragmentationPatterns, sourceOfIonizationData=ReferenceDataExistingTuning.sourceOfIonizationData, relativeIonizationEfficiencies=ReferenceDataExistingTuning.relativeIonizationEfficiencies, moleculeIonizationType=ReferenceDataExistingTuning.moleculeIonizationType)
ReferenceDataExistingTuningAfterCorrection.addSuffixToSourceOfFragmentationPatterns("_TuningCorrected")
# #Now check if uncertainties already exist, and if they do then the two uncertainties need to be combined. Else, made equal.
if G.calculateUncertaintiesInConcentrations == True:
if type(G.referencePatterns_uncertainties) != type(None):
try:
ReferenceDataExistingTuningAfterCorrection.absolute_standard_uncertainties = (ReferenceDataExistingTuningAfterCorrection.absolute_standard_uncertainties**2 + ReferenceDataExistingTuningAfterCorrection.standardized_reference_patterns_tuning_uncertainties**2)**0.5
except:
ReferenceDataExistingTuningAfterCorrection.absolute_standard_uncertainties = ReferenceDataExistingTuningAfterCorrection.standardized_reference_patterns_tuning_uncertainties
ReferenceDataExistingTuningAfterCorrection.update_relative_standard_uncertainties()
#TuningCorrector un-standardizes the patterns, so the patterns have to be standardized again.
ReferenceDataExistingTuningAfterCorrection.standardized_reference_patterns=StandardizeReferencePattern(ReferenceDataExistingTuningAfterCorrection.standardized_reference_patterns,len(ReferenceDataExistingTuningAfterCorrection.molecules))
if ReferenceDataExistingTuningAfterCorrection.referenceFileNameExtension == 'csv':
ReferenceDataExistingTuningAfterCorrection.exportReferencePattern('ExportedReferencePatternExternalTuningCorrected.csv')
if ReferenceDataExistingTuningAfterCorrection.referenceFileNameExtension == 'tsv':
ReferenceDataExistingTuningAfterCorrection.exportReferencePattern('ExportedReferencePatternExternalTuningCorrected.tsv')
###The below code block is probably deprecated###
# #Copying what is in GenerateReferenceDataList and ReferenceInputPreProcessing, we need to add the following block of code in if we want to allow uncertainties.
# if G.calculateUncertaintiesInConcentrations == True:
# if type(G.referencePatterns_uncertainties) != type(None):
# if type(G.referencePatterns_uncertainties) == type(float(5)) or type(G.referencePatterns_uncertainties) == type(int(5)) :
# #TODO: Low priority. The below results in "nan" values. It could be better to change it to make zeros using a numpy "where" statement.
# G.referencePatterns_uncertainties = float(G.referencePatterns_uncertainties) #Maks sure we have a float.
# #Get what we need.
# provided_reference_patterns = ReferenceDataExistingTuningAfterCorrection.provided_reference_patterns
# provided_reference_patterns_without_masses = ReferenceDataExistingTuningAfterCorrection.provided_reference_patterns[:,1:] #[:,0] is mass fragments, so we slice to remove those.
# #Make our variables ready.
# absolute_standard_uncertainties = provided_reference_patterns*1.0 #First we make a copy.
# absolute_standard_uncertainties_without_masses = (provided_reference_patterns_without_masses/provided_reference_patterns_without_masses)*G.referencePatterns_uncertainties #We do the division to get an array of ones. Then we multiply to get an array of the same size.
# #now we populate our copy's non-mass area.
# absolute_standard_uncertainties[:,1:] = absolute_standard_uncertainties_without_masses
# ReferenceDataExistingTuningAfterCorrection.absolute_standard_uncertainties = absolute_standard_uncertainties
# #We can't convert to relative uncertainties yet because the file may not be standardized yet.
# if type(G.referencePatterns_uncertainties) == type('string'):
# provided_reference_patterns_absolute_uncertainties, electronnumbers, molecules, molecularWeights, SourceOfFragmentationPatterns, sourceOfIonizationData, relativeIonizationEfficiencies, moleculeIonizationType, mass_fragment_numbers_monitored, referenceFileName, form = readReferenceFile("ReferenceDataExistingTuningAfterCorrection.csv"[:-4]+"_absolute_uncertainties.csv","xyyy")
# TuningCorrectorGasMixtureCorrectedReferenceDataObject.absolute_standard_uncertainties = provided_reference_patterns_absolute_uncertainties #Just initializing the variable before filling it properly.
# maximum_absolute_intensities = numpy.amax(TuningCorrectorGasMixtureCorrectedReferenceDataObject.provided_reference_patterns[:,1:], axis = 0) #Find the maximum intensity for each molecule.
# TuningCorrectorGasMixtureCorrectedReferenceDataObject.absolute_standard_uncertainties[:,1:] = 100*TuningCorrectorGasMixtureCorrectedReferenceDataObject.absolute_standard_uncertainties[:,1:]/maximum_absolute_intensities
###The above code block is probably deprecated###