-
Notifications
You must be signed in to change notification settings - Fork 0
/
weibulls_platHistCopy.R
498 lines (467 loc) · 26 KB
/
weibulls_platHistCopy.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
### Fit many weibulls and produce weibull plots
# Define Functions: #
CensUncens <- function(df) {
# Transforms a subset of the repair/removals TOW data into a format digestible for fitdistcens() functions
# Args:
# df: a subset of the repair/removals data as broken down by a ddply() call according to specific and general classifiers
# Out:
# newdf: the repair/removals data as a two-column dataframe with just the TOW
if(dim(subset(df,CENS==0))[1]==0) {censdf<-NULL # set to null b/c empty
} else { censdf<-data.frame(subset(df,CENS==0)$TOW,NA)
colnames(censdf)<-c("left","right")}
if(dim(subset(df,CENS==1))[1]==0) {uncensdf<-NULL # set to null b/c empty
} else {uncensdf<-data.frame(subset(df,CENS==1)$TOW,subset(df,CENS==1)$TOW)
colnames(uncensdf)<-c("left","right")}
newdf<-rbind(censdf,uncensdf)
(newdf)
} # end CensUncens
getWeibullFromDF <- function(df, plot=FALSE, catgs="",modkm, plotdir,unbug) {
# Outputs a row with fitted weibulls and can generate a weibull plot in the specified directory
# Args:
# df: a subset of the removals/repair TOW data as broken down by a previous call to ddply()
# plot: boolean if the function should call getPlotFromDF to save a plot
# catgs: text string of all the specific classifiers used in the previous steps to pass df to this function
# modkm: boolean if the function should calculate the modified kaplan meier plot points; required for plotting
# determines whether to calculate anderson darling statistics
# plotdir: directory to store plots
# unbug: boolean if unbug remarks should be printed to console
# Out:
# out: a row with weibull & exponential paramaters and goodness of fit info for one set of classifiers
# ddply will append this to the classifiers columns to make a row of the final output
# calls getPlotFromDF to make a plot, if option is on
#make sure the passed data frame has enough data: at least three failure/removal/events
# (anything less may give errors in fitting the weibull with mle method)
if(length(df[df$CENS==1,8])<3) {
# with too little data: report the number of events and give NAs for everything else
out<-data.frame(NA,NA,NA,NA,NA,0,0,NA,NA,NA,NA,NA,NA,NA)
colnames(out)<-c("shape","scale","shapeSE","scaleSE","MeanTime","Events","Censored","NLogLik","BetaTestPval","AD*weib","AD*expo","AD*logn","AD*norm","ExpoScale")
out[1,6]<-length(df[df$CENS==1,8])
out[1,7]<-length(df[df$CENS==0,8])
(out) # no plot generated
} else {
#shape the newTOW table into a two-column dataframe with time on wing and censored information for fitdistcens()
newdf<-CensUncens(df)
# make sure the causal failures are not all the same number; otherwise the weibull is horizontal and impossible to fit
if(length(unique(newdf[,2][complete.cases(newdf[,2])]))==1) {
# with too little data: report the number of events and give NAs for everything else
out<-data.frame(NA,NA,NA,NA,NA,0,0,NA,NA,NA,NA,NA,NA,NA)
colnames(out)<-c("shape","scale","shapeSE","scaleSE","MeanTime","Events","Censored","NLogLik","BetaTestPval","AD*weib","AD*expo","AD*logn","AD*norm","ExpoScale")
out[1,6]<-length(df[df$CENS==1,8])
out[1,7]<-length(df[df$CENS==0,8])
(out) # no plot generated
} else {
#calculate a weibull from that new 2-column dataframe
if(unbug){print(df[1,])}
if(unbug){print(dim(newdf))}
options(warn=-1)
weib<-fitdistcens(newdf,"weibull")
options(warn=0)
#find the modified Kaplan Meier rank and the fitted values
if (modkm) {
rankedpoints<-getModKM(newdf,weib[[1]]) # takes cens/uncens TOW and weibull parameters
}
#find the exponential parameters and the beta = 1 p value
betaout<-BetaTest(newdf,weib$loglik)
#calculate the Anderson Darling Adjusted statistics for weibull,exponential,lognormal and normal distributions
if (modkm) {
ADstats<-calcADAvals(rankedpoints,newdf,betaout[[2]])
} else {ADstats<-data.frame(NA,NA,NA,NA)} # finding modkm is slow so option out
#save a plot of the weibull. this requires modkm
if (plot) {
getPlotFromDF(rankedpoints, head(df), weib[[1]], catgs, plotdir, unbug) #ranked points, sample of input data, break-out categories
}
# save the scale and shape parameters & their standard errors
# & # of events and beta-test-p-value in a dataframe
out<-cbind(t(data.frame(weib[1])),t(data.frame(weib[2])))
out<-as.data.frame(out)
out[1,5]<-weib[[1]][2]*gamma(1+1/weib[[1]][1]) # mean time b/w events (scale*gamma(1+1/shape))
out[1,6]<-length(is.na(newdf$right)[is.na(newdf$right)==F]) # uncensored events
out[1,7]<-length(is.na(newdf$right)[is.na(newdf$right)==T]) # censored
out[1,8]<-weib$loglik # negative log likelihood of the weibull fit (will be used to compare fits)
out[1,9]<-betaout[[1]] # get the Log-Likelihood Ratio Test p-value for Beta=1 test (approximation of Minitab Wald Test p-value)
out[1,10]<-ADstats[1] # weibull AD* statistic
out[1,11]<-ADstats[2] # exponential AD* statistic
out[1,12]<-ADstats[3] # lognormal AD* statistic
out[1,13]<-ADstats[4] # normal AD* statistic
out[1,14]<-betaout[[2]] # exponential scale (1/rate) parameter
colnames(out)<-c("shape","scale","shapeSE","scaleSE","MeanTime","Events","Censored","NLogLik","BetaTestPval","AD*weib","AD*expo","AD*logn","AD*norm","ExpoScale")
(out)
} # end if there are different values in the causal events
} # end if there are enough data
} # end getWeibullFromDF function
# Find the p value of the test comparing the weibull fit to the exponential fit
# takes as inputs: censored and uncensored times and the negative log likelihood of the weibull fit
BetaTest<-function(input,weibnll) {
expofit<-fitdistcens(input,"weibull",start=list(scale=median(input$left)),fix.arg=list(shape=1)) # most likely expo fit to work
teststat<-2*(weibnll-expofit$loglik)
pvalue<-1-pchisq(teststat,1)
(list(pval=pvalue,param=expofit[[1]]))
}
# this will always break out by all classifiers (must give it classifier names)
# uses GetWeibullsFromDF and CensUncens functions
gatherallweibulls <- function(weibulls_table,verbose=FALSE,unbug=FALSE,plot=FALSE,modkm=FALSE,plotdir=getwd()){
if(plot==TRUE & modkm==FALSE) {stop("modified kaplan meier statistics are required for plotting")}
if(verbose==FALSE){print("Please Wait")}
# Six classifier columns including WUC, which is always specified/included
# Total of 5 Choose 5:0 combinations for a total of 32 calls to calloneddply()
classcolnames<-c("WUC","LOCATION","PN","NHA_PN","LAST_REPAIR","REPAIR_COUNT")
numcolnames<-c("shape", "scale", "shapeSE", "scaleSE", "MeanTime", "Events", "Censored",
"NLogLik","BetaTestPval", "AD*weib", "AD*expo", "AD*logn","AD*norm","ExpoScale")
cls<-length(classcolnames) # number of classifier columns, including WUC
first <- 1 # initialize a dummy binary variable to start or append to a data frame
weibs<-data.frame() # initialize an empty data frame to hold output
for(ii in seq(from=0,to=(cls-1))) { # loop once per classifier column [except wuc]
# call combn() one time during an interation of this outer loop
mat<-combn(cls:2,ii) # gives all the ways to choose 'ii' digits out of quantity of classifier columns less one [b/c wuc is always in]
for(jj in seq_len(dim(mat)[2])) {
# call calloneddply() once per column of the combn output
weibs1<-calloneDDPLY(weibulls_table,include=sort(c(1,mat[,jj])),plots=plot,classcolnames,numcolnames,modkm,plotdir,verbose,unbug)
if(first==1) {
weibs<-weibs1 # create
first<-0
} else {
weibs<-rbind(weibs,weibs1)
} # end else
} # end inner for
} # end outer for
# convert description columns to factors
for (jj in seq(from=2,to=length(classcolnames))){
weibs[,jj]<-factor(weibs[,jj])
}
(weibs)
} # end gatherallweibulls
calloneDDPLY<-function(weibulls_table,include,plots,classifiernames,numericalnames,modkm,plotdir,verbose,unbug) {
includenames<-classifiernames[include]
excludenames<-classifiernames[-include]
if(verbose){print(includenames)}
# Define classifiers as one long string: (for loop is a fail, but I'm stumped)
classifiers<-as.character("") # initialize
for (ii in seq(include)) {
classifiers<-paste(classifiers,includenames[ii],sep="")
}
# Call the ddply
weibs<-ddply(weibulls_table,includenames,getWeibullFromDF,plot=plots,catgs=classifiers,modkm,plotdir,unbug)
# Add the "ALL" description columns to the end of the data frame
for (ii in seq(excludenames)) {
weibs[,length(weibs)+1]<-"ALL"
names(weibs)[length(weibs)]<-excludenames[ii]
}
# Move description columns to the correct order
weibs<-weibs[,c(classifiernames,numericalnames)]
(weibs)
} # end calloneDDPLY
# plot a probability graph plot of the events and their fit
# overlayed with a histogram of the censored values
# called from within getweibullsfromdf
# both: modKM-ranked points for all repairs
# df: sample of original data frame with classifiers
# params: two parameters for the weibull distribution
# catgs: grouping categories passed to ddply to define which factors to not use "ALL"
# uses Modified Kaplan-Meier method for plotting uncensored and censored data
getPlotFromDF<- function(both, df, params, catgs, plotdir, unbug=FALSE) {
# assign titles for the plot
title.WUC<-as.character(df$WUC[1])
title.name<-as.character(wucnames[wucnames$WUC==title.WUC,2])
# this looks if the different classifiers are specific (broken out) or general (all)
# there is almost certainly a better way to do this, but...
title.Loc<-ifelse(strsplit(catgs,"LOCATION")==catgs,"ALL",as.character(df$LOCATION[1]))
title.Loc<-ifelse(title.Loc=="N/A","NA",title.Loc) # png name can't have a slash in it
# distinguishing between nha_pn and pn is more involved: first check for nha_pn
title.NPN<-ifelse(strsplit(catgs,"NHA_PN")==catgs,"ALL",as.character(df$NHA_PN[1]))
# then split on nha_pn and check what wasn't split for PN or check both sides for PN
a<-strsplit(paste0(" ",catgs," "),"NHA_PN")[[1]] # have to get the first element of the list that strsplit returns
b<-0
if(length(a)==1){ # if nha_pn not found, check catgs string
title.PN<-ifelse(strsplit(catgs,"PN")==catgs,"ALL",as.character(df$PN[1]))}
else {
for(ii in 1:2){ifelse(strsplit(a[ii],"PN")[[1]]==a[ii],b<-b+0,b<-b+1)}; # if b==0 then no match
if(b>0){title.PN<-as.character(df$PN[1])}
else {title.PN<-"ALL"}
}
title.RC<-ifelse(strsplit(catgs,"REPAIR_COUNT")==catgs,"ALL",as.character(df$REPAIR_COUNT[1]))
title.LR<-ifelse(strsplit(catgs,"LAST_REPAIR")==catgs,"ALL",as.character(df$LAST_REPAIR[1]))
# Plot
op<-par() # save the plot parameters
#dots
png(file = paste(plotdir,"WeibullPlot._WUC_",title.WUC,";Loc_",title.Loc,";PN_",title.PN,";RepInt_",
title.RC,".png",sep=""), width=5.96, height=4.54, units="in", res=144)
par(mar=c(3.6, 4, 4, 2) + 0.1) # push the plot down a little
weibplot(both$tow[which(both$cens==1)],both$p[which(both$cens==1)],forcexlim=c(.9,log10(max(both$tow))+.02),
forceylim=c(-7,0), xlab="",ylab="Percent",col=rgb(255/255,0/255,0/255),pch=16,
main=paste("Weibull Removal Plot\n",title.name,
ifelse(title.Loc=="ALL"," - All Locations",ifelse(title.Loc=="NA"," - No Majority Location",paste(" - ",title.Loc))),
"\n",ifelse(title.PN=="ALL","All Part Numbers - ",paste("PN ",title.PN," - ")),
ifelse(title.RC=="ALL","All Removal Intervals",paste("Removal Interval ",title.RC))))
title(sub="Flight Hours",line=2)
abline(h=c(log(-log(1-c(.001,.01,.1)))),v=c(c(seq(-1,3))),col=rgb(1,0,0)) # Red
abline(h=c(log(-log(1-c(.003,.02,.05,.25,.5,.75,.9,.96,.99,.999)))),col=rgb(38/255,201/255,38/255))
abline(v=c(log10(seq(7,9)),log10(seq(20,90,10)),log10(seq(200,900,100)),log10(seq(2000,9000,1000))),col=rgb(38/255,201/255,38/255)) # Green
#fitted line
line.41<-qweibull(seq(0.0001,.99,.0005),params[1],params[2],log.p=F)
lines(x=log10(line.41),y=log(-log(1-seq(.0001,.99,.0005))))
#legend
legend(x="topleft",legend=c(paste("Shape: ",round(params[1],digits=3)),paste("Scale: ",round(params[2],digits=3)),
paste("Mean: ",round(params[2]*gamma(1+1/params[1]),digits=3)),paste("Observed: ",length(both$p[which(both$cens==1)])),
paste("Censored: ",length(both$p[which(both$cens==0)]))),cex=.85,ncol=1,inset=c(-0.05,-0.03))
#histogram (if there are censored values)
if(length(both$p[which(both$cens==0)])==0) { # just plot a zero at median of non-censored hours
text(x=log10(median(both$tow[which(both$cens==1)])),y=-7,labels="0")
} else { # plot the histogram
par(new=T,mar=c(3.14,4.1,4.1,2.1)) #5.96 x 4.54 to line up the histogram to the bottom of the plot area
histholder<-hist(log10(both$tow[which(both$cens==0)]),breaks="Sturges",plot=F) # to get the bin heights and counts
histplot<-hist(log10(both$tow[which(both$cens==0)]),breaks="Sturges",xlim=c(.9,log10(max(both$tow))+.02),ylim=c(0,max(histholder$counts)*2.2),border=1,col=rgb(149/255,184/255,251/255),ylab=NULL,xlab=NULL,main=NULL,labels=T,axes=F,plot=T)
} # end if there is enough data to plot the histogram
options(warn=-1)
par(op)
options(warn=0)
invisible(dev.off())
} # end plot Function
# gives a data frame with the modified Kaplan Meier ranks and
# both Censored and non-Censored data
# takes a data frame with "left" and "right" columns from CensUncens
# function and the weibull parameters from the fit
getModKM <- function(towtimes,params) {
cens<-data.frame(tow=sort(towtimes$left[is.na(towtimes$right)==T]),
fitprob=pweibull(sort(towtimes$left[is.na(towtimes$right)==T]),params[1],params[2]))
uncens<-data.frame(tow=sort(towtimes$left[is.na(towtimes$right)==F]),
fitprob=pweibull(sort(towtimes$left[is.na(towtimes$right)==F]),params[1],params[2]))
if (length(cens[,1])>0) { # combine both data frames if there are both censored and
# non-censored data points. Otherwise just use the non-censored data
both<-rbind(cbind(cens,cens=0),cbind(uncens,cens=1))
} else { both<-cbind(uncens,cens=1)} # no way there could be zero non-censored data points
# b/c there are tests before this function is called in the getWeibullsFromDf function
both<-both[order(both$tow),]
both$rank<-seq(1,length(both$tow)) # rank censored and uncensored data together
# calculate Modified Kaplan-Meier rank (just the non-censored events) with two for-loops
for(jj in 1:length(both$tow)) { # find intermediate value
both$p_intmd[jj]<-((length(both$tow)-jj)^both$cens[jj])/((length(both$tow)-jj+1)^both$cens[jj])
#if(b==1){print(jj);b<-1000}else{b<-b-1}
} # end make p intermediate
for(jj in 1:length(both$tow)) { # calculate p rank (y-axis value)
both$p_prime[jj]<-1-prod(both$p_intmd[1:jj]) # p prime
if (jj==1) {
both$p[jj]<-1-((1-both$p_prime[jj])+1)/2
} else {
both$p[jj]<-1-((1-both$p_prime[jj])+(1-both$p_prime[jj-1]))/2
} # end if it's the first row
} # end calculate p rank (y-axis value)
(both) # return both
} # end getModKM
# return a data frame with the four anderson darling adjusted statistics
# takes "both" data frame with modified Kaplan Meier ranks (empirical dist) and weibull fit
# as well as original cens/uncens data and the exponential parameters
calcADAvals<-function(rankedpts, origTOW, expoparam) {
# calculate the two missing distributions
normalfit<-fitdistcens(origTOW,"norm")
lognormalfit<-fitdistcens(origTOW,"lnorm")
#only take the plot (noncensored) points
rankedpts<-rankedpts[rankedpts$cens==1,]
rankedpts$rank<-seq_len(length(rankedpts[,1]))#recalculate the rank
#find AD for the weibull distribution
weibAD<-getOneADstat(rankedpts)
#recalculate the fits for the exponential distribution
rankedpts$fitprob<-pexp(q=rankedpts$tow,rate=1/expoparam[1]) # takes the rate
#and find the AD stat for this
expoAD<-getOneADstat(rankedpts)
# do the same for Normal and Lognormal distributions
rankedpts$fitprob<-plnorm(rankedpts$tow,meanlog=lognormalfit$estimate[1],sdlog=lognormalfit$estimate[2])
lognormAD<-getOneADstat(rankedpts)
rankedpts$fitprob<-pnorm(rankedpts$tow,mean=normalfit$estimate[1],sd=normalfit$estimate[2])
normalAD<-getOneADstat(rankedpts)
#output all four GOF stats
out<-c(weibAD,expoAD,lognormAD,normalAD)
} # end calculate anderson darling statistics function
# gets an anderson darling adjusted stat
# requires the "both" dataframe with the right fitprob for that distribution
# as well as the empirical modified Kaplan Meier value plot points (p)
getOneADstat<-function(rankedpts) {
lgth<-length(rankedpts$p)
# use the findArow function to get A values of each row, then add the n+1th row A value
A<-sum(apply(X=as.matrix(rankedpts),MARGIN=1,FUN=findArow,fitprob=rankedpts$fitprob)) - (1-1E-12) - log(1-(1-1E-12)) +
rankedpts$fitprob[lgth] + log(1-rankedpts$fitprob[lgth])
B<-sum(apply(X=as.matrix(rankedpts),MARGIN=1,FUN=findBrow,fitprob=rankedpts$fitprob,nonpar=rankedpts$p)) +
2*log(1-(1-1E-12))*rankedpts$p[lgth] - 2*log(1-rankedpts$fitprob[lgth])*rankedpts$p[lgth]
C<-sum(apply(X=as.matrix(rankedpts),MARGIN=1,FUN=findCrow,fitprob=rankedpts$fitprob,nonpar=rankedpts$p)) +
log(1-(1E-12))*rankedpts$p[lgth]^2 -
log(1-(1-(1E-12)))*rankedpts$p[lgth]^2 -
log(rankedpts$fitprob[lgth])*rankedpts$p[lgth]^2 +
log(1-rankedpts$fitprob[lgth])*rankedpts$p[lgth]^2
out<-lgth*(A+B+C)
}
# returns a column for the "both" data frame: A in the AD* (anderson darling adjusted) calculation.
# to be used in the AD* function and with apply by row; needs the "both" data frame and the fitprob column from "both"
findArow<-function(input,fitprob) {
if (input[4]==1) { # first row is special
out<- (-input[2]) - log(1-input[2]) + 0 + log(1-0)
} else { # not the first row
out<- (-input[2]) - log(1-input[2]) + fitprob[input[4]-1] + log(1-(fitprob[input[4]-1]))
} # end not the first row
} # end find A
# returns a column for the "both" data frame: B in the AD* calculation
findBrow<-function(input,fitprob,nonpar) {
if (input[4]==1) { # first row is special
out<- 0
} else { # not the first row
out<- 2*log(1-input[2])*nonpar[input[4]-1] - 2*log(1-fitprob[input[4]-1])*nonpar[input[4]-1]
} # end not the first row
} # end find B
# returns a column for the "both" data frame: C in the AD* calculation
findCrow<-function(input,fitprob,nonpar) {
if (input[4]==1) { # first row is special
out<- 0
} else { # not the first row
out<- log(input[2])*nonpar[input[4]-1]^2 - log(1-input[2])*nonpar[input[4]-1]^2 -
log(fitprob[input[4]-1])*nonpar[input[4]-1]^2 + log(1-fitprob[input[4]-1])*nonpar[input[4]-1]^2
} # end not the first row
} # end find C
# testA<-apply(X=as.matrix(ranksubdf),MARGIN=1,FUN=findArow,fitprob=ranksubdf$fitprob)
# testB<-apply(X=as.matrix(ranksubdf),MARGIN=1,FUN=findBrow,fitprob=ranksubdf$fitprob,nonpar=ranksubdf$p)
# testC<-apply(X=as.matrix(ranksubdf),MARGIN=1,FUN=findCrow,fitprob=ranksubdf$fitprob,nonpar=ranksubdf$p)
## plot
# plot(uncens$tow,uncens$yval,log="xy")
# http://rtricks.wordpress.com/2009/10/30/decimal-log-scale-on-a-plot/
#log-scale example:
# op<-par()
# lx <- seq(1,5, length=41)
# yl <- expression(e^{-frac(1,2) * {log[10](x)}^2})
# y <- exp(-.5*lx^2)
# par(mfrow=c(2,1), mar=par("mar")+c(0,1,0,0))
# plot(10^lx, y, log="xy", type="l", col="purple",
# + main="Log-Log plot", ylab=yl, xlab="x")
# plot(10^lx, y, log="xy", type="o", pch='.', col="forestgreen",
# + main="Log-Log plot with custom axes", ylab=yl, xlab="x",
# + axes = FALSE, frame.plot = TRUE)
# my.at <- 10^(1:5)
# axis(1, at = my.at, labels = formatC(my.at, format="fg"))
# at.y <- 10^(-5:-1)
# axis(2, at = at.y, labels = formatC(at.y, format="fg"), col.axis="red")
# par(op)
###plotting functions###
#fix the scale to Weibull:
# draw the plot:
weibplot <- function(x,y,log='xy',...,forceylim=c(0,0),forcexlim=c(0,0))
{
x <- log(x,10)
y <- log(-log(1-y))
xlg = TRUE # hard-coded for now
ylg = TRUE
yl <- ifelse(forceylim==c(0,0),range(y),forceylim)
xl <- ifelse(forcexlim==c(0,0),range(x),forcexlim)
plot(x,y,...,axes=FALSE,ylim=yl,xlim=xl)
if(xlg){drawlogaxis(1,xl)}else{axis(1,at=pretty(xl),labels=pretty(xl))}
if(ylg){drawweibaxis()}else{axis(2,at=pretty(yl),labels=pretty(yl))}
box()
}
# draw the axes:
drawlogaxis <- function(side,range) {
par(tck=0.02)
mlog <- floor(min(range))
Mlog <- ceiling(max(range))
SeqLog <- c(mlog:Mlog)
Nlog <- (Mlog-mlog)+1
axis(side,at=SeqLog,labels=10^SeqLog)
ats <- log(seq(from=2,to=9,by=1),10)
mod <- NULL
for(i in SeqLog)
{
mod <- c(mod,rep(i,length(ats)))
}
ats <- rep(ats,Nlog)
ats <- ats+mod
par(tck=0.02/3)
axis(side,at=ats,labels=NA)
}
drawweibaxis <- function() {
par(tck=0.02)
SeqWeib <- c(.001,.003,.01,.02,.05,.1,.25,.5,.75,.9,.96,.99,.999)
axis(2,labels=SeqWeib,at=(log(-log(1-SeqWeib))),las=2)
}
####################################################################
#not code to use, but workspace to hold on to good things
# subset
# str(((allweibulls[allweibulls$WUC=="06A",])[allweibulls[allweibulls$WUC=="06A",]$Loc=="ALL",])[(allweibulls[allweibulls$WUC=="06A",])[allweibulls[allweibulls$WUC=="06A",]$Loc=="ALL",]$adjRepInt=="ALL",])
# # better
# str(((allweibulls[allweibulls$WUC=="06A" & allweibulls$Loc=="ALL" & allweibulls$PN=="7-311310001-41",])))
#
#
# newdf<-CensUncens(weibulls_initial[weibulls_initial$WUC=="06A" & weibulls_initial$PN=="7-311310001-41",])
# cens<-data.frame(tow=sort(newdf$left[is.na(newdf$right)==T]),
# rank=seq_along(1:length(newdf$left[is.na(newdf$right)==T])),
# yval=(seq_along(1:length(newdf$left[is.na(newdf$right)==T]))-.5)
# /(length(newdf$left[is.na(newdf$right)==T])),
# yval2=(seq_along(1:length(newdf$left[is.na(newdf$right)==T]))-.3)
# /(length(newdf$left[is.na(newdf$right)==T]+.4)),
# fitprob=pweibull(sort(newdf$left[is.na(newdf$right)==T]),1.204782,870.6467))
# uncens<-data.frame(tow=sort(newdf$left[is.na(newdf$right)==F]),
# rank=seq_along(1:length(newdf$left[is.na(newdf$right)==F])),
# yval=(seq_along(1:length(newdf$left[is.na(newdf$right)==F]))-.5)
# /(length(newdf$left[is.na(newdf$right)==F])),
# yval2=(seq_along(1:length(newdf$left[is.na(newdf$right)==F]))-.3)
# /(length(newdf$left[is.na(newdf$right)==F]+.4)),
# fitprob=pweibull(sort(newdf$left[is.na(newdf$right)==F]),1.204782,870.6467))
# weibull.41<-fitdistcens(newdf,"weibull")
# normal.41<-fitdistcens(newdf,"norm")
# lognormal.41<-fitdistcens(newdf,"lnorm")
# expo.41<-fitdistcens(newdf,"weibull",start=c(shape=1))
#
# ### combine censored and uncensored data and find the Modified Kaplan-Meier rank
# both<-rbind(cbind(cens,cens=0),cbind(uncens,cens=1))
# both<-both[order(both$tow),]
# both$rank<-seq(1,length(both$tow))
# for(jj in 1:length(both$tow)) {
# both$p_intmd[jj]<-((length(both$tow)-jj)^both$cens[jj])/((length(both$tow)-jj+1)^both$cens[jj])
# }
# for(jj in 1:length(both$tow)) {
# both$p_prime[jj]<-1-prod(both$p_intmd[1:jj])
# if (jj==1) {
# both$p[jj]<-1-((1-both$p_prime[jj])+1)/2
# } else {
# both$p[jj]<-1-((1-both$p_prime[jj])+(1-both$p_prime[jj-1]))/2
# }
# }
#
#
# ###TESTING###
#
# op<-par()
# #dots & line
# png(file = "WeibullPlot.png", width=5.96, height=4.54, units="in", res=72)
# par(op)
# weibplot(both$tow[which(both$cens==1)],both$p[which(both$cens==1)],forcexlim=c(.9,3.2),forceylim=c(-7,0),xlab="Hours Elapsed",ylab="Percent",col=rgb(255/255,0/255,0/255),pch=16,main="WUC 06A; Loc ALL; PN 7-31130001-41; RepInt ALL")
# #weibplot(uncens$tow,uncens$yval,forcexlim=c(.9,3.2),forceylim=c(-7,0),xlab="Hours Elapsed",ylab="Percent",col=rgb(255/255,0/255,0/255),pch=16)
# abline(h=c(log(-log(1-c(.001,.01,.1)))),v=c(c(seq(-1,3))),col=rgb(1,0,0))
# abline(h=c(log(-log(1-c(.003,.02,.05,.25,.5,.75,.9,.96,.99,.999)))),col=rgb(38/255,201/255,38/255))
# abline(v=c(log10(seq(8,9)),log10(seq(20,90,10)),log10(seq(200,900,100))),col=rgb(38/255,201/255,38/255))
# line.41<-qweibull(seq(0.0001,.99,.0005),weibull.41$estimate[1],weibull.41$estimate[2],log.p=F)
# lines(x=log10(line.41),y=log(-log(1-seq(.0001,.99,.0005))))
# #histogram
# # par(new=T,mar=c(3.32,4.1,4.1,2.1)) (full screen on my second monitor)
# par(new=T,mar=c(4.58,4.1,4.1,2.1)) #5.96 x 4.54 to line up the histogram to the bottom of the plot area
# thishist<-hist(log10(cens$tow),breaks="FD",xlim=c(.9,3.2),ylim=c(0,100),border=1,col=rgb(149/255,184/255,251/255),ylab=NULL,xlab=NULL,main=NULL,labels=T,axes=F)
# #thishist<-hist(log10(cens$tow),breaks="FD",density=19,angle=c(45,135),xlim=c(.9,3.2),ylim=c(0,100),border=1,col=c(rgb(61/255,61/255,255/255),rgb(149/255,184/255,251/255)),ylab=NULL,xlab=NULL,main=NULL,labels=T,axes=F)
# dev.off()
#
# subdf<-weibulls_initial[weibulls_initial$WUC=="06A" & (weibulls_initial$PN=="7-311310001-41" | weibulls_initial$PN=="7-311310001-43") ,]
# subdf<-weibulls_initial[weibulls_initial$WUC=="06A" & weibulls_initial$PN=="7-311310001-43" ,]
# subdf<-droplevels(subdf)
# censsubdf<-CensUncens(subdf)
# weibsubdf<-fitdistcens(censsubdf,"weibull")
# heresone<-fitdistcens(censsubdf,"weibull",)
# expsubdf<-fitdistcens(censsubdf,"weibull",start=list(scale=median(censsubdf$right,na.rm=T)),fix.arg=list(shape=1))
# lnmsubdf<-fitdistcens(censsubdf,"lnorm")
# nmsubdf<-fitdistcens(censsubdf,"norm",start=list(mean=650))
# ranksubdf<-modKM(censsubdf,weibsubdf[[1]])
# getPlotFromDF(ranksubdf,head(subdf),weibsubdf[[1]],"WUCadjRepInt")
# BetaTest(censsubdf,weibsubdf$loglik)
#
# weibstest<-getWeibullsFromDF(subdf,0,"WUCadjRepInt")
#
# betaout<-BetaTest(censsubdf,weibsubdf[[1]])
# rankedpoints <- getModKM(censsubdf, weibsubdf[[1]])
# ADstats <- calcADAvals(rankedpoints, censsubdf, betaout[[2]])
#
# # For Survival objects with packages Surv: 0=alive, 1=dead
#
# info<-matrix(data=c(176.5095883,-37569.56341,-37569.56341,200250529.1),nrow=2,byrow=T)
#
# ####