-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpostprocessing.R
68 lines (64 loc) · 2.44 KB
/
postprocessing.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#rep.data.frame <- function(x, times) {
# rnames <- attr(x, "row.names")
# x <- lapply(x, rep.int, times = times)
# class(x) <- "data.frame"
# if (!is.numeric(rnames))
# attr(x, "row.names") <- make.unique(rep.int(rnames, times))
# else
# attr(x, "row.names") <- .set_row_names(length(rnames) * times)
# x
#}
#' Split specimen rows whose `specno` lists several specimen numbers.
#'
#' Rows whose `specno` contains " , " are split first, then rows of the
#' intermediate result whose `specno` contains "and " are split. Each split is
#' delegated to splitByString(); only the pieces it flags with `fixed == TRUE`
#' are kept, so a matching row that splitByString() rejects is dropped from
#' the result. Finally, rows whose `specno` is the empty string are removed.
#'
#' @param specs A data frame with a `specno` column.
#' @return A data frame with the same columns as `specs`: the rows that did not
#'   match a separator followed by the rows produced by the accepted splits.
fixCommas <- function(specs) {
  # Split every row of `df` whose specno matches `sep` and return the
  # untouched rows followed by the accepted pieces.
  splitRows <- function(df, sep) {
    hits <- grepl(sep, df$specno)
    # apply() over zero rows still calls the function once on an unnamed dummy
    # vector, which makes splitByString() fail; nothing to do in that case.
    if (!any(hits)) {
      return(df)
    }
    matched <- df[hits, , drop = FALSE]
    # NOTE: apply() converts the rows to a character matrix before
    # splitByString() sees them, so the rebuilt rows carry character values.
    pieces <- do.call(
      rbind.data.frame,
      apply(matched, 1, splitByString, string = sep, names = colnames(matched))
    )
    # Keep only accepted pieces and drop the trailing "fixed" flag column by
    # position relative to the input width rather than a hard-coded index.
    accepted <- pieces[pieces$fixed, seq_len(ncol(df)), drop = FALSE]
    rbind(df[!hits, , drop = FALSE], accepted)
  }

  specimensFixed <- splitRows(specs, " , ")
  specimensFixed <- splitRows(specimensFixed, "and ")
  specimensFixed <- subset(specimensFixed, specimensFixed$specno != "")
  return(specimensFixed)
}
#' Split one specimen row into one row per specimen number.
#'
#' The `specno` entry of `specrow` is split on `string` (interpreted by
#' strsplit() as a regular expression). The split is accepted only when the
#' longest and shortest pieces differ by fewer than 3 characters.
#'
#' @param specrow A named vector holding one row; must contain a "specno"
#'   element.
#' @param string Separator passed to strsplit().
#' @param names Column names to give the output (one per element of `specrow`).
#' @return A data frame with the columns in `names` plus a logical `fixed`
#'   column. If the split is accepted there is one row per piece, `specno`
#'   holds the piece and `fixed` is TRUE. Otherwise there are two copies of
#'   the original row with `fixed` set to FALSE.
splitByString <- function(specrow, string, names) {
  pieces <- unlist(strsplit(unlist(specrow["specno"]), string))
  nfields <- length(specrow)

  # Accept the split only when all pieces have a similar number of characters.
  widths <- nchar(pieces)
  similar <- abs(max(widths) - min(widths)) < 3

  # Rejected splits are returned as a two-row placeholder.
  nout <- if (similar) length(pieces) else 2
  out <- setNames(
    data.frame(matrix(nrow = nout, ncol = nfields + 1)),
    c(names, "fixed")
  )
  # The single original row is recycled into every output row.
  out[, 1:nfields] <- as.data.frame(t(specrow))
  if (similar) {
    out$specno <- pieces
  }
  out$fixed <- similar
  return(out)
}
#' Expand specimen rows whose `specno` is a range such as "12 - 15".
#'
#' Every row whose `specno` contains " - " is handed to expandSeq(). Only the
#' rows it flags with `fixed == TRUE` are kept, so a matching row that
#' expandSeq() cannot expand is dropped from the result.
#'
#' @param specs A data frame with a `specno` column.
#' @return A data frame with the same columns as `specs`: the rows without a
#'   range followed by one row per expanded specimen number.
fixSequences <- function(specs) {
  dashes <- grepl(" - ", specs$specno)
  # apply() over zero rows still calls the function once on an unnamed dummy
  # vector, which makes expandSeq() fail; nothing to expand in that case.
  if (!any(dashes)) {
    return(specs)
  }
  ranged <- specs[dashes, , drop = FALSE]
  # NOTE: apply() converts the rows to a character matrix before expandSeq()
  # sees them, so the rebuilt rows carry character values.
  expanded <- do.call(
    rbind.data.frame,
    apply(ranged, 1, expandSeq, names = colnames(ranged))
  )
  # Keep only expanded rows and drop the trailing "fixed" flag column by
  # position relative to the input width rather than a hard-coded index.
  kept <- expanded[expanded$fixed, seq_len(ncol(specs)), drop = FALSE]
  specimensFixed <- rbind(specs[!dashes, , drop = FALSE], kept)
  return(specimensFixed)
}
#' Expand one specimen row holding a numeric range into one row per number.
#'
#' The `specno` entry of `specrow` is split on " - ". The range is expanded
#' only when the split yields exactly two endpoints, both parse as numbers,
#' and they are less than 200 apart.
#'
#' @param specrow A named character vector holding one row; must contain a
#'   "specno" element.
#' @param names Column names to give the output (one per element of `specrow`).
#' @return A data frame with the columns in `names` plus a logical `fixed`
#'   column. If the range is expanded there is one row per number from the
#'   first endpoint to the second, `specno` holds the number and `fixed` is
#'   TRUE. Otherwise there are two copies of the original row with `fixed`
#'   set to FALSE.
expandSeq <- function(specrow, names) {
  ends.char <- unlist(strsplit(specrow["specno"], " - "))
  # Non-numeric endpoints become NA; the conversion warning is expected.
  ends <- suppressWarnings(as.numeric(ends.char))
  columns <- length(specrow)
  specrow.df <- as.data.frame(t(specrow))

  # A dangling range ("12 - ") yields one endpoint and a chained one
  # ("1 - 2 - 3") yields three; neither is a well-formed range, so both are
  # left unexpanded instead of erroring in seq() or ignoring the extra value.
  expandable <- length(ends) == 2 &&
    !anyNA(ends) &&
    abs(ends[2] - ends[1]) < 200

  if (expandable) {
    numbers <- seq(ends[1], ends[2])
    temp <- setNames(
      data.frame(matrix(nrow = length(numbers), ncol = columns + 1)),
      c(names, "fixed")
    )
    # The single original row is recycled into every output row.
    temp[, seq_len(columns)] <- specrow.df
    temp$specno <- numbers
    temp$fixed <- TRUE
  } else {
    # Two-row placeholder flagged as not fixed.
    temp <- setNames(
      data.frame(matrix(nrow = 2, ncol = columns + 1)),
      c(names, "fixed")
    )
    temp[, seq_len(columns)] <- specrow.df
    temp$fixed <- FALSE
  }
  return(temp)
}