forked from z0on/2bRAD_denovo
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfastStructurePlotting_functions.R
72 lines (69 loc) · 1.95 KB
/
fastStructurePlotting_functions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#############################
ggplotStructure=function(str,q){
# ggplotStructure:
# infile: input fastStructure table; assuming the second column has population labels
# qfile: table with .meanQ output; must have column headers
require(ggplot2)
pops=str[seq(1,nrow(str),2),2]
inds=str[seq(1,nrow(str),2),1]
struc5=q
s5=stack(struc5)
s5$indiv=rep(1:nrow(struc5))
# s5$indiv=rep(inds)
s5$pops=rep(pops)
plo=ggplot(s5,aes(factor(indiv),values))+geom_bar(stat="identity",width=1,aes(fill=factor(ind)))+theme_bw()+scale_x_discrete(labels=pops)+xlab("pops")
return(plo)
}
###############################
JSD.pair <- function(x, y){
###Function to compute Shannon-Jensen Divergence
###x and y are the frequencies for the same p categories
u <- x/sum(x)
v <- y/sum(y)
m <- (u+v)/2
if (all(u*v>0)){
d <- (u*log(u/m)+v*log(v/m))/2
} else {
P1 <- u*log(u/m)
P2 <- v*log(v/m)
P1[is.nan(P1)] <- 0
P2[is.nan(P2)] <- 0
d <- (P1+P2)/2
}
return(sum(d))
}
##############################
matchPops=function(ga, gb, niter=3000) {
### function to match population identifiers between fastStructure runs
### based on permutations of column names and Shannon-Jensen divergences
minsum=1000
for (i in 1:niter) {
names(gb)=sample(names(gb))
sumjsd=0
for (n in names(ga)) {
sumjsd=sumjsd+JSD.pair(ga[,n],gb[,n])
}
if (sumjsd<minsum) {
minsum=sumjsd
gbnames=names(gb)
}
}
return(list("pops"=gbnames,"min.JSD"=minsum))
}
##############################
averageBest=function(likelihoods,top=25) {
# matches populations assignments among best-likelihood runs,
# averages assignemnt probabilities, returns averaged meanQ table
bests=head(likes[order(likes[,2],decreasing=T),1],top)
gs=read.table(paste(bests[1],".meanQ",sep=""))
g1=gs
print("top 1")
for (b in 2:top) {
gn=read.table(paste(bests[b],".meanQ",sep=""))
names(gn)=matchPops(g1,gn)$pops
print(paste("top",b))
gs=gs+gn[,names(g1)]
}
return(gs/top)
}
###############################