Skip to content

Visualization

jbedia edited this page Oct 20, 2014 · 3 revisions

Visualization of uncertainty

# Install packages downscaleR and ecomsUDG.Raccess (stable branches).
# NOTE(review): installation only needs to run once; comment out afterwards.
devtools::install_github(c("SantanderMetGroup/downscaleR.java@stable",
                           "SantanderMetGroup/downscaleR@stable",
                           "SantanderMetGroup/ecomsUDG.Raccess@stable"))
# Use library() rather than require(): require() returns FALSE on failure
# instead of stopping, which silently hides a missing dependency.
library(ecomsUDG.Raccess)
library(verification)
library(RColorBrewer)

# Log into the ECOMS User Data Gateway (replace with real credentials).
loginECOMS_UDG('username', 'password')

# Experiment configuration --------------------------------------------------
var           <- "tas"                        # near-surface air temperature
year.ini      <- 1991                         # first year of hindcast period
year.end      <- 2000                         # last year of hindcast period
year.target   <- 2002                         # year to visualize
season        <- c(12, 1, 2)                  # target season (DJF)
lagged.season <- c(8, 9, 10, 11, 12, 1, 2)    # months for the lagged plot
lead.month    <- 3                            # forecast lead time (months)
members       <- seq_len(9)                   # ensemble members to load
lonlim        <- c(-10, 5)                    # longitude window (degrees)
latlim        <- c(35, 45)                    # latitude window (degrees)

# Load predictions
# Daily ("DD") hindcast data from the System4 15-member seasonal forecast,
# restricted to the chosen window, season, years, lead time and members.
prd <- loadECOMS(dataset = "System4_seasonal_15", var=var,
  lonLim=lonlim, latLim=latlim, season=season, years=year.ini:year.end,
  leadMonth=lead.month, members=members, time="DD"
)

# Load observations
# WFDEI gridded observations for the same window/season/years (no members).
obs <- loadECOMS(dataset = "WFDEI", var=var,
  lonLim=lonlim, latLim=latlim, season=season, years=year.ini:year.end
)
# Interpolate observations onto the predictions grid
# (nearest-neighbour regridding so obs and prd share the same lon/lat grid).
obs <- interpGridData(obs, new.grid = getGrid(prd), method = "nearest")

Tercile plot*

*The `tercileValidation` function used below is already integrated in downscaleR.

tercileValidation(prd, obs = obs)

Tercile plot

Fan chart (absolute values)

# Fan chart (absolute values) -----------------------------------------------
# Select the days belonging to the target year within the loaded period.
year.filter <- getYearsAsINDEX(prd) == year.target
# Spatial average: collapse the lon/lat dimensions, keeping member x time.
prd.spavg <- apply(prd$Data[ ,year.filter, , ], MARGIN = 1:2, FUN = mean)
# Ensemble quantiles (min, quartiles incl. median, max) for each day.
ens.quant <- apply(prd.spavg, MARGIN = 2, FUN = quantile,
                   probs = c(0, 0.25, 0.5, 0.75, 1))
dates <- as.POSIXlt(prd$Dates$start[year.filter], tz = "GMT")
par(bg = "white", mar = c(4, 4, 1, 1))
# Empty canvas; "type" spelled out (original used partial matching "ty=").
plot(dates, ens.quant[3, ], ylim = range(prd.spavg), type = "n",
     ylab = "tas - Daily Mean", xlab = "time")
# Shaded bands: min-max envelope with the interquartile range overlaid.
polygon(x = c(dates, rev(dates)), y = c(ens.quant[1, ], rev(ens.quant[5, ])),
        border = "transparent", col = rgb(0, 0, 1, .4))
polygon(x = c(dates, rev(dates)), y = c(ens.quant[2, ], rev(ens.quant[4, ])),
        border = "transparent", col = rgb(0, 0, 1, .4))
# Ensemble median on top.
lines(dates, ens.quant[3, ], lwd = 2)

Fan chart

Lagged plot

# Lagged plot: monthly anomalies of the target year for 5 lead times.
# Rows = the 11 months spanned as the 7-month season slides across leads;
# columns = lead index used by the loop below.
lagged.data <- array(data = NaN, dim = c(11, 5))
lags <- lagged.season
for (lead in 5:1) {
  # Reference climatology: hindcast period at lead month 0 for these months.
  ref <- loadECOMS(dataset = "System4_seasonal_15", var = var,
                   lonLim = lonlim, latLim = latlim, season = lags, years = year.ini:year.end,
                   leadMonth = 0, members = members, time = "DD"
  )
  # Target-year forecast for the same months.
  target <- loadECOMS(dataset = "System4_seasonal_15", var=var,
                      lonLim=lonlim, latLim=latlim, season=lags, years=year.target,
                      leadMonth=0, members=members, time="DD"
  )
  # Monthly means
  target.means <- monmean(target, MARGIN=c(1, 3, 4))
  # Member and spatial mean
  target.means <- apply(target.means, MARGIN = 1, FUN = mean)
  # Monthly anomalies (drift removal on monthly basis)
  ref.means <- monmean(ref, MARGIN = c(1, 3, 4))
  ref.means <- apply(ref.means, MARGIN = 1, FUN = mean)
  target.anom <- target.means - ref.means
  # fill matrix (properly ordered! watch out for seasons crossing Dec-Jan)
  # NOTE(review): indexing by character month assumes monmean() names its
  # output by month number -- confirm against the monmean() implementation.
  lagged.data[(1:7) + 5 - lead, lead] <- target.anom[as.character(lags)]
  # next lead month: shift the month window and wrap December -> January
  lags <- lags + 1
  lags <- ifelse(lags > 12, lags - 12, lags)
}
# Plot the lagged plot (image)
# Plot the lagged plot (image)
# Diverging red-blue palette with symmetric z-limits so zero anomaly sits
# at the centre of the colour scale.
pal <- brewer.pal(11, "RdBu")
maxz <- max(abs(lagged.data), na.rm = TRUE)
par(bg = "lightgray",mar = c(1, 4.5, 2.5, 1))
image(1:11, 0:4, lagged.data, col = pal, zlim = c(-maxz, maxz), ylab = "lead time (months)")
# Month labels along the top axis (first 7 columns of the lagged layout).
mtext(text = lagged.season, side = 3, at = 1:7, line = 1)
# Vertical guides splitting the layout into lead-month groups.
abline(v = c(4.5, 7.5), lwd = 2)

Lagged plot

Bubble plot with uncertainty

# Bubble plot setup ----------------------------------------------------------
nyears <- year.end - year.ini + 1
# Index of the target year within the hindcast period.
iyear <- which(year.ini:year.end == year.target)
# Position of each named dimension in prd$Data (helper from downscaleR).
member.dim <- margin.dim(prd, 'member')
time.dim <- margin.dim(prd, 'time')
lon.dim <- margin.dim(prd, 'lon')
lat.dim <- margin.dim(prd, 'lat')
# Grid coordinates of the prediction grid.
lon <- prd$xyCoords$x
lat <- prd$xyCoords$y

# Transform both data to the same grid (model grid)
# NOTE(review): obs was already regridded earlier; repeated here so this
# section is self-contained.
O <- interpGridData(obs, new.grid = getGrid(prd), method = "nearest")
#
# Terciles for the forecasts
#
# yearmean changes the data dimension. time.dim is in the first dimension!!
y.mean <- yearmean(prd, MARGIN = c(lat.dim, lon.dim, member.dim))
# Climatological tercile thresholds per gridpoint/member; apply() over
# MARGIN c(2, 3, 4) with a length-2 FUN result yields dims c(2, lat, lon, member).
terciles <- apply(y.mean, MARGIN = c(2, 3, 4), FUN = quantile, c(1/3, 2/3))
# Probability of each tercile = fraction of members falling inside it.
nmemb <- nmembers(prd)  # hoisted out of the loop: it is loop-invariant
t.u <- array(data = NaN, dim = dim(y.mean)[1:3])
t.l <- array(data = NaN, dim = dim(y.mean)[1:3])
t.m <- array(data = NaN, dim = dim(y.mean)[1:3])
prob <- array(data = NaN, dim = c(3, dim(y.mean)[1:3]))
for (i in seq_len(dim(y.mean)[1])) {  # seq_len(): safe if the dim is 0
  t.u[i,,] <- apply(y.mean[i,,,] > terciles[2,,,], MARGIN = c(1, 2), FUN = sum) / nmemb
  t.l[i,,] <- apply(y.mean[i,,,] < terciles[1,,,], MARGIN = c(1, 2), FUN = sum) / nmemb
  t.m[i,,] <- 1 - t.u[i,,] - t.l[i,,]
  prob[1,i,,] <- t.l[i,,]
  prob[2,i,,] <- t.m[i,,]
  prob[3,i,,] <- t.u[i,,]
}
# Maximum probability from the terciles
max.prob <- apply(prob, MARGIN = c(2, 3, 4), FUN = max)
# Tercile (1=lower, 2=middle, 3=upper) attaining that maximum probability
t.max.prob <- apply(prob, MARGIN=c(2, 3, 4), FUN = which.max)
#
# Terciles for the observations
#
# Yearly means per gridpoint (ndex.mean averages grouped by the year index).
obs.y.mean <- apply(O$Data, MARGIN = c(2, 3), FUN = ndex.mean, INDEX = years(obs))
# Tercile thresholds; na.rm because observations may contain missing cells.
obs.terciles <- apply(obs.y.mean, MARGIN = c(2, 3), FUN = quantile, c(1/3, 2/3), na.rm = TRUE)
obs.t.u <- array(data = NaN, dim = dim(obs.y.mean))
obs.t.l <- array(data = NaN, dim = dim(obs.y.mean))
obs.t.m <- array(data = NaN, dim = dim(obs.y.mean))
# Logical membership of each year's observation in each tercile
# (logicals are coerced to 0/1 when stored into the numeric arrays).
for (i in seq_len(dim(obs.y.mean)[1])) {  # seq_len() instead of seq(1, n)
  obs.t.u[i,,] <- (obs.y.mean[i,,] > obs.terciles[2,,])
  obs.t.l[i,,] <- (obs.y.mean[i,,] < obs.terciles[1,,])
  obs.t.m[i,,] <- (obs.y.mean[i,,] >= obs.terciles[1,,] & obs.y.mean[i,,] <= obs.terciles[2,,])
}
obs.t <- obs.t.u * 1 + obs.t.l * -1 # 1 upper tercile, 0 middle tercile, -1 lower tercile
#
# Filter points with observations in model data
#
# Select the target year and drop gridpoints where the observation is NA.
v.max.prob <- as.vector(max.prob[iyear,,])
v.t.max.prob <- as.vector(t.max.prob[iyear,,])
# TRUE where the target-year observation is not missing
v.nans <- complete.cases(as.vector(obs.t[iyear,,]))
df <- data.frame(max.prob=v.max.prob[v.nans], t.max.prob=v.t.max.prob[v.nans])
# Colour by winning tercile: blue=lower(1), gold=middle(2), red=upper(3)
df$color <- "black"
df$color[df$t.max.prob == 3] <- "red"
df$color[df$t.max.prob == 2] <- "gold"
df$color[df$t.max.prob == 1] <- "blue"
# Coordinates linearized to match the as.vector() flattening above:
# expand.grid varies its first argument fastest, matching column-major
# order with lat as the first array dimension -- TODO confirm array dims.
latlon <- as.matrix(expand.grid(lat, lon))
nn.latlon <- latlon[v.nans, ]
#
# Compute score (ROCSS \in [-1,1]]): 2*AUC - 1 for the tercile with maximum
# forecast probability, at each gridpoint with complete observations.
#
# Preallocate to the maximum possible size instead of growing with c(),
# then trim to the number of scored gridpoints after the loop.
v.score <- rep(NA_real_, length(lon) * length(lat))
count <- 1
for (ilon in seq_along(lon)) {       # seq_along(): safe for empty vectors
  for (ilat in seq_along(lat)) {
    # Skip gridpoints with any missing observation in the hindcast period
    n.nan <- sum(is.na(obs.t[,ilat,ilon]))
    if (n.nan == 0) {
      # Compute the score only for the tercile with maximum probability
      select.tercile <- t.max.prob[iyear,ilat,ilon]
      if (select.tercile == 1){
        res <- roc.area(obs.t.l[ ,ilat,ilon], t.l[ ,ilat,ilon])
        v.score[count] <- res$A * 2 - 1
      }
      if (select.tercile == 2){
        res <- roc.area(obs.t.m[ ,ilat, ilon], t.m[ ,ilat, ilon])
        v.score[count] <- res$A * 2 - 1
      }
      if (select.tercile == 3){
        res <- roc.area(obs.t.u[ ,ilat, ilon], t.u[ ,ilat, ilon])
        v.score[count] <- res$A * 2 - 1
      }
      count <- count + 1
    }
  }
}
# Keep only the populated entries (same length the grown vector had before)
v.score <- v.score[seq_len(count - 1)]
# Split positive from negative skill (circles vs crosses in the plot)
pos.val <- v.score >= 0
neg.val <- v.score < 0
# Bubble plot
par(bg = "white", mar = c(3, 3, 1, 5))
# NOTE(review): alpha() comes from the 'scales' package and map() from
# 'maps'; neither is loaded above -- confirm they are attached.
# FIX: removed the stray closing parenthesis that followed the commented-out
# col= alternative; it made the original script fail to parse.
plot(nn.latlon[pos.val, 2], nn.latlon[pos.val, 1], cex = df$max.prob[pos.val] * 3,
  col = alpha(df$color[pos.val], v.score[pos.val]), pch=19, xlab="", ylab=""
  #col=df$color[pos.val], pch=19, xlab="", ylab=""
)
# Negative-score gridpoints marked with an x
points(nn.latlon[neg.val,2], nn.latlon[neg.val,1], pch=4, cex=0.75)
map("world", ylim=range(lat), xlim=range(lon), col="black", lwd=1, interior=FALSE, add=TRUE)
# Overlay an empty full-device plot so the legend sits outside the map panel
par(fig = c(0, 1, 0, 1), oma = c(0, 0, 0, 0), mar = c(0, 0, 0, 0), new = TRUE)
plot(0, 0, type = "n", bty = "n", xaxt = "n", yaxt = "n")
legend('right', c("T1","T2","T3","Negat.\nScore"), pch=c(19,19,19,4), col=c("blue","gold","red","black"),
  inset=c(0,0),xpd=TRUE,bty="n"
)

Bubble plot

Clone this wiki locally