Skip to content

Commit

Permalink
Update functions.R
Browse files Browse the repository at this point in the history
  • Loading branch information
HSalat authored Jun 16, 2021
1 parent dea8741 commit f68ab6f
Showing 1 changed file with 65 additions and 38 deletions.
103 changes: 65 additions & 38 deletions functions.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,19 @@
# Most of the functions in this sheet are ad-hoc variants of usual functions created to have more control over hidden details when trying different methods.
# The actual clustering algorithm used in the article corresponds to the last section: AutoClustering


################
### Gridding ###
################


# This section contains utility functions to ready data for rasterisation.
# <country> is a bounding box around the studied area
# <holes> are pixels that are known to not be land officially belonging to the Country (e.g. Ocean, Gambia)
# These were later replaced by tools taken directly from the R raster package

griddingNation <- function(X,Y,Z,country,holes = NULL,fun="mean"){
# X and Y are vectors of coordinates, Z is a vecotr of values at these coordinates
# "country" must be a vector containing: (minLong,maxLong,minLat,maxLat,resX,resY)
# resX/resY is expressed as the number of cells contained in the grid
l <- length(X)
Expand All @@ -23,6 +33,7 @@ griddingNation <- function(X,Y,Z,country,holes = NULL,fun="mean"){
return(result)
}

# In principle deprecated
randtoGrid <- function(X,Y,Z,MX,MY,fun)
# X: 1st coord, Y: 2nd coord, Z: values,
# MX = c(min,max,nbr of X intervals), MY=c(min,max,nbr of Y intervals)
Expand Down Expand Up @@ -51,7 +62,7 @@ randtoGrid <- function(X,Y,Z,MX,MY,fun)
return(result)
}

# Griding the data: fixed boundaries
# Griding the data: fixed boundaries; in principle deprecated
newrandtoGrid <- function(X,Y,Z,MX,MY,fun){
# X: 1st coord, Y: 2nd coord, Z: values,
# MX = nbr of X intervals, MY = nbr of Y intervals
Expand Down Expand Up @@ -81,6 +92,7 @@ newrandtoGrid <- function(X,Y,Z,MX,MY,fun){
return(result)
}

# Deprecated function to aggregate spatially.
GridingAgg <- function(InitGrid,multp,startpos=1)
# 'multp' is a vector containing the new grid unit sizes as multiples of the initial unit size
# 'startpos' indicates from which corner to start (counted clockwise)
Expand Down Expand Up @@ -117,6 +129,9 @@ GridingAgg <- function(InitGrid,multp,startpos=1)
### Data preparation ###
########################


# Likely deprecated functions to load and apply basic preparation to the data

monthlyText <- function(i,folderin){
ref <- data.frame(tower=1:1666)
temp <- read.csv(paste(folderin,"SET1/SET1S_",i,".csv",sep=""),header = F)
Expand Down Expand Up @@ -169,7 +184,8 @@ hourlyVoice <- function(i,folderin){
### Plotting ###
################



# Function to plot several ggplots side by side
multiplot <- function(...,plotlist=NULL,file,cols=1,layout=NULL) {
library(grid)
plots <- c(list(...),plotlist)
Expand All @@ -191,6 +207,7 @@ multiplot <- function(...,plotlist=NULL,file,cols=1,layout=NULL) {
}
}

# Function to visualise matrices, useful to do color plots of correlation matrices
plotgrid <- function(grid,log=F){
meltedgrid <- melt(grid)
if(log==T){
Expand All @@ -205,6 +222,8 @@ plotgrid <- function(grid,log=F){
### Voronoi ###
###############


# Simple definition of voronoi neighbourhood
voronoi <- function(i,j,ref){
d <- sqrt((ref[,1]-i)^2+(ref[,2]-j)^2)
return(c(which.min(d),ref[which.min(d),]))
Expand All @@ -230,7 +249,7 @@ avg_voronoi <- function(grid){
return(result)
}

# [Aggregation:] voronoi compared to ref
# [Aggregation:] voronoi compared to ref of centres
avg_voronoi_ref <- function(grid,ref){
result <- matrix(0,nrow=nrow(grid),ncol=ncol(grid))
whichm <- matrix(0,nrow=nrow(grid),ncol=ncol(grid))
Expand All @@ -251,7 +270,7 @@ avg_voronoi_ref <- function(grid,ref){
return(list(result,meanr))
}

# [Aggregation:] voronoi compared to ref
# [Aggregation:] voronoi compared to ref of centres that defaults to NA instead of 0 when no value is found
avg_voronoi_ref_NA <- function(grid,ref){
result <- matrix(NA,nrow=nrow(grid),ncol=ncol(grid))
whichm <- matrix(0,nrow=nrow(grid),ncol=ncol(grid))
Expand Down Expand Up @@ -279,10 +298,14 @@ avg_voronoi_ref_NA <- function(grid,ref){
return(list(result,meanr,sumr,countr))
}


#######################
##### Correlation #####
#######################


# Simple automation of Poisson fittings and R^2 computation

pearson <- function(D1,D2){
a1 <- mean(D1)
a2 <- mean(D2)
Expand All @@ -297,6 +320,7 @@ rsq <- function(a,b,subset=1:length(a),l=1){
}
}

# Poisson fittings adapted to the data
rsqP <- function(a,b,c=NULL,subset=1:length(a)){
if(!is.null(c)){
temp <- glm(a[subset] ~ log(b[subset]) + log(c[subset]),family = poisson(link = "log"),na.action = na.exclude)
Expand All @@ -307,6 +331,7 @@ rsqP <- function(a,b,c=NULL,subset=1:length(a)){
}
}

# Linear fittings adapted to the data
rsqL <- function(a,b,c=NULL,subset=1:length(a)){
temp <- lm(log(a[subset]) ~ log(b[subset]) + log(c[subset]),na.action = na.exclude)
rsq(a,exp(temp$coefficients[1])*b^temp$coefficients[2]*c^temp$coefficients[3])
Expand All @@ -330,6 +355,9 @@ adHoc <- function(aa,bb,m=100,by=0.01){
##### Multifractals #####
#########################


# Used only for side projects

boxCounting <- function(M,S)
{
nr <- nrow(M)
Expand Down Expand Up @@ -457,6 +485,9 @@ MMoment_MGBox <- function(grid,sizes,q,dd,neigh=1)
######################


# Custom functions to create distance matrices

# Based on Sd of the difference between two curves
distMatSd <- function(df){
l <- nrow(df)
m <- matrix(0,nrow=l,ncol=l)
Expand All @@ -472,6 +503,7 @@ distMatSd <- function(df){
return(m)
}

# Based on direct correlation between two curves (point by point)
distMatCor <- function(df){
l <- nrow(df)
m <- matrix(0,nrow=l,ncol=l)
Expand All @@ -487,6 +519,7 @@ distMatCor <- function(df){
return(m)
}

# Likely deprecated; for testing only
customClust <- function(df,num){
l <- nrow(df)
m <- matrix(0,nrow=l,ncol=l)
Expand Down Expand Up @@ -517,10 +550,14 @@ customClust <- function(df,num){
return(result)
}


##########################################
##### Variable coefficients analysis #####
##########################################


# Only useful for side projects

bin <- function(data,n){
vect <- rep(list(NULL),n)
int <- (max(data)-min(data))/n
Expand Down Expand Up @@ -571,12 +608,15 @@ multidensAnalysis2 <- function(n){
estimate2 <- dens^(fit2$coefficients[1]+dens*fit2$coefficients[2])
return(cor(night,estimate2)^2)
}



######################################
##### Distance Matrix Clustering #####
######################################


# Updated functions to create distance matrices, see above

distMatSd <- function(df){
l <- nrow(df)
m <- matrix(0,nrow=l,ncol=l)
Expand Down Expand Up @@ -641,11 +681,14 @@ distMatCorNA <- function(df,n){
return(1-m)
}


##############################
##### Network Clustering #####
##############################


# creates a feature table to apply clustering (useful for network clustering). See article to identify all the features used.

clustFeat <- function(G,t,dist){
# G is the original graph.
# t is the edge threshold(s) to build subgraph. Please start with 0 to include the original graph.
Expand Down Expand Up @@ -709,7 +752,9 @@ radian.rescale <- function(x, start=0, direction=1) {
##########################
##### Autoclustering #####
##########################



# These functions are meant to apply autoimatically the purity-based method of clustering (see SI)

# Functions:
# autoClust(data,var,t,comp = NULL,method.clust = 'complete',method.purity = 'bins')
Expand All @@ -732,14 +777,6 @@ radian.rescale <- function(x, start=0, direction=1) {
# <time> Length of considered period: 'D', 'W' or 'Y'
# <unit> Tower ('T') or Voronoi cell ('V'). Currently, only Voronoi is supported.



#######
####### Functions
#######



# Cluster purity based on bins
ispure1 <- function(dend,var,comp,t){
test <- FALSE
Expand Down Expand Up @@ -784,27 +821,23 @@ autoClust <- function(data,var,t,comp = NULL,method.clust = 'complete',method.pu
}
}

##### New dens
nameClust <- function(mpd,t,comp = NULL,dist = 'Sd',time = 'D',unit = "V",var = 'dens',method.clust = 'complete',method.purity = 'bins'){
data <- read.csv(file = paste("Data/z_dMat_",time,dist,"_",mpd,"_",unit,".csv",sep=""),header = F)
data <- as.matrix(data)
colnames(data) <- 1:nrow(data)
if(var == 'dens' & unit == 'V'){
var <- vorData$dens
}else if(var == 'night' & unit == 'V'){
var <- vorData$nightlight
}else if(var == 'dens' & unit == 'T'){
var <- densV
}else if(var == 'night' & unit == 'T'){
var <- elecV
}
return(autoClust(data,var,t,comp,method.clust,method.purity))
#nameClust <- function(mpd,t,comp = NULL,dist = 'Sd',time = 'D',unit = "V",var = 'dens',method.clust = 'complete',method.purity = 'bins'){
# data <- read.csv(file = paste("Data/z_dMat_",time,dist,"_",mpd,"_",unit,".csv",sep=""),header = F)
# data <- as.matrix(data)
# colnames(data) <- 1:nrow(data)
# if(var == 'dens' & unit == 'V'){
# var <- vorData$dens
# }else if(var == 'night' & unit == 'V'){
# var <- vorData$nightlight
# }else if(var == 'dens' & unit == 'T'){
# var <- densV
# }else if(var == 'night' & unit == 'T'){
# var <- elecV
# }
# return(autoClust(data,var,t,comp,method.clust,method.purity))
}


#####


# From names to clustered
nameClust <- function(mpd,t,comp = NULL,dist = 'Sd',time = 'D',unit = "V",var = 'dens',method.clust = 'complete',method.purity = 'bins'){
data <- read.csv(file = paste("Data/z_dMat_",time,dist,"_",mpd,"_",unit,".csv",sep=""),header = F)
Expand Down Expand Up @@ -895,12 +928,6 @@ autoExpctdFeatP <- function(mpd,t,frame,comp,unit = "V",var = 'dens',method.clus
return(frame)
}

N = 1
frame = clustExpectedDens
var='night',unit="")

range(nightV)

autoExpctdFeatNetP <- function(N,t,frame,comp,unit = "T",var = 'dens',method.clust = 'complete',method.purity = 'bins'){
data <- read.csv(file = paste("Data/featureNetSub",unit,as.character(N),".csv",sep=""),header=F)
data <- dist(data)
Expand Down

0 comments on commit f68ab6f

Please sign in to comment.