Skip to content

Commit

Permalink
Fixed dependency on local timezone; replaced with UTC
Browse files Browse the repository at this point in the history
  • Loading branch information
JuKo007 committed May 23, 2023
1 parent efcdfd9 commit 1c3f752
Show file tree
Hide file tree
Showing 44 changed files with 373 additions and 329 deletions.
32 changes: 16 additions & 16 deletions R/create_chatlog.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
#' @param n_diff_smilies Number of different smilies that are used in the simulated chat.
#' @param n_media Number of messages that contain omitted media files. Must be smaller or equal to n_messages.
#' @param n_sdp Number of messages that contain self-deleting photos. Must be smaller or equal to n_messages.
#' @param startdate Earliest possible date for messages. Format is 'dd.mm.yyyy'. Timestamps for messages are created automatically between startdate and enddate.
#' @param enddate Latest possible date for messages. Format is 'dd.mm.yyyy'. Timestamps for messages are created automatically between startdate and enddate.
#' @param startdate Earliest possible date for messages. Format is 'dd.mm.yyyy'. Timestamps for messages are created automatically between startdate and enddate. Input is interpreted as UTC
#' @param enddate Latest possible date for messages. Format is 'dd.mm.yyyy'. Timestamps for messages are created automatically between startdate and enddate. Input is interpreted as UTC
#' @param language Parameter for the language setting of the exporting phone. Influences structure of system messages
#' @param time_format Parameter for the time format setting of the exporting phone (am/pm vs. 24h). Influences the structure of timestamps.
#' @param os Parameter for the operating system setting of the exporting phone. Influences the structure of timestamps and WhatsApp system messages.
Expand Down Expand Up @@ -87,20 +87,20 @@ create_chatlog <- function(n_messages = 150,
}

# validating startdate
startdate_check <- try(as.Date(startdate, format = "%d.%m.%Y"))
startdate_check <- try(as.Date(startdate, format = "%d.%m.%Y", tz = "UTC"))
if ("try-error" %in% class(startdate_check) || is.na(startdate_check)) {
print("Variable 'startdate' musst be a character string of format dd.mm.YYYY")
}

# validating enddate
enddate_check <- try(as.Date(enddate, format = "%d.%m.%Y"))
enddate_check <- try(as.Date(enddate, format = "%d.%m.%Y", tz = "UTC"))
if ("try-error" %in% class(enddate_check) || is.na(enddate_check)) {
print("Variable 'enddate' musst be a character string of format dd.mm.YYYY")
}

# validate that startdate is before enddate
sDate <- as.POSIXct(as.Date(startdate, format = "%d.%m.%Y"))
eDate <- as.POSIXct(as.Date(enddate, format = "%d.%m.%Y"))
sDate <- as.POSIXct(as.Date(startdate, format = "%d.%m.%Y", tz = "UTC"), tz = "UTC")
eDate <- as.POSIXct(as.Date(enddate, format = "%d.%m.%Y", tz = "UTC"), tz = "UTC")

if (sDate >= eDate) {
warning("starting date must be earlier than ending date.")
Expand Down Expand Up @@ -172,8 +172,8 @@ create_chatlog <- function(n_messages = 150,

# Timestamp function (taken from: https://stackoverflow.com/questions/42021394/random-incremental-timestamp-in-r)
RandomTimeStamp <- function(M, sDate = startdate, eDate = enddate) {
sDate <- as.POSIXct(as.Date(sDate, format = "%d.%m.%Y"))
eDate <- as.POSIXct(as.Date(eDate, format = "%d.%m.%Y"))
sDate <- as.POSIXct(as.Date(sDate, format = "%d.%m.%Y", tz = "UTC"), tz = "UTC")
eDate <- as.POSIXct(as.Date(eDate, format = "%d.%m.%Y", tz = "UTC"), tz = "UTC")
dTime <- as.numeric(difftime(eDate, sDate, units = "sec"))
sTimeStamp <- sort(runif(M, 0, dTime))
TimeStamp <- sDate + sTimeStamp
Expand All @@ -188,29 +188,29 @@ create_chatlog <- function(n_messages = 150,
if (language == "german") {
if (os == "android") {
if (time_format == "24h") {
ts <- strftime(ts, format = "%d.%m.%y, %H:%M - ")
ts <- strftime(ts, format = "%d.%m.%y, %H:%M - ", tz = "UTC")
} else {
ts <- strftime(ts, format = "%d.%m.%y, %I:%M %p - ")
ts <- strftime(ts, format = "%d.%m.%y, %I:%M %p - ", tz = "UTC")
}
} else {
if (time_format == "24h") {
ts <- strftime(ts, format = "[%d.%m.%y, %H:%M:%S] ")
ts <- strftime(ts, format = "[%d.%m.%y, %H:%M:%S] ", tz = "UTC")
} else {
ts <- strftime(ts, format = "[%m/%d/%y, %I:%M:%S %p] ")
ts <- strftime(ts, format = "[%m/%d/%y, %I:%M:%S %p] ", tz = "UTC")
}
}
} else {
if (os == "android") {
if (time_format == "24h") {
ts <- strftime(ts, format = "%m/%d/%y, %H:%M - ")
ts <- strftime(ts, format = "%m/%d/%y, %H:%M - ", tz = "UTC")
} else {
ts <- strftime(ts, format = "%m/%d/%y, %I:%M %p - ")
ts <- strftime(ts, format = "%m/%d/%y, %I:%M %p - ", tz = "UTC")
}
} else {
if (time_format == "24h") {
ts <- strftime(ts, format = "[%m/%d/%y, %H:%M:%S] ")
ts <- strftime(ts, format = "[%m/%d/%y, %H:%M:%S] ", tz = "UTC")
} else {
ts <- strftime(ts, format = "[%m/%d/%y, %I:%M:%S %p] ")
ts <- strftime(ts, format = "[%m/%d/%y, %I:%M:%S %p] ", tz = "UTC")
}
}
}
Expand Down
39 changes: 22 additions & 17 deletions R/plot_emoji.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
#' @description Plots four different types of graphs for the emoji contained in a parsed WhatsApp chat log. Returns dataframe used for plotting if desired.
#' @param data A WhatsApp chat log that was parsed with \code{\link[WhatsR]{parse_chat}}.
#' @param names A vector of author names that the plots will be restricted to.
#' @param starttime Datetime that is used as the minimum boundary for exclusion. Is parsed with \code{\link[anytime]{anytime}}. Standard format is "yyyy-mm-dd hh:mm".
#' @param endtime Datetime that is used as the maximum boundary for exclusion. Is parsed with \code{\link[anytime]{anytime}}. Standard format is "yyyy-mm-dd hh:mm".
#' @param starttime Datetime that is used as the minimum boundary for exclusion. Is parsed with \code{\link[anytime]{anytime}}. Standard format is "yyyy-mm-dd hh:mm". Is interpreted as UTC to be compatible with WhatsApp timestamps.
#' @param endtime Datetime that is used as the maximum boundary for exclusion. Is parsed with \code{\link[anytime]{anytime}}. Standard format is "yyyy-mm-dd hh:mm". Is interpreted as UTC to be compatible with WhatsApp timestamps.
#' @param min_occur Minimum number of occurrences for emoji to be included in the plots. Default is 1.
#' @param return_data If TRUE, returns the subsetted data frame used for plotting. Default is FALSE.
#' @param emoji_vec A vector of emoji that the visualizations and data will be restricted to.
Expand All @@ -29,7 +29,7 @@
plot_emoji <- function(data,
names = "all",
starttime = "1960-01-01 00:00",
endtime = as.character(Sys.time()),
endtime = as.character(as.POSIXct(Sys.time(),tz = "UTC")),
min_occur = 1,
return_data = FALSE,
emoji_vec = "all",
Expand All @@ -43,10 +43,14 @@ plot_emoji <- function(data,
Date <- Sender <- day <- hour <- `Number of Emoji` <- ave <- total <- Var1 <- Freq <- n <- emoji <- Emoji <- Glyph <- NULL

# catching bad params

# checking data
if(!is.data.frame(data)){stop("'data' must be a dataframe parsed with parse_chat()")}

# start- and endtime are convertable to POSIXct
if (is.character(starttime) == FALSE | is.na(anytime(starttime))) stop("starttime has to be a character string in the form of 'yyyy-mm-dd hh:mm' that can be converted by anytime().")
if (is.character(endtime) == FALSE | is.na(anytime(endtime))) stop("endtime has to be a character string in the form of 'yyyy-mm-dd hh:mm' that can be converted by anytime().")
if (anytime(starttime) >= anytime(endtime)) stop("starttime has to be before endtime.")
if (is.character(starttime) == FALSE | is.na(anytime(starttime, asUTC=TRUE,tz="UTC"))) stop("starttime has to be a character string in the form of 'yyyy-mm-dd hh:mm' that can be converted by anytime().")
if (is.character(endtime) == FALSE | is.na(anytime(endtime, asUTC=TRUE,tz="UTC"))) stop("endtime has to be a character string in the form of 'yyyy-mm-dd hh:mm' that can be converted by anytime().")
if (anytime(starttime, asUTC=TRUE,tz="UTC") >= anytime(endtime, asUTC=TRUE,tz="UTC")) stop("starttime has to be before endtime.")

# min_occur needs to be 1 or bigger
if (min_occur < 1) stop("Please provide a min_occur of >= 1.")
Expand All @@ -70,17 +74,17 @@ plot_emoji <- function(data,
Dictionary <- read.csv(system.file("EmojiDictionary.csv", package = "WhatsR"))

# setting starttime
if (starttime == anytime("1960-01-01 00:00")) {
starttime <- min(anytime(data$DateTime, asUTC = TRUE))
if (anytime(starttime, asUTC=TRUE,tz="UTC") <= min(anytime(data$DateTime, asUTC=TRUE,tz="UTC"))) {
starttime <- min(anytime(data$DateTime, asUTC=TRUE,tz="UTC"))
} else {
starttime <- anytime(starttime, asUTC = TRUE)
starttime <- anytime(starttime, asUTC=TRUE,tz="UTC")
}

# setting endtime
if (difftime(Sys.time(), endtime, units = "min") < 1) {
endtime <- max(anytime(data$DateTime, asUTC = TRUE))
if (anytime(endtime, asUTC=TRUE,tz="UTC") >= max(anytime(data$DateTime, asUTC=TRUE,tz="UTC"))) {
endtime <- max(anytime(data$DateTime, asUTC=TRUE,tz="UTC"))
} else {
endtime <- anytime(endtime, asUTC = TRUE)
endtime <- anytime(endtime, asUTC=TRUE,tz="UTC")
}

# setting names argument
Expand Down Expand Up @@ -140,16 +144,17 @@ plot_emoji <- function(data,

}

NewDates <- as.POSIXct(unlist(NewDates),origin = '1970-01-01')
NewDates <- as.POSIXct(unlist(NewDates),origin = '1970-01-01', tz="UTC")

# pasting together
options(stringsAsFactors = FALSE)
NewFrame <- cbind.data.frame(NewDates,NewSender,NewEmoji)

# creating time data
NewFrame$hour <- as.POSIXlt(NewFrame$NewDates)$hour
NewFrame$year <- as.POSIXlt(NewFrame$NewDates)$year + 1900
NewFrame$day <- weekdays(as.POSIXlt(NewFrame$NewDates), abbreviate = FALSE)
# TODO: Maybe this is where it goes wrong?
NewFrame$hour <- as.POSIXlt(NewFrame$NewDates,tz = "UTC")$hour
NewFrame$year <- as.POSIXlt(NewFrame$NewDates,tz = "UTC")$year + 1900
NewFrame$day <- weekdays(as.POSIXlt(NewFrame$NewDates,tz = "UTC"), abbreviate = FALSE)

# setting correct emoji_vec
if (length(emoji_vec) == 1 && emoji_vec == "all") {
Expand Down Expand Up @@ -180,7 +185,7 @@ plot_emoji <- function(data,
# factor ordering
weekdays <- rev(c("Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday"))

# transalte to english for better compatibility
# translate to english for better compatibility
helperframe2$day <- mgsub(helperframe2$day,
pattern = c("Sonntag","Samstag","Freitag","Donnerstag","Mittwoch","Dienstag","Montag"),
replacement = weekdays)
Expand Down
28 changes: 16 additions & 12 deletions R/plot_lexical_dispersion.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
#' @description Visualizes the occurrence of specific keywords within the chat. Requires the raw message content to be contained in the preprocessed data
#' @param data A WhatsApp chatlog that was parsed with \code{\link[WhatsR]{parse_chat}} using anonimize = FALSE or anonimize = "add".
#' @param names A vector of author names that the plots will be restricted to.
#' @param starttime Datetime that is used as the minimum boundary for exclusion. Is parsed with \code{\link[anytime]{anytime}}. Standard format is "yyyy-mm-dd hh:mm".
#' @param endtime Datetime that is used as the maximum boundary for exclusion. Is parsed with \code{\link[anytime]{anytime}}. Standard format is "yyyy-mm-dd hh:mm".
#' @param starttime Datetime that is used as the minimum boundary for exclusion. Is parsed with \code{\link[anytime]{anytime}}. Standard format is "yyyy-mm-dd hh:mm". Is interpreted as UTC to be compatible with WhatsApp timestamps.
#' @param endtime Datetime that is used as the maximum boundary for exclusion. Is parsed with \code{\link[anytime]{anytime}}. Standard format is "yyyy-mm-dd hh:mm". Is interpreted as UTC to be compatible with WhatsApp timestamps.
#' @param keywords A vector of keywords to be displayed, default is c("hello","world").
#' @param return_data Default is FALSE, returns data frame used for plotting when TRUE.
#' @param exclude_sm If TRUE, excludes the WhatsApp System Messages from the descriptive statistics. Default is FALSE.
Expand All @@ -23,17 +23,21 @@
plot_lexical_dispersion <- function(data,
names = "all",
starttime = "1960-01-01 00:00",
endtime = as.character(Sys.time()),
endtime = as.character(as.POSIXct(Sys.time(),tz = "UTC")),
keywords = c("hello", "world"),
return_data = FALSE,
exclude_sm = FALSE,
...) {

# catching bad params

# checking data
if(!is.data.frame(data)){stop("'data' must be a dataframe parsed with parse_chat()")}

# start- and endtime are convertable to POSIXct
if (is.character(starttime) == FALSE | is.na(anytime(starttime))) stop("starttime has to be a character string in the form of 'yyyy-mm-dd hh:mm' that can be converted by anytime().")
if (is.character(endtime) == FALSE | is.na(anytime(endtime))) stop("endtime has to be a character string in the form of 'yyyy-mm-dd hh:mm' that can be converted by anytime().")
if (anytime(starttime) >= anytime(endtime)) stop("starttime has to be before endtime.")
if (is.character(starttime) == FALSE | is.na(anytime(starttime, asUTC=TRUE,tz="UTC"))) stop("starttime has to be a character string in the form of 'yyyy-mm-dd hh:mm' that can be converted by anytime().")
if (is.character(endtime) == FALSE | is.na(anytime(endtime, asUTC=TRUE,tz="UTC"))) stop("endtime has to be a character string in the form of 'yyyy-mm-dd hh:mm' that can be converted by anytime().")
if (anytime(starttime, asUTC=TRUE,tz="UTC") >= anytime(endtime, asUTC=TRUE,tz="UTC")) stop("starttime has to be before endtime.")

# Mesage column must be contained
if (!is.character(data$Flat)) {stop("'data' must contain a character column named 'Message'")}
Expand All @@ -58,17 +62,17 @@ plot_lexical_dispersion <- function(data,
keywords <- tolower(keywords)

# setting starttime
if (starttime == anytime("1960-01-01 00:00")) {
starttime <- min(anytime(data$DateTime, asUTC = TRUE))
if (anytime(starttime, asUTC=TRUE,tz="UTC") <= min(anytime(data$DateTime, asUTC=TRUE,tz="UTC"))) {
starttime <- min(anytime(data$DateTime, asUTC=TRUE,tz="UTC"))
} else {
starttime <- anytime(starttime, asUTC = TRUE)
starttime <- anytime(starttime, asUTC=TRUE,tz="UTC")
}

# setting endtime
if (difftime(Sys.time(), endtime, units = "min") < 1) {
endtime <- max(anytime(data$DateTime, asUTC = TRUE))
if (anytime(endtime, asUTC=TRUE,tz="UTC") >= max(anytime(data$DateTime, asUTC=TRUE,tz="UTC"))) {
endtime <- max(anytime(data$DateTime, asUTC=TRUE,tz="UTC"))
} else {
endtime <- anytime(endtime, asUTC = TRUE)
endtime <- anytime(endtime, asUTC=TRUE,tz="UTC")
}

# setting names argument
Expand Down
36 changes: 20 additions & 16 deletions R/plot_links.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
#' @description Visualizes the occurrence of links in a WhatsApp chatlog
#' @param data A WhatsApp chatlog that was parsed with \code{\link[WhatsR]{parse_chat}}.
#' @param names A vector of author names that the plots will be restricted to.
#' @param starttime Datetime that is used as the minimum boundary for exclusion. Is parsed with \code{\link[anytime]{anytime}}. Standard format is "yyyy-mm-dd hh:mm".
#' @param endtime Datetime that is used as the maximum boundary for exclusion. Is parsed with \code{\link[anytime]{anytime}}. Standard format is "yyyy-mm-dd hh:mm".
#' @param starttime Datetime that is used as the minimum boundary for exclusion. Is parsed with \code{\link[anytime]{anytime}}. Standard format is "yyyy-mm-dd hh:mm". Is interpreted as UTC to be compatible with WhatsApp timestamps.
#' @param endtime Datetime that is used as the maximum boundary for exclusion. Is parsed with \code{\link[anytime]{anytime}}. Standard format is "yyyy-mm-dd hh:mm". Is interpreted as UTC to be compatible with WhatsApp timestamps.
#' @param use_domains If TRUE, links are shortened to domains. This includes the inputs in link_vec. Default is TRUE.
#' @param exclude_long Either NA or a numeric value. If numeric value is provided, removes all links/domains longer than x characters. Default is 50.
#' @param min_occur The minimum number of occurrences a link has to have to be included in the visualization. Default is 1.
Expand All @@ -28,7 +28,7 @@
plot_links <- function(data,
names = "all",
starttime = "1960-01-01 00:00",
endtime = as.character(Sys.time()),
endtime = as.character(as.POSIXct(Sys.time(),tz = "UTC")),
use_domains = TRUE,
exclude_long = 50,
min_occur = 1,
Expand All @@ -41,10 +41,14 @@ plot_links <- function(data,
Date <- Sender <- Links <- URL <- day <- hour <- n <- `Number of Links` <- ave <- total <- Var1 <- Freq <- NULL

# catching bad params

# checking data
if(!is.data.frame(data)){stop("'data' must be a dataframe parsed with parse_chat()")}

# start- and endtime are convertable to POSIXct
if (is.character(starttime) == FALSE | is.na(anytime(starttime))) stop("starttime has to be a character string in the form of 'yyyy-mm-dd hh:mm' that can be converted by anytime().")
if (is.character(endtime) == FALSE | is.na(anytime(endtime))) stop("endtime has to be a character string in the form of 'yyyy-mm-dd hh:mm' that can be converted by anytime().")
if (anytime(starttime) >= anytime(endtime)) stop("starttime has to be before endtime.")
if (is.character(starttime) == FALSE | is.na(anytime(starttime, asUTC=TRUE,tz="UTC"))) stop("starttime has to be a character string in the form of 'yyyy-mm-dd hh:mm' that can be converted by anytime().")
if (is.character(endtime) == FALSE | is.na(anytime(endtime, asUTC=TRUE,tz="UTC"))) stop("endtime has to be a character string in the form of 'yyyy-mm-dd hh:mm' that can be converted by anytime().")
if (anytime(starttime, asUTC=TRUE,tz="UTC") >= anytime(endtime, asUTC=TRUE,tz="UTC")) stop("starttime has to be before endtime.")

# min_occur must be >= 1
if (min_occur < 1) stop("Please provide a min_occur of >= 1.")
Expand Down Expand Up @@ -89,17 +93,17 @@ plot_links <- function(data,
options(dplyr.summarise.inform = FALSE)

# setting starttime
if (starttime == anytime("1960-01-01 00:00")) {
starttime <- min(anytime(data$DateTime, asUTC = TRUE))
if (anytime(starttime, asUTC=TRUE,tz="UTC") <= min(anytime(data$DateTime, asUTC=TRUE,tz="UTC"))) {
starttime <- min(anytime(data$DateTime, asUTC=TRUE,tz="UTC"))
} else {
starttime <- anytime(starttime, asUTC = TRUE)
starttime <- anytime(starttime, asUTC=TRUE,tz="UTC")
}

# setting endtime
if (difftime(Sys.time(), endtime, units = "min") < 1) {
endtime <- max(anytime(data$DateTime, asUTC = TRUE))
if (anytime(endtime, asUTC=TRUE,tz="UTC") >= max(anytime(data$DateTime, asUTC=TRUE,tz="UTC"))) {
endtime <- max(anytime(data$DateTime, asUTC=TRUE,tz="UTC"))
} else {
endtime <- anytime(endtime, asUTC = TRUE)
endtime <- anytime(endtime, asUTC=TRUE,tz="UTC")
}

# setting names argument
Expand Down Expand Up @@ -151,7 +155,7 @@ plot_links <- function(data,
NewDates[[i]] <- rep(data$DateTime[i], NoElements[i])
}

NewDates <- as.POSIXct(unlist(NewDates), origin = "1970-01-01")
NewDates <- as.POSIXct(unlist(NewDates), origin = "1970-01-01",tz = "UTC")

# shorten URLs to domain
if (use_domains == TRUE) {
Expand All @@ -166,9 +170,9 @@ plot_links <- function(data,
NewFrame <- cbind.data.frame(NewDates, NewSender, NewUrls)

# creating time data
NewFrame$hour <- as.POSIXlt(NewFrame$NewDates)$hour
NewFrame$year <- as.POSIXlt(NewFrame$NewDates)$year + 1900
NewFrame$day <- weekdays(as.POSIXlt(NewFrame$NewDates), abbreviate = FALSE)
NewFrame$hour <- as.POSIXlt(NewFrame$NewDates,tz = "UTC")$hour
NewFrame$year <- as.POSIXlt(NewFrame$NewDates,tz = "UTC")$year + 1900
NewFrame$day <- weekdays(as.POSIXlt(NewFrame$NewDates,tz = "UTC"), abbreviate = FALSE)

# setting correct link_vec
if (length(link_vec) == 1 && link_vec == "all") {
Expand Down
Loading

0 comments on commit 1c3f752

Please sign in to comment.