release v0.1.4 - implement tolerance

- simple day/amount tolerance implementation
- worst/best case scenarios are plotted as a light shadow in the background
nobodyinperson · Jan 13, 2017 · d96d513 · d96d513
1 parent 549acf8
commit d96d513
Show file tree

Hide file tree

Showing 5 changed files with 175 additions and 123 deletions.
diff --git a/debian/changelog b/debian/changelog
@@ -1,8 +1,9 @@
-simbuto (0.1.4) UNRELEASED; urgency=medium
+simbuto (0.1.4) unstable; urgency=medium
 
-  * implement tolerance
+  * implement amount and day tolerance
+  * worst/best case scenarios are plotted as light shadow in the background
 
- -- Yann Büchau <[email protected]>  Wed, 11 Jan 2017 22:38:12 +0100
+ -- Yann Büchau <[email protected]>  Fri, 13 Jan 2017 12:34:50 +0100
 
 simbuto (0.1.3) unstable; urgency=medium
 

diff --git a/usr/lib/simbuto/python/simbuto/gui.py b/usr/lib/simbuto/python/simbuto/gui.py
@@ -480,6 +480,7 @@ def update_graph_from_editor(self, *args, size=None):
         name =  "{}.png".format(currentfile)
         filename = os.path.join(config.personal_simbuto_dotfolder(),
             "plots",name)
+        self.update_statusbar(_("updating graph..."))
         success = self.signalmanager.emit_signal("create-graph-from-text",
             filename=filename, # to this file
             text = self.current_editor_content, # this text

diff --git a/usr/lib/simbuto/python/simbuto/manager.py b/usr/lib/simbuto/python/simbuto/manager.py
@@ -130,10 +130,8 @@ def create_png_graph_from_text(self, text, filename,
             # create the budget from text
             budget_frame = R.read_budget_from_text(text = text)
             # create the timeseries from the budget
-            # timeseries_frame = R.timeseries_from_budget(budget = budget_frame,
-            #     start = start_date, end = end_date)
-            timeseries_frame = R.budget_ensemble(budget = budget_frame,
-                start = start_date, end = end_date, ensemble_size = 100)
+            timeseries_frame = R.timeseries_from_budget(budget = budget_frame,
+                start = start_date, end = end_date)
             # plot to png
             R.plot_budget_timeseries_to_png(filename=filename,
                 timeseries = timeseries_frame, width = width, height = height)

diff --git a/usr/lib/simbuto/r/simbuto-functions.R b/usr/lib/simbuto/r/simbuto-functions.R
@@ -5,92 +5,154 @@
 read_budget_from_text <- function(text) {
     BUDGET <- read.csv2(text=text, stringsAsFactors = F, na.strings = c("NA"),comment.char="#")
     BUDGET$amount <- as.numeric(BUDGET$amount)
-    if(!is.null(BUDGET$tolerance))
-        BUDGET$tolerance <- as.numeric(BUDGET$tolerance)
+    if(!is.null(BUDGET$tolerance_amount))
+        BUDGET$tolerance_amount <- as.numeric(BUDGET$tolerance_amount)
     BUDGET$start <- as.Date(BUDGET$start,format="%F")
     BUDGET$end <- as.Date(BUDGET$end,format="%F")
+    BUDGET$frequency[BUDGET$frequency == "monthly"] = "month"
+    BUDGET$frequency[BUDGET$frequency == "weekly"] = "week"
+    BUDGET$frequency[BUDGET$frequency == "yearly"] = "year"
+    BUDGET$frequency[BUDGET$frequency == "daily"] = "day"
     return(BUDGET)
 }
 
 timeseries_from_budget <- function(
     budget, 
     start = Sys.Date(), end = Sys.Date() + 365,
-    with_tolerance = FALSE,
-    random_tolerance = FALSE
+    ensemble_size = NULL
     ) {
     # create empty frame with day series
     all.days <- seq.Date(from = start, to = end, by = "days")
     MONEY <- data.frame(day = all.days, amount = 0)
-    if(with_tolerance) 
-        MONEY$mincase <- MONEY$maxcase <- 0
 
-    for (factnr in 1:nrow(budget)) { # loop over all facts
+    # start with empty series
+    worstcase <- bestcase <- undisturbed <- rep(0, nrow(MONEY))
+    # loop over all facts
+    for (factnr in 1:nrow(budget)) {
         fact <- budget[factnr,] # current fact
         # create sequence of occurence days
         fact.start <- if(is.na(fact$start)){start}else{fact$start}
         fact.end   <- if(is.na(fact$end)){end}else{fact$end}
         # cat("fact ",fact$title," occurs ",fact$frequency," from ",fact.start," to ",fact.end,"\n")
-
-        number.occurences <- NULL
-        if(fact$frequency == "once") {
-            number.occurences <- 1
-            interval <- NULL
-        } else if(fact$frequency == "monthly") {
-            interval <- "month"
-        } else if(fact$frequency == "yearly") {
-            interval <- "year"
-        } else if(fact$frequency == "weekly") {
-            interval <- "week"
-        } else if(fact$frequency == "dayly") {
-            interval <- "day"
-        } else {
-            interval <- fact$frequency
+        interval = fact$frequency
+        if(interval == "once") {
+            fact.end <- fact.start
+            interval = "day" # pick any interval, doesn't matter
         }
-
-
         # cat("from=",fact.start," to=",fact.end," by=",interval," length.out=",number.occurences,"\n")
         occurences <- c()
-        if(fact.start < fact.end) {
-            if(is.numeric(number.occurences)){
-                occurences <- seq.Date(from = fact.start, to = fact.end, length.out = number.occurences)
-            } else {
-                occurences <- seq.Date(from = fact.start, to = fact.end, by = interval)
-            }
+        if(fact.start <= fact.end) {
+            occurences <- seq.Date(from = fact.start, to = fact.end, by = interval)
         }
 
+        occurences_bool <- MONEY$day %in% occurences
+
         # get the indices
         indices <- na.omit(match(x = occurences, table = MONEY$day))
-        # cat("indices: ",indices)
+        undisturbed <- undisturbed + fact_amounts_series(
+            occurences = occurences_bool, fact = fact,with_tolerance = FALSE)
+        worstcase <- worstcase + fact_amounts_series(
+            occurences = occurences_bool, fact = fact,with_tolerance = TRUE, 
+            worst_case = TRUE )
+        bestcase <- bestcase + fact_amounts_series(
+            occurences = occurences_bool, fact = fact,with_tolerance = TRUE, 
+            worst_case = FALSE )
+    }
 
-        # generate random sequence
-        fact.amount = fact$amount
-        amounts <- fact.amount
-        fact.mincase <- fact.maxcase <- fact.amount
-        if(with_tolerance) { # only if specified
-            if(any(is.finite(fact$tolerance))) { # only if tolerance is given
-                fact.tolerance = abs(as.numeric(fact["tolerance"]))
-                if(random_tolerance) {
-                    # cat("tolerance: ",fact.tolerance,"\n")
-                    amounts <- runif(length(indices),
-                                    min = fact.amount - fact.tolerance,
-                                    max = fact.amount + fact.tolerance)
-                } else {
-                    fact.mincase <- fact.amount - fact.tolerance
-                    fact.maxcase <- fact.amount + fact.tolerance
-                }
+    # cumulate
+    MONEY$amount    = cumsum(undisturbed)
+    MONEY$worstcase = cumsum(worstcase)
+    MONEY$bestcase  = cumsum(bestcase)
+    # empty data frame
+    return(MONEY)
+}
+
+fact_amounts_series <- function(
+    fact, # the fact dataframe row/list
+    occurences, # boolean vector with TRUE where the fact occurs, output has same length
+    with_tolerance = FALSE, # use the tolerance?
+    worst_case = FALSE, # TRUE = worst_case, FALSE = best_case
+    random_tolerance = FALSE # if using the tolerance, randomize?
+    ) {
+    stopifnot(nrow(fact)==1)
+
+    # the indices where the fact occurs
+    indices <- which(occurences)
+    # the output sequence starts with zeros everywhere
+    out <- rep(0,length(occurences))
+    # output length
+    N <- length(out)
+
+    # the tolerances
+    fact_tolerance_day <- 0
+    if(any(is.finite(fact$tolerance_day)))
+            fact_tolerance_day = as.integer(abs(fact$tolerance_day))
+    fact_tolerance_amount <- 0
+    if(any(is.finite(fact$tolerance_amount)))
+            fact_tolerance_amount = abs(fact$tolerance_amount)
+    # fact data
+    fact_amount <- 0
+    if(any(is.finite(fact$amount)))
+            fact_amount = fact$amount
+
+    if(with_tolerance) {
+        if(random_tolerance) {
+            # modify amount randomly
+            amounts <- runif( n = length(indices), 
+                              min = fact_amount - fact_tolerance_amount,
+                              max = fact_amount + fact_tolerance_amount
+                              )
+            # modify indices randomly
+            indices <- indices + runif( n = length(indices), 
+                              min = - fact_tolerance_amount,
+                              max = + fact_tolerance_amount
+                              )
+        } else {
+            if(worst_case) {
+                # worst case: all costs are highest
+                # worst case: all incomes are lowest
+                amounts <- rep(fact_amount - fact_tolerance_amount, length(indices))
+                # worst case: all costs are earliest
+                # worst case: all incomes are latest
+                indices <- indices + sign(fact_amount) * fact_tolerance_day
+            } else {
+                # best case: all costs are lowest
+                # best case: all incomes are highest
+                amounts <- rep(fact_amount + fact_tolerance_amount, length(indices))
+                # best case: all costs are latest
+                # best case: all incomes are earliest
+                indices <- indices - sign(fact_amount) * fact_tolerance_day
             }
-            MONEY[indices,"mincase"] = MONEY[indices,"mincase"] + fact.mincase
-            MONEY[indices,"maxcase"] = MONEY[indices,"maxcase"] + fact.maxcase
+        # fix indices that lie outside the output vector
+        # cat("indices before fixing: ",indices,"\n")
+        # cat("amounts before fixing: ",amounts,"\n")
+        tooearly <- which(indices < 1)
+        # cat("too early indices: ",indices[tooearly],"\n")
+        # stopifnot(length(tooearly)==0)
+        toolate  <- which(indices > N)
+        # cat("too late indices: ",indices[toolate],"\n")
+        outside  <- sort(unique(c(tooearly,toolate)))
+        out[1] <- out[1] + sum(amounts[tooearly])
+        out[N] <- out[N] + sum(amounts[toolate])
+        if(length(outside)>0) {
+            amounts <- amounts[-(outside)]
+            indices <- indices[-(outside)]
         }
-
-        # add to time series
-        MONEY[indices,"amount"] = MONEY[indices,"amount"] + amounts
+        # cat("indices after fixing: ",indices,"\n")
+        # cat("amounts after fixing: ",amounts,"\n")
+        }
+    } else {
+        # keep amount
+        amounts <- rep(fact_amount, length(indices))
     }
-    MONEY$amount = cumsum(MONEY$amount)
-    MONEY$mincase = cumsum(MONEY$mincase)
-    MONEY$maxcase = cumsum(MONEY$maxcase)
-    # empty data frame
-    return(MONEY)
+    # cat("amounts before putting into out: ",amounts,"\n")
+
+    # set the amounts to the indices
+    out[indices] <- amounts
+    # cat("out: ",out,"\n")
+
+    # return out vector
+    return(out)
 }
 
 budget_ensemble<- function( budget,  
@@ -103,7 +165,7 @@ budget_ensemble<- function( budget,
         random_tolerance = FALSE)
     # the ensemble out starts with the bare run
     ENSEMBLE_OUT <- timeseries_without_tolerance
-    if(any(is.finite(budget$tolerance))) {
+    if(any(is.finite(budget$tolerance_amount))) {
         # create ensemble matrix
         ENSEMBLE <- matrix(NA,nrow=ensemble_size, ncol = nrow(ENSEMBLE_OUT))
         # do the runs
@@ -128,8 +190,8 @@ budget_ensemble<- function( budget,
 }
 
 plot_budget_timeseries <- function(timeseries) {
-    plotrange <- range(c(timeseries$amount,timeseries$mincase,
-                         timeseries$maxcase,timeseries$ensmin,timeseries$ensmax))
+    plotrange <- range(c(timeseries$amount,timeseries$worstcase,
+                         timeseries$bestcase,timeseries$ensmin,timeseries$ensmax))
     # base plot
     plot(timeseries$day,timeseries$amount,type="n",xaxt="n",yaxt="n"
          ,ylab="",xlab="",ylim=plotrange,
@@ -158,23 +220,11 @@ plot_budget_timeseries <- function(timeseries) {
     do.call(rect, bad)
 
     # worst/best cases
-    if(!is.null(timeseries$mincase) & !is.null(timeseries$maxcase)) {
+    if(!is.null(timeseries$worstcase) & !is.null(timeseries$bestcase)) {
         polygon(x = c(timeseries$day,rev(timeseries$day)), 
-                y = c(timeseries$mincase,rev(timeseries$maxcase)),
+                y = c(timeseries$worstcase,rev(timeseries$bestcase)),
                 col = "#00000022",border=NA)
     }
-    # ensemble max/min
-    if(!is.null(timeseries$ensmin) & !is.null(timeseries$ensmax)) {
-        polygon(x = c(timeseries$day,rev(timeseries$day)), 
-                y = c(timeseries$ensmax,rev(timeseries$ensmin)),
-                col = "#00000033",border=NA)
-    }
-    # ensemble quantiles
-    if(!is.null(timeseries$ensquant25) & !is.null(timeseries$ensquant75)) {
-        polygon(x = c(timeseries$day,rev(timeseries$day)), 
-                y = c(timeseries$ensquant75,rev(timeseries$ensquant25)),
-                col = "#00000044",border=NA)
-    }
     # raw run
     lines(x = timeseries$day, y = timeseries$amount
           ,lwd = 4
@@ -190,9 +240,7 @@ plot_budget_timeseries_to_png <- function(timeseries,filename,width=600,height=4
 
 #### read data ####
 # BUDGET <- read_budget_from_text(readLines("~/Downloads/budget.simbuto"))
-# # MONEY <- timeseries_from_budget(budget = BUDGET, with_tolerance = TRUE)
-# MONEY <- budget_ensemble(budget = BUDGET)
+# MONEY <- timeseries_from_budget(budget = BUDGET)
 # cat("plotting...")
 # plot_budget_timeseries(MONEY)
 # cat("done!\n")
-#