Merge pull request #41 from USEPA/disagg_state_data

Minor updates to support disaggregation
USEPA · Oct 26, 2024 · 2c18a58 · 2c18a58
2 parents cfad0cd + 7a53ca2
commit 2c18a58
Show file tree

Hide file tree

Showing 10 changed files with 136 additions and 2,113 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -40,7 +40,7 @@ Imports:
 Depends:
   R (>= 3.6)
 Remotes:
-  github::USEPA/useeior@linkstateior
+  github::USEPA/useeior@linkstateiorImportF
 License: file LICENSE
 Encoding: UTF-8
 LazyData: true

diff --git a/R/BuildModel.R b/R/BuildModel.R
@@ -550,7 +550,9 @@ buildTwoRegionUseModel <- function(state, year, ioschema, iolevel,
   } else {
     q_SoI_use <- rowSums(SoI2SoI_Use[, c(industries, FD_cols, "ExportResidual")]) + rowSums(SoI2RoUS_Use[, c(industries, FD_cols)])
   }
-  if (max(abs((q_SoI - q_SoI_use)/q_SoI_use)) > 1E-2) {
+  q_SoI_check <- abs((q_SoI - q_SoI_use)/q_SoI_use)
+  q_SoI_check[is.na(q_SoI_check)] <- 0 # Convert all N/As to 0
+  if (max(q_SoI_check) > 1E-2) {
     if (domestic) {
       stop(paste0(state, "'s commodity output summed from two-region Domestic Use table ",
                   "doesn't equal to ", state, "'s commodity output."))
@@ -566,7 +568,9 @@ buildTwoRegionUseModel <- function(state, year, ioschema, iolevel,
   } else {
     q_RoUS_use <- rowSums(RoUS2RoUS_Use[, c(industries, FD_cols, "ExportResidual")]) + rowSums(RoUS2SoI_Use[, c(industries, FD_cols)])
   }
-  if (max(abs((q_RoUS - q_RoUS_use)/q_RoUS_use)) > 1E-2) {
+  q_RoUS_check <- abs((q_RoUS - q_RoUS_use)/q_RoUS_use)
+  q_RoUS_check[is.na(q_RoUS_check)] <- 0 # Convert all N/As to 0
+  if (max(q_RoUS_check) > 1E-2) {
     if (domestic) {
       stop(paste0("RoUS (of ", state, ")'s commodity output summed from two-region Domestic Use table ",
                   "doesn't equal to RoUS's commodity output."))
@@ -631,8 +635,7 @@ assembleTwoRegionIO <- function(year, iolevel, disagg_specs=NULL) {
 
     # Initialize model 
     model <- getStateModelDisaggSpecs(disagg_specs)
-    #stateDisaggFile <- "Employment_ratios.csv"
-    #model <- getStateModelDisaggSpecs(disagg_specs, stateDisaggFile)
+
 
     if(length(model$DisaggregationSpecs)!=0){
       disagg <- model$DisaggregationSpecs[[1]]
@@ -647,7 +650,15 @@ assembleTwoRegionIO <- function(year, iolevel, disagg_specs=NULL) {
       model$DomesticFullUse <- US_DomesticUse # Note that the domestic full use object does not include value added rows
 
       # Disaggregate national model objects once (i.e. not for each state)
+      if(!is.null(disagg$stateDF)){
+
+        #model <- createDisaggFilesFromProxyData(model, disagg, year, "US") #Function to disagg by proxy
+        model <- useeior:::createDisaggFilesFromProxyData(model, disagg, year, "US") #Function to disagg by proxy
+        disagg <- model$DisaggregationSpecs[[disagg$OriginalSectorCode]] #update disagg
+      }
+
       model <- disaggregateNationalObjectsInStateModel(model, disagg)
+      ## ^^ this should always use national level disaggregation data
 
       # Assign the disaggregated model objects to the original stateior objects, rename some as national
       US_Make <- model$MakeTransactions
@@ -666,6 +677,12 @@ assembleTwoRegionIO <- function(year, iolevel, disagg_specs=NULL) {
         model$CommodityOutput <- State_CommodityOutput_ls[[state]]
         model$IndustryOutput <- State_IndustryOutput_ls[[state]]
 
+        if(!is.null(disagg$stateDF)) {
+          # Get the correct disaggregation percentages for each state
+          #model <- createDisaggFilesFromProxyData(model, disagg, year, state)
+          model <- useeior:::createDisaggFilesFromProxyData(model, disagg, year, state)
+          disagg <- model$DisaggregationSpecs[[disagg$OriginalSectorCode]]
+        } 
         model <- disaggregateStateModel(model, state)
 
         # Assign the disaggregated model objects to the stateior lists
@@ -693,13 +710,7 @@ assembleTwoRegionIO <- function(year, iolevel, disagg_specs=NULL) {
 
   # Assemble two-region IO tables 
   TwoRegionIO <- list()
-  #TEMPORARY FOR DEBUGGING
-  for (state in c("Virginia")){
-#  for (state in sort(c(state.name, "District of Columbia"))) {
-
-    if(state == "Virginia"){
-      temp <-1
-    }
+  for (state in sort(c(state.name, "District of Columbia"))) {
 
     ## Two-region Make
     model$MakeTransactions <- State_Make_ls[[state]]
@@ -787,6 +798,10 @@ assembleTwoRegionIO <- function(year, iolevel, disagg_specs=NULL) {
     names(RoUS_ITA) <- getBEASectorCodeLocation("Commodity", "RoUS", iolevel, disagg)
     TwoRegionIO[["InternationalTradeAdjustment"]][[state]] <- c(SoI_ITA, RoUS_ITA)
 
+    # if(!is.null(disagg_specs)){
+    #   model <- useeior:::balanceDisagg(model, disagg)
+    # }
+
     print(state)
   }
   return(TwoRegionIO)

diff --git a/R/InteregionalCommodityFlowFunctions.R b/R/InteregionalCommodityFlowFunctions.R
@@ -143,7 +143,8 @@ generateDomestic2RegionICFs <- function(state, year, ioschema, iolevel,
   # Merge ICF_2r_wide with complete BEA Commodity list
   CommodityCodeName <- loadDatafromUSEEIOR(paste(iolevel,
                                                  "CommodityCodeName_2012",
-                                                 sep = "_"))
+                                                 sep = "_"),
+                                           appendSchema = FALSE)
   # Update commodities from disaggregation
   if (!is.null(disagg)){
     disagg_df <- data.frame(BEA_2012_Summary_Commodity_Code = c("221100", "221200", "221300"),

diff --git a/R/StateDisaggFunctions.R b/R/StateDisaggFunctions.R
@@ -12,8 +12,12 @@ getStateModelDisaggSpecs <- function(configfile, statefile = NULL){
   disaggConfigpath <- system.file(paste0("extdata/disaggspecs/"), paste0(configfile,".yml"), package = "stateior")
   model <- useeior:::getDisaggregationSpecs(model, disaggConfigpath, pkg = "stateior")
 
-  if(!is.null(statefile)){
-    model$specs$StateDisaggSpecs <- getStateSpecificDisaggSpecs(disaggConfigpath, statefile)
+  for(disagg in model$DisaggregationSpecs)
+  {
+    if(!is.null(disagg$stateFile)){
+      disagg$stateDF <- getStateSpecificDisaggSpecs(disaggConfigpath, disagg$stateFile) 
+      model$DisaggregationSpecs[[disagg$OriginalSectorCode]] <- disagg
+    }
   }
 
   return(model)
@@ -26,8 +30,6 @@ getStateModelDisaggSpecs <- function(configfile, statefile = NULL){
 #' @param disaggConfigpath str, path for statefile 
 #' @return A stateior model object with the state-specific disaggregation specs included in model$specs$STateDisaggSpecs object
 getStateSpecificDisaggSpecs <- function(disaggConfigpath, statefile){
-
-  temp <-1
   filename <- file.path(dirname(disaggConfigpath), statefile)
   stateFileDF <- utils::read.table(filename, sep = ",", header = TRUE, stringsAsFactors = FALSE, check.names = FALSE)
   return(stateFileDF)
@@ -44,7 +46,7 @@ disaggregateStateModel <- function(model, state){
 
   for (disagg in model$DisaggregationSpecs){
 
-    logging::loginfo(paste0("Disaggregating ", disagg$OrignalSectorName," for ", state))
+    logging::loginfo(paste0("Disaggregating ", disagg$OriginalSectorName," for ", state))
 
     # Formatting model objects according to useeior disaggregation formats
     model$MakeTransactions <- formatMakeFromStateToUSEEIO(model, state) #Formatting MakeTransactions object
@@ -105,8 +107,8 @@ disaggregateNationalObjectsInStateModel <- function(model, disagg){
   model <- splitFullUse(model, domestic = FALSE)
 
   # Disaggregate model objects
-  model$MakeTransactions <- useeior:::disaggregateMakeTable(model, disagg)
-  model$MakeTransactions[is.na(model$MakeTransactions)] <- 0
+  # model$MakeTransactions <- useeior:::disaggregateMakeTable(model, disagg)
+  # model$MakeTransactions[is.na(model$MakeTransactions)] <- 0
 
   model$DomesticUseTransactions <- useeior:::disaggregateUseTable(model, disagg, domestic = TRUE)
   model$DomesticUseTransactions[is.na(model$DomesticUseTransactions)] <- 0
@@ -119,6 +121,9 @@ disaggregateNationalObjectsInStateModel <- function(model, disagg){
   model$UseValueAdded <- useeior:::disaggregateVA(model, disagg)
   model$UseValueAdded[is.na(model$UseValueAdded)] <- 0  
 
+  model$MakeTransactions <- useeior:::disaggregateMakeTable(model, disagg)
+  model$MakeTransactions[is.na(model$MakeTransactions)] <- 0
+
   model$Industries <- disaggregateStateSectorLists(model$Industries, disagg)
   model$Commodities <- disaggregateStateSectorLists(model$Commodities, disagg)
 
@@ -218,7 +223,7 @@ formatFullUseFromUSEEIOtoState <- function(model, state, domestic = FALSE){
 
     tempVA <- cbind(model$UseValueAdded, VAbyFDSection) # combine UseValueAdded and VAbyFDSection columns
 
-    # Assemble FullUse table and remane according to stateior formats
+    # Assemble FullUse table and rename according to stateior formats
     model$FullUse <- rbind(tempFullUse, tempVA)
     rownames(model$FullUse) <- gsub("\\/.*","",rownames(model$FullUse)) # remove everything after "/"
     colnames(model$FullUse) <- gsub("\\/.*","",colnames(model$FullUse)) # remove everything after "/"
@@ -262,7 +267,7 @@ calculateStateIndustryCommodityOuput <- function(model){
   rownames(model$IndustryOutput) <- rowLabels
 
   # Calculating and formatting CommodityOuput
-  model$CommodityOutput <- data.frame(rowSums(model$UseTransactions) + rowSums(model$FinalDemand))
+  model$CommodityOutput <- data.frame(colSums(model$MakeTransactions))
   colnames(model$CommodityOutput) <- "Output"
   rowLabels <- rownames(model$CommodityOutput)
   rowLabels <- gsub("\\/.*","",rowLabels) # remove everything after "/"
@@ -278,11 +283,89 @@ calculateStateIndustryCommodityOuput <- function(model){
 disaggregateStateSectorLists <- function(code_vector, disagg) {
 
   originalSectorCode <- gsub("\\/.*","",disagg$OriginalSectorCode) # remove everything after "/"
-  disaggCodes <- gsub("\\/.*","",disagg$DisaggregatedSectorCodes) # remove everything after "/"
+  disaggCodes <- gsub("\\/.*","",disagg$NewSectorCodes) # remove everything after "/"
   originalIndex <- grep(paste0("^",originalSectorCode,"$"), code_vector) # the ^ and $ are required to find an exact match
 
   newList <- append(code_vector[1:originalIndex -1], disaggCodes)
   newList <- append(newList, code_vector[-(1:originalIndex)] ) # have to do this in two steps otherwise get an error
 
   return(newList)
 }
+
+
+#### Functions below this line are used for creating disaggFiles from Proxy data, e.g., Make and USe files from Employment ratios.
+
+#' Create Make, Use, and Env ratio files for each state from Proxy data for the relevant sectors.
+#' @param model An stateior model object with model specs and specific IO tables loaded
+#' @param disagg Specifications for disaggregating the current Table
+#' @param disaggYear Integer specifying the state model year
+#' @param disaggState A string value that indicates the state model being disaggregated. For national models, string should be "US"
+#' @return 
+createDisaggFilesFromProxyData <- function(model, disagg, disaggYear, disaggState){
+
+  # Note: this function assumes: 
+  # 1) The disaggregation will use the same proxy values for all disaggregated sectors across all rows and columns. 
+  #    That is, if we are disaggregating Summary 22 into the 3 Detail utility sectors, and the proxy allocations are (for example) 0.5/0.25/0.25, then 
+  #    in the Use table, the three Detail utility commodities (rows) will have that same split for across all columns (industries/final demand)
+  # 2) The disagg parameter will contain a disagg$stateDF variable that includes the data for the relevant disaggState and disaggYear parameters.
+
+  stop("The function is not yet valid")
+
+  temp <-1
+
+  #Get subset of ratios for current year
+  stateDFYear <- subset(disagg$stateDF, Year == disaggYear & State == disaggState)
+
+  # If the state/year combination is not found, assume a uniform split between sectors
+  if(dim(stateDFYear)[1] == 0){
+
+    activity <- unlist(disagg$NewSectorCodes)
+    uniformAllocationVector <- 1/length(disagg$NewSectorCodes)
+    share <- rep(uniformAllocationVector,length(disagg$NewSectorCodes))
+
+    stateDFYear <- data.frame(State = rep(disaggState, length(disagg$NewSectorCodes)),
+                              Activity = activity,
+                              Share = share,
+                              Year = rep(disaggYear, length(disagg$NewSectorCodes)))
+
+  }
+
+  print(paste0("For ",disaggState,"-",disaggYear, " the allocation to disaggregate ", 
+               disagg$OriginalSectorCode, " into ", disagg$NewSectorCodes, " is ", stateDFYear$Share))
+
+  # Default Make DF based on proxy employment values
+  # Specifying commodity disaggregation (column splits) for Make DF
+  industries <- c(rep(disagg$OriginalSectorCode,length(disagg$NewSectorCodes)))
+  commodities <- unlist(disagg$NewSectorCodes)
+  PercentMake <- stateDFYear$Share # need to add code to ensure that the order of stateDF$Share is the same as the order of disagg$NewSectorCodes
+  note <- c(rep("CommodityDisagg", length(disagg$NewSectorCodes)))
+
+  makeDF <- data.frame(cbind(data.frame(industries), data.frame(commodities), data.frame(PercentMake), data.frame(note))) #need to rename the columns with the correct column names
+  colnames(makeDF) <- c("IndustryCode","CommodityCode",	"PercentMake",	"Note")
+
+
+  # Default Use DF based on employment ratios
+  # Specifying industry disaggregation (column splits) for Use DF
+  industries <- unlist(disagg$NewSectorCodes)
+  commodities <- c(rep(disagg$OriginalSectorCode,length(disagg$NewSectorCodes)))
+  PercentUse <- stateDFYear$Share
+  note <- c(rep("IndustryDisagg", length(disagg$NewSectorCodes)))
+
+  useDF <- data.frame(cbind(data.frame(industries), data.frame(commodities), data.frame(PercentUse), data.frame(note))) #need to rename the columns with the correct column names
+  useDF_2 <- makeDF # so that colnames match
+  colnames(useDF) <- c("IndustryCode","CommodityCode",	"PercentUse",	"Note")
+  colnames(useDF_2) <- c("IndustryCode","CommodityCode",	"PercentUse",	"Note")
+
+  useDF <- rbind(useDF, useDF_2) #need to bid makeDF because disaggregation procedure requires the UseDF to have the default commodity and industry output.
+
+  # Add new DFs to disagg and to model
+  disagg$MakeFileDF <- makeDF
+  disagg$UseFileDF <- useDF
+
+  model$DisaggregationSpecs[[disagg$OriginalSectorCode]] <- disagg
+
+
+  temp <-2 
+  return(model)
+
+}
diff --git a/R/UtilityFunctions.R b/R/UtilityFunctions.R
@@ -6,9 +6,17 @@ startLogging <- function() {
 
 #' Load data from useeior using flexible dataset name
 #' @param dataset A string specifying name of the data to load
+#' @param appendSchema bool, set to FALSE to ignore schema in name
 #' @return The data loaded from useeior
-loadDatafromUSEEIOR <- function(dataset) {
-  utils::data(package = "useeior", list = dataset)
+loadDatafromUSEEIOR <- function(dataset, appendSchema = TRUE) {
+  if(appendSchema && !"sch" %in% dataset) {
+    dataset_srch <- paste0(dataset, "_12sch")
+    # currently only uses 2012 schema
+    # required due to renaming in useeior #280
+  } else {
+    dataset_srch <- dataset
+  }
+  utils::data(package = "useeior", list = dataset_srch)
   df <- get(dataset)
   return(df)
 }