Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

investigate differences between new AR format and old AR format #8

Open
cjyetman opened this issue Dec 6, 2022 · 3 comments
Open

Comments

@cjyetman
Copy link
Member

cjyetman commented Dec 6, 2022

Especially with the recent change with AR, we're probably going to eventually need to switch to their new format. We should investigate the differences and see if we can switch to using the new format.

library(tidyverse)
library(pacta.data.preparation)

ar_data_path <- "~/Documents/Data/Asset Resolution/2022-08-15_AR_2021Q4"
ar_advanced_company_indicators_path <- file.path(ar_data_path, "2022-08-24_AR_2021Q4_RMI-Company-Indicators.xlsx")
masterdata_debt_path <- file.path(ar_data_path, "2022-10-05_rmi_masterdata_debt_2021q4.csv")
masterdata_ownership_path <- file.path(ar_data_path, "2022-08-15_rmi_masterdata_ownership_2021q4.csv")

ar_advanced_company_indicators <- import_ar_advanced_company_indicators(ar_advanced_company_indicators_path, fix_names = TRUE)
masterdata_debt <- readr::read_csv(masterdata_debt_path, na = "", show_col_types = FALSE)
masterdata_ownership <- readr::read_csv(masterdata_ownership_path, na = "", show_col_types = FALSE)


# -------------------------------------------------------------------------

ownership_data <- 
  ar_advanced_company_indicators %>% 
  filter(consolidation_method == "Equity Ownership") %>% 
  filter(value_type == "production") %>%
  filter(
    asset_sector == "Aviation" & activity_unit %in% c("pkm", "tkm") |
      asset_sector == "Cement" & activity_unit == "t cement" |
      asset_sector == "Coal" & activity_unit == "t coal" |
      asset_sector == "HDV" & activity_unit == "# vehicles" |
      asset_sector == "LDV" & activity_unit == "# vehicles" |
      asset_sector == "Oil&Gas" & activity_unit == "GJ" |
      asset_sector == "Power" & activity_unit == "MW" |
      asset_sector == "Shipping" & activity_unit == "dwt km" |
      asset_sector == "Steel" & activity_unit == "t steel"
  ) %>% 
  pivot_wider(names_from = "year", values_fill = 0) %>% 
  group_by(asset_sector) %>% 
  summarise(new_2016 = sum(`2016`, na.rm = TRUE), new_n = length(unique(company_id)))

masterdata_ownership %>% 
  filter(company_id %in% ar_advanced_company_indicators$company_id) %>% 
  group_by(sector) %>% 
  summarise(old_2016 = sum(`_2016`, na.rm = TRUE), old_n = length(unique(company_id))) %>% 
  full_join(ownership_data, by = c(sector = "asset_sector")) %>% 
  mutate(
    new_2016 = if_else(is.na(new_2016), as.numeric(0), as.numeric(new_2016)),
    new_n = if_else(is.na(new_n), as.numeric(0), as.numeric(new_n))
  ) %>% 
  mutate(diff = old_2016 - new_2016) %>% 
  mutate(percent_diff = round(diff / old_2016 * 100, digits = 2))

#> # A tibble: 9 × 7
#>   sector   old_2016 old_n new_2016 new_n     diff percent_diff
#>   <chr>       <dbl> <int>    <dbl> <dbl>    <dbl>        <dbl>
#> 1 Aviation  6.01e12  1820  6.01e12  1820 -3.12e-2         0   
#> 2 Cement    5.01e 9  2070  4.67e 9  2035  3.46e+8         6.91
#> 3 Coal      1.09e10  2051  1.09e10  2061 -1.47e-2         0   
#> 4 HDV       0         575  0         575  0             NaN   
#> 5 LDV       1.39e 8   394  1.39e 8   435 -3.73e-1         0   
#> 6 Oil&Gas   7.11e11  4563  7.11e11  4563 -5.32e-2         0   
#> 7 Power     1.30e 7 36846  1.30e 7 36875 -3.26e-1         0   
#> 8 Shipping  2.56e14 11167  2.56e14 11169  2.95e+9         0   
#> 9 Steel     2.73e 9  1105  2.73e 9  1105 -5.76e-3         0


# -------------------------------------------------------------------------

fin_control_data <- 
  ar_advanced_company_indicators %>% 
  filter(consolidation_method == "Financial Control") %>% 
  filter(value_type == "production") %>%
  filter(
    asset_sector == "Aviation" & activity_unit %in% c("pkm", "tkm") |
      asset_sector == "Cement" & activity_unit == "t cement" |
      asset_sector == "Coal" & activity_unit == "t coal" |
      asset_sector == "HDV" & activity_unit == "# vehicles" |
      asset_sector == "LDV" & activity_unit == "# vehicles" |
      asset_sector == "Oil&Gas" & activity_unit == "GJ" |
      asset_sector == "Power" & activity_unit == "MW" |
      asset_sector == "Shipping" & activity_unit == "dwt km" |
      asset_sector == "Steel" & activity_unit == "t steel"
  ) %>% 
  pivot_wider(names_from = "year", values_fill = 0) %>% 
  group_by(asset_sector) %>% 
  summarise(new_2016 = sum(`2016`, na.rm = TRUE), new_n = length(unique(company_id)))

masterdata_debt %>% 
  filter(company_id %in% ar_advanced_company_indicators$company_id) %>% 
  group_by(sector) %>% 
  summarise(old_2016 = sum(`_2016`, na.rm = TRUE), old_n = length(unique(company_id))) %>% 
  full_join(fin_control_data, by = c(sector = "asset_sector")) %>% 
  mutate(
    new_2016 = if_else(is.na(new_2016), as.numeric(0), as.numeric(new_2016)),
    new_n = if_else(is.na(new_n), as.numeric(0), as.numeric(new_n))
  ) %>% 
  mutate(diff = old_2016 - new_2016) %>% 
  mutate(percent_diff = round(diff / old_2016 * 100, digits = 2))

#> # A tibble: 9 × 7
#>   sector   old_2016 old_n new_2016 new_n     diff percent_diff
#>   <chr>       <dbl> <int>    <dbl> <dbl>    <dbl>        <dbl>
#> 1 Aviation  3.24e12  1302  5.94e12  1609 -2.70e12        -83.3
#> 2 Cement    2.89e 9  1459  4.27e 9  1685 -1.39e 9        -48.0
#> 3 Coal      6.31e 9  1349  1.12e10  1629 -4.87e 9        -77.2
#> 4 HDV       0         283  0         426  0              NaN  
#> 5 LDV       9.36e 7   194  1.16e 8   300 -2.24e 7        -23.9
#> 6 Oil&Gas   3.06e11  2864  6.85e11  3864 -3.79e11       -124. 
#> 7 Power     6.12e 6 29582  1.28e 7 34012 -6.68e 6       -109. 
#> 8 Shipping  1.70e14  9844  2.53e14 10518 -8.30e13        -48.9
#> 9 Steel     1.66e 9   757  2.70e 9   924 -1.03e 9        -62.1
@cjyetman
Copy link
Member Author

We recently learned that the masterdata_debt CSV is actually showing direct ownership, not rolled up financial control nor credit parent control. Comparing "Direct Ownership" in the Advanced Indicators XLS to the masterdata_debt data gives a much better picture, but we would still rely on the masterdata_debt file to get the credit parent ID to do the credit parent roll-up we currently rely on.

library(tidyverse)
library(pacta.data.preparation)

ar_data_path <- "~/Documents/Data/Asset Resolution/2022-08-15_AR_2021Q4"
ar_advanced_company_indicators_path <- file.path(ar_data_path, "2022-08-24_AR_2021Q4_RMI-Company-Indicators.xlsx")
masterdata_debt_path <- file.path(ar_data_path, "2022-10-05_rmi_masterdata_debt_2021q4.csv")
masterdata_ownership_path <- file.path(ar_data_path, "2022-08-15_rmi_masterdata_ownership_2021q4.csv")

ar_advanced_company_indicators <- import_ar_advanced_company_indicators(ar_advanced_company_indicators_path, fix_names = TRUE)
masterdata_debt <- readr::read_csv(masterdata_debt_path, na = "", show_col_types = FALSE)
masterdata_ownership <- readr::read_csv(masterdata_ownership_path, na = "", show_col_types = FALSE)


# -------------------------------------------------------------------------

direct_ownership_data <- 
  ar_advanced_company_indicators %>% 
  filter(consolidation_method == "Direct Ownership") %>% 
  filter(value_type == "production") %>%
  filter(
    asset_sector == "Aviation" & activity_unit %in% c("pkm", "tkm") |
      asset_sector == "Cement" & activity_unit == "t cement" |
      asset_sector == "Coal" & activity_unit == "t coal" |
      asset_sector == "HDV" & activity_unit == "# vehicles" |
      asset_sector == "LDV" & activity_unit == "# vehicles" |
      asset_sector == "Oil&Gas" & activity_unit == "GJ" |
      asset_sector == "Power" & activity_unit == "MW" |
      asset_sector == "Shipping" & activity_unit == "dwt km" |
      asset_sector == "Steel" & activity_unit == "t steel"
  ) %>% 
  pivot_wider(names_from = "year", values_fill = 0) %>% 
  group_by(asset_sector) %>% 
  summarise(new_2016 = sum(`2016`, na.rm = TRUE), new_n = length(unique(company_id)))

masterdata_debt %>% 
  filter(company_id %in% ar_advanced_company_indicators$company_id) %>% 
  group_by(sector) %>% 
  summarise(old_2016 = sum(`_2016`, na.rm = TRUE), old_n = length(unique(company_id))) %>% 
  full_join(direct_ownership_data, by = c(sector = "asset_sector")) %>% 
  mutate(
    new_2016 = if_else(is.na(new_2016), as.numeric(0), as.numeric(new_2016)),
    new_n = if_else(is.na(new_n), as.numeric(0), as.numeric(new_n))
  ) %>% 
  mutate(diff = old_2016 - new_2016) %>% 
  mutate(percent_diff = round(diff / old_2016 * 100, digits = 2))

#> # A tibble: 9 × 7
#>   sector   old_2016 old_n new_2016 new_n     diff percent_diff
#>   <chr>       <dbl> <int>    <dbl> <dbl>    <dbl>        <dbl>
#> 1 Aviation  3.24e12  1302  3.24e12  1302 -1.46e-3         0   
#> 2 Cement    2.89e 9  1459  2.73e 9  1448  1.53e+8         5.29
#> 3 Coal      6.31e 9  1349  6.31e 9  1359  0               0   
#> 4 HDV       0         283  0         283  0             NaN   
#> 5 LDV       9.36e 7   194  9.36e 7   228  0               0   
#> 6 Oil&Gas   3.06e11  2864  3.06e11  2864 -2.48e-2         0   
#> 7 Power     6.12e 6 29582  6.12e 6 29611 -8.47e-2         0   
#> 8 Shipping  1.70e14  9844  1.70e14  9846 -4.38e-1         0   
#> 9 Steel     1.66e 9   757  1.66e 9   757 -3.81e-3         0

@cjyetman
Copy link
Member Author

The bespoke masterdata_* files seem to include data for Cement - Grinding assets, while the Advanced Company Indicators does not. Filtering those rows out of the masterdata_* files appears to remove the discrepancies apart from very tiny, probable rounding errors and Shipping discrepancy in Equity Ownership (small relative to the sector (% diff), but still substantial). There are also still discrepancies in the number of rows in some sectors which I still do not have an explanation for.

library(tidyverse)
library(pacta.data.preparation)

ar_data_path <- "~/Documents/Data/Asset Resolution/2022-08-15_AR_2021Q4"
ar_advanced_company_indicators_path <- file.path(ar_data_path, "2022-08-24_AR_2021Q4_RMI-Company-Indicators.xlsx")
masterdata_debt_path <- file.path(ar_data_path, "2022-10-05_rmi_masterdata_debt_2021q4.csv")
masterdata_ownership_path <- file.path(ar_data_path, "2022-08-15_rmi_masterdata_ownership_2021q4.csv")

ar_advanced_company_indicators <- import_ar_advanced_company_indicators(ar_advanced_company_indicators_path, fix_names = TRUE)
masterdata_debt <- readr::read_csv(masterdata_debt_path, na = "", show_col_types = FALSE)
masterdata_ownership <- readr::read_csv(masterdata_ownership_path, na = "", show_col_types = FALSE)


# -------------------------------------------------------------------------

equity_ownership_data <- 
  ar_advanced_company_indicators %>% 
  filter(consolidation_method == "Equity Ownership") %>% 
  filter(value_type == "production") %>%
  filter(
    asset_sector == "Aviation" & activity_unit %in% c("pkm", "tkm") |
      asset_sector == "Cement" & activity_unit == "t cement" |
      asset_sector == "Coal" & activity_unit == "t coal" |
      asset_sector == "HDV" & activity_unit == "# vehicles" |
      asset_sector == "LDV" & activity_unit == "# vehicles" |
      asset_sector == "Oil&Gas" & activity_unit == "GJ" |
      asset_sector == "Power" & activity_unit == "MW" |
      asset_sector == "Shipping" & activity_unit == "dwt km" |
      asset_sector == "Steel" & activity_unit == "t steel"
  ) %>% 
  pivot_wider(names_from = "year", values_fill = 0) %>% 
  group_by(asset_sector) %>% 
  summarise(new_2022 = sum(`2022`, na.rm = TRUE), new_n = length(unique(company_id)))

masterdata_ownership %>% 
  filter(technology != "Grinding") %>%
  group_by(sector) %>% 
  summarise(old_2022 = sum(`_2022`, na.rm = TRUE), old_n = length(unique(company_id))) %>% 
  full_join(equity_ownership_data, by = c(sector = "asset_sector")) %>% 
  mutate(
    new_2022 = if_else(is.na(new_2022), as.numeric(0), as.numeric(new_2022)),
    new_n = if_else(is.na(new_n), as.numeric(0), as.numeric(new_n))
  ) %>% 
  mutate(diff = old_2022 - new_2022) %>% 
  mutate(percent_diff = round(diff / old_2022 * 100, digits = 2))

#> # A tibble: 9 × 7
#>   sector   old_2022 old_n new_2022 new_n     diff percent_diff
#>   <chr>       <dbl> <int>    <dbl> <dbl>    <dbl>        <dbl>
#> 1 Aviation  1.00e13  1820  1.00e13  1820  1.17e-2            0
#> 2 Cement    4.67e 9  2035  4.67e 9  2035 -6.73e-4            0
#> 3 Coal      1.15e10  2051  1.15e10  2061 -1.07e-2            0
#> 4 HDV       9.22e 6   575  9.22e 6   575 -6.36e-2            0
#> 5 LDV       1.26e 8   394  1.26e 8   435 -3.52e-1            0
#> 6 Oil&Gas   7.82e11  4568  7.82e11  4563  2.14e-2            0
#> 7 Power     1.60e 7 36917  1.60e 7 36875 -4.49e-1            0
#> 8 Shipping  2.69e14 11167  2.69e14 11169  3.06e+9            0
#> 9 Steel     2.75e 9  1105  2.75e 9  1105 -5.96e-3            0


# -------------------------------------------------------------------------

direct_ownership_data <- 
  ar_advanced_company_indicators %>% 
  filter(consolidation_method == "Direct Ownership") %>% 
  filter(value_type == "production") %>%
  filter(
    asset_sector == "Aviation" & activity_unit %in% c("pkm", "tkm") |
      asset_sector == "Cement" & activity_unit == "t cement" |
      asset_sector == "Coal" & activity_unit == "t coal" |
      asset_sector == "HDV" & activity_unit == "# vehicles" |
      asset_sector == "LDV" & activity_unit == "# vehicles" |
      asset_sector == "Oil&Gas" & activity_unit == "GJ" |
      asset_sector == "Power" & activity_unit == "MW" |
      asset_sector == "Shipping" & activity_unit == "dwt km" |
      asset_sector == "Steel" & activity_unit == "t steel"
  ) %>% 
  pivot_wider(names_from = "year", values_fill = 0) %>% 
  group_by(asset_sector) %>% 
  summarise(new_2022 = sum(`2022`, na.rm = TRUE), new_n = length(unique(company_id)))

masterdata_debt %>% 
  filter(technology != "Grinding") %>%
  group_by(sector) %>% 
  summarise(old_2022 = sum(`_2022`, na.rm = TRUE), old_n = length(unique(company_id))) %>% 
  full_join(direct_ownership_data, by = c(sector = "asset_sector")) %>% 
  mutate(
    new_2022 = if_else(is.na(new_2022), as.numeric(0), as.numeric(new_2022)),
    new_n = if_else(is.na(new_n), as.numeric(0), as.numeric(new_n))
  ) %>% 
  mutate(diff = old_2022 - new_2022) %>% 
  mutate(percent_diff = round(diff / old_2022 * 100, digits = 2))

#> # A tibble: 9 × 7
#>   sector   old_2022 old_n new_2022 new_n     diff percent_diff
#>   <chr>       <dbl> <int>    <dbl> <dbl>    <dbl>        <dbl>
#> 1 Aviation  5.35e12  1302  5.35e12  1302 -0.00684            0
#> 2 Cement    2.74e 9  1448  2.74e 9  1448  0                  0
#> 3 Coal      6.62e 9  1349  6.62e 9  1359  0                  0
#> 4 HDV       3.63e 6   283  3.63e 6   283  0                  0
#> 5 LDV       8.56e 7   194  8.56e 7   228  0                  0
#> 6 Oil&Gas   3.51e11  2869  3.51e11  2864  0.0242             0
#> 7 Power     7.59e 6 29641  7.59e 6 29611 -0.122              0
#> 8 Shipping  1.79e14  9844  1.79e14  9846 -0.0625             0
#> 9 Steel     1.69e 9   757  1.69e 9   757 -0.00299            0

@jdhoffa
Copy link
Member

jdhoffa commented Mar 28, 2023

Blocked: Until AI adds a new consolidation method to their advanced company indicators, which likely will happen in Q3 this year Consolidation method is likely to be called "Credit Risk Parent" consolidation method, or something similar.

@jdhoffa jdhoffa transferred this issue from another repository Apr 15, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants