From 6321086cf5e5d41bbad5f503773387725ed82a2b Mon Sep 17 00:00:00 2001 From: CarissaGervasi-NOAA Date: Thu, 20 Jun 2024 15:33:08 -0400 Subject: [PATCH] Add outreach indicator --- .Rhistory | 676 +++++++++--------- .../2010 Sea Grant_ PIER_ Annual Report.pdf | Bin .../2011 Sea Grant_ PIER_ Annual Report.pdf | Bin .../2012 Sea Grant_ PIER_ Annual Report.pdf | Bin .../2013 Sea Grant_ PIER_ Annual Report.pdf | Bin .../2014 Sea Grant_ PIER_ Annual Report.pdf | Bin .../2015 Sea Grant_ PIER_ Annual Report.pdf | Bin .../2016 Sea Grant_ PIER_ Annual Report.pdf | Bin .../2017 Sea Grant_ PIER_ Annual Report.pdf | Bin .../2018 Sea Grant_ PIER_ Annual Report.pdf | Bin .../2019 Sea Grant_ PIER_ Annual Report.pdf | Bin .../2020 Sea Grant_ PIER_ Annual Report.pdf | Bin indicator_objects/outreach.RData | Bin 0 -> 488 bytes indicator_processing/non_automated/outreach.R | 90 +++ 14 files changed, 428 insertions(+), 338 deletions(-) rename {indicator_processing/non_automated => indicator_data}/outreach/2010 Sea Grant_ PIER_ Annual Report.pdf (100%) rename {indicator_processing/non_automated => indicator_data}/outreach/2011 Sea Grant_ PIER_ Annual Report.pdf (100%) rename {indicator_processing/non_automated => indicator_data}/outreach/2012 Sea Grant_ PIER_ Annual Report.pdf (100%) rename {indicator_processing/non_automated => indicator_data}/outreach/2013 Sea Grant_ PIER_ Annual Report.pdf (100%) rename {indicator_processing/non_automated => indicator_data}/outreach/2014 Sea Grant_ PIER_ Annual Report.pdf (100%) rename {indicator_processing/non_automated => indicator_data}/outreach/2015 Sea Grant_ PIER_ Annual Report.pdf (100%) rename {indicator_processing/non_automated => indicator_data}/outreach/2016 Sea Grant_ PIER_ Annual Report.pdf (100%) rename {indicator_processing/non_automated => indicator_data}/outreach/2017 Sea Grant_ PIER_ Annual Report.pdf (100%) rename {indicator_processing/non_automated => indicator_data}/outreach/2018 Sea Grant_ PIER_ Annual Report.pdf (100%) rename {indicator_processing/non_automated => indicator_data}/outreach/2019 Sea Grant_ PIER_ Annual Report.pdf (100%) rename {indicator_processing/non_automated => indicator_data}/outreach/2020 Sea Grant_ PIER_ Annual Report.pdf (100%) create mode 100644 indicator_objects/outreach.RData create mode 100644 indicator_processing/non_automated/outreach.R diff --git a/.Rhistory b/.Rhistory index bb846a4..9b42b6c 100644 --- a/.Rhistory +++ b/.Rhistory @@ -1,341 +1,3 @@ -strip.text = element_text(size = 10), -legend.title = element_text(size = 10), -legend.text = element_text(size = 10)) + -labs(title = paste("Change in Standardized Indicator Scores Over Time by Community", region), -x = "Year", -y = "Score", -color = "Community") -ggsave(paste0("indicator_plots/CSVI_plots/Line_Plot_", region, ".png"), plot = p, width = 12, height = 8) -} -# Create Heatmaps for each region -for (region in regions) { -p <- ggplot(df_long %>% filter(Region == region), aes(x = as.factor(Year), y = Community, fill = Score)) + -geom_tile() + -facet_wrap(~ Indicator, scales = "free_y") + -scale_fill_gradient(low = "blue", high = "red") + -theme_minimal() + -theme(panel.background = element_rect(fill = "white", color = NA), -plot.background = element_rect(fill = "white", color = NA), -axis.text.x = element_text(angle = 45, hjust = 1, size = 10), -axis.text.y = element_text(size = 10), -strip.text = element_text(size = 10), -legend.title = element_text(size = 10), -legend.text = element_text(size = 10)) + -labs(title = paste("Heatmap of Standardized Indicator Scores by Community and Year", region), -x = "Year", -y = "Community", -fill = "Score") -ggsave(paste0("indicator_plots/CSVI_plots/Heatmap_", region, ".png"), plot = p, width = 12, height = 8, bg = "white") -} -# Create Box Plots for each region -for (region in regions) { -p <- ggplot(df_long %>% filter(Region == region), aes(x = as.factor(Year), y = Score, fill = as.factor(Year))) + -geom_boxplot() + -facet_wrap(~ Indicator, scales = "free_y") + -theme_bw() + -theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10), -axis.text.y = element_text(size = 10), -strip.text = element_text(size = 10), -legend.title = element_text(size = 10), -legend.text = element_text(size = 10)) + -labs(title = paste("Distribution of Standardized Indicator Scores by Community and Year", region), -x = "Year", -y = "Score", -fill = "Year") -ggsave(paste0("indicator_plots/CSVI_plots/Box_Plot_", region, ".png"), plot = p, width = 12, height = 8) -} -# Create Box Plots for each region -for (region in regions) { -p <- ggplot(df_long %>% filter(Region == region), aes(x = as.factor(Year), y = Score, fill = as.factor(Year))) + -geom_boxplot() + -facet_wrap(~ Indicator, scales = "free_y") + -theme_bw() + -theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10), -axis.text.y = element_text(size = 10), -strip.text = element_text(size = 10), -legend.title = element_text(size = 10), -legend.text = element_text(size = 10)) + -labs(title = paste("Distribution of Standardized Indicator Scores by Year", region), -x = "Year", -y = "Score", -fill = "Year") -ggsave(paste0("indicator_plots/CSVI_plots/Box_Plot_", region, ".png"), plot = p, width = 12, height = 8) -} -library(readr) -CSVI_2010 = read_csv("indicator_data/CaribCSVI_2010.csv", locale = locale(encoding = "UTF-8")) -CSVI_2020 = read_csv("indicator_data/CaribCSVI_2020.csv", locale = locale(encoding = "UTF-8")) -library(ggplot2) -library(dplyr) -library(tidyr) -CSVI_2010$Year <- 2010 -CSVI_2020$Year <- 2020 -# Combine both dataframes -df_combined <- bind_rows(CSVI_2010, CSVI_2020) -#remove columns for raw data -df_combined = df_combined[,-c(2:7)] -# Convert to long format for ggplot -df_long <- df_combined %>% -pivot_longer(cols = ends_with("Cat"), -names_to = "Indicator", -values_to = "Score") -# Remove rows with NA values -df_long <- na.omit(df_long) -df_long$Region = as.character(df_long$Region) -df_long$Year = as.factor(df_long$Year) -df_long$Indicator = as.character(df_long$Indicator) -df_long$Score = as.integer(df_long$Score) -# Rename and combine regions -df_long <- df_long %>% -mutate(Region = recode(Region, -'STT' = 'St. Thomas & St. John', -'STJ' = 'St. Thomas & St. John', -'STX' = 'St. Croix', -'W' = 'Puerto Rico West', -'E' = 'Puerto Rico East')) -# Verify the changes -unique(df_long$Region) -# Rename indicators -df_long <- df_long %>% -mutate(Indicator = recode(Indicator, -'PerDisCat' = 'Personal Disruption', -'PopComCat' = 'Pop Composition', -'PovertyCat' = 'Poverty', -'LabFrc_revCat' = 'Labor Force', -'HsChr_rev_Cat' = 'Housing Charac', -'RetMigCat' = 'Retiree Migration')) -unique(df_long$Indicator) -# Create Faceted Bar Plots for each region -regions <- unique(df_long$Region) -for (region in regions) { -p <- ggplot(df_long %>% filter(Region == region), aes(x = Community, y = Score, fill = as.factor(Year))) + -geom_bar(stat = "identity", position = "dodge") + -facet_wrap(~ Indicator, scales = "free_y") + -theme_bw() + -theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 5), -axis.text.y = element_text(size = 10), -strip.text = element_text(size = 10), -legend.title = element_text(size = 10), -legend.text = element_text(size = 10)) + -labs(title = paste("Standardized Indicator Scores by Community & Year", region), -x = "Community", -y = "Score", -fill = "Year") -ggsave(paste0("indicator_plots/CSVI_plots/Faceted_Bar_Plot_", region, ".png"), plot = p, width = 12, height = 8) -} -# Create Line Plots for each region -for (region in regions) { -p <- ggplot(df_long %>% filter(Region == region), aes(x = Year, y = Score, color = Community, group = Community)) + -geom_line() + -geom_point() + -facet_wrap(~ Indicator, scales = "free_y") + -theme_bw() + -theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10), -axis.text.y = element_text(size = 10), -strip.text = element_text(size = 10), -legend.title = element_text(size = 10), -legend.text = element_text(size = 10)) + -labs(title = paste("Change in Standardized Indicator Scores Over Time by Community", region), -x = "Year", -y = "Score", -color = "Community") -ggsave(paste0("indicator_plots/CSVI_plots/Line_Plot_", region, ".png"), plot = p, width = 12, height = 8) -} -# Create Heatmaps for each region -for (region in regions) { -p <- ggplot(df_long %>% filter(Region == region), aes(x = as.factor(Year), y = Community, fill = Score)) + -geom_tile() + -facet_wrap(~ Indicator, scales = "free_y") + -scale_fill_gradient(low = "blue", high = "red") + -theme_minimal() + -theme(panel.background = element_rect(fill = "white", color = NA), -plot.background = element_rect(fill = "white", color = NA), -axis.text.x = element_text(angle = 45, hjust = 1, size = 10), -axis.text.y = element_text(size = 10), -strip.text = element_text(size = 10), -legend.title = element_text(size = 10), -legend.text = element_text(size = 10)) + -labs(title = paste("Heatmap of Standardized Indicator Scores by Community and Year", region), -x = "Year", -y = "Community", -fill = "Score") -ggsave(paste0("indicator_plots/CSVI_plots/Heatmap_", region, ".png"), plot = p, width = 12, height = 8, bg = "white") -} -# Create Box Plots for each region -for (region in regions) { -p <- ggplot(df_long %>% filter(Region == region), aes(x = as.factor(Year), y = Score, fill = as.factor(Year))) + -geom_boxplot() + -facet_wrap(~ Indicator, scales = "free_y") + -theme_bw() + -theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10), -axis.text.y = element_text(size = 10), -strip.text = element_text(size = 10), -legend.title = element_text(size = 10), -legend.text = element_text(size = 10)) + -labs(title = paste("Distribution of Standardized Indicator Scores by Year", region), -x = "Year", -y = "Score", -fill = "Year") -ggsave(paste0("indicator_plots/CSVI_plots/Box_Plot_", region, ".png"), plot = p, width = 12, height = 8) -} -library(readr) -CSVI_2010 = read_csv("indicator_data/CaribCSVI_2010.csv", locale = locale(encoding = "UTF-8")) -View(CSVI_2010) -CSVI_2020 = read_csv("indicator_data/CaribCSVI_2020.csv", locale = locale(encoding = "UTF-8")) -library(ggplot2) -library(dplyr) -library(tidyr) -CSVI_2010$Year <- 2010 -CSVI_2020$Year <- 2020 -# Combine both dataframes -df_combined <- bind_rows(CSVI_2010, CSVI_2020) -#remove columns for raw data -df_combined = df_combined[,-c(2:7)] -# Convert to long format for ggplot -df_long <- df_combined %>% -pivot_longer(cols = ends_with("Cat"), -names_to = "Indicator", -values_to = "Score") -# Remove rows with NA values -df_long <- na.omit(df_long) -df_long$Community = as.character(df_long$Community) -df_long$Region = as.character(df_long$Region) -df_long$Year = as.factor(df_long$Year) -df_long$Indicator = as.character(df_long$Indicator) -df_long$Score = as.integer(df_long$Score) -# Rename and combine regions -df_long <- df_long %>% -mutate(Region = recode(Region, -'STT' = 'St. Thomas & St. John', -'STJ' = 'St. Thomas & St. John', -'STX' = 'St. Croix', -'W' = 'Puerto Rico West', -'E' = 'Puerto Rico East')) -# Verify the changes -unique(df_long$Region) -# Rename indicators -df_long <- df_long %>% -mutate(Indicator = recode(Indicator, -'PerDisCat' = 'Personal Disruption', -'PopComCat' = 'Pop Composition', -'PovertyCat' = 'Poverty', -'LabFrc_revCat' = 'Labor Force', -'HsChr_rev_Cat' = 'Housing Charac', -'RetMigCat' = 'Retiree Migration')) -unique(df_long$Indicator) -# Create Faceted Bar Plots for each region -regions <- unique(df_long$Region) -for (region in regions) { -p <- ggplot(df_long %>% filter(Region == region), aes(x = Community, y = Score, fill = as.factor(Year))) + -geom_bar(stat = "identity", position = "dodge") + -facet_wrap(~ Indicator, scales = "free_y") + -theme_bw() + -theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 5), -axis.text.y = element_text(size = 10), -strip.text = element_text(size = 10), -legend.title = element_text(size = 10), -legend.text = element_text(size = 10)) + -labs(title = paste("Standardized Indicator Scores by Community & Year", region), -x = "Community", -y = "Score", -fill = "Year") -ggsave(paste0("indicator_plots/CSVI_plots/Faceted_Bar_Plot_", region, ".png"), plot = p, width = 12, height = 8) -} -# Create Line Plots for each region -for (region in regions) { -p <- ggplot(df_long %>% filter(Region == region), aes(x = Year, y = Score, color = Community, group = Community)) + -geom_line() + -geom_point() + -facet_wrap(~ Indicator, scales = "free_y") + -theme_bw() + -theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10), -axis.text.y = element_text(size = 10), -strip.text = element_text(size = 10), -legend.title = element_text(size = 10), -legend.text = element_text(size = 10)) + -labs(title = paste("Change in Standardized Indicator Scores Over Time by Community", region), -x = "Year", -y = "Score", -color = "Community") -ggsave(paste0("indicator_plots/CSVI_plots/Line_Plot_", region, ".png"), plot = p, width = 12, height = 8) -} -# Create Heatmaps for each region -for (region in regions) { -p <- ggplot(df_long %>% filter(Region == region), aes(x = as.factor(Year), y = Community, fill = Score)) + -geom_tile() + -facet_wrap(~ Indicator, scales = "free_y") + -scale_fill_gradient(low = "blue", high = "red") + -theme_minimal() + -theme(panel.background = element_rect(fill = "white", color = NA), -plot.background = element_rect(fill = "white", color = NA), -axis.text.x = element_text(angle = 45, hjust = 1, size = 10), -axis.text.y = element_text(size = 10), -strip.text = element_text(size = 10), -legend.title = element_text(size = 10), -legend.text = element_text(size = 10)) + -labs(title = paste("Heatmap of Standardized Indicator Scores by Community and Year", region), -x = "Year", -y = "Community", -fill = "Score") -ggsave(paste0("indicator_plots/CSVI_plots/Heatmap_", region, ".png"), plot = p, width = 12, height = 8, bg = "white") -} -# Create Box Plots for each region -for (region in regions) { -p <- ggplot(df_long %>% filter(Region == region), aes(x = as.factor(Year), y = Score, fill = as.factor(Year))) + -geom_boxplot() + -facet_wrap(~ Indicator, scales = "free_y") + -theme_bw() + -theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10), -axis.text.y = element_text(size = 10), -strip.text = element_text(size = 10), -legend.title = element_text(size = 10), -legend.text = element_text(size = 10)) + -labs(title = paste("Distribution of Standardized Indicator Scores by Year", region), -x = "Year", -y = "Score", -fill = "Year") -ggsave(paste0("indicator_plots/CSVI_plots/Box_Plot_", region, ".png"), plot = p, width = 12, height = 8) -} -# Tally number of stocks from Maria's table: -PR = 34 -STTSTJ = 20 -STX = 20 -PR_perc = 1/PR -PR_perc -USVI_perc = 1/STX -USVI_perc -USVI_perc = 3/STX -USVI_perc -PR_perc = 1/PR -USVI_perc = 1/STX -# Tally number of stocks from Maria's table: -PR_stocks = 34 -STTSTJ_stocks = 20 -STX_stocks = 20 -PR_perc = 1/PR_stocks -USVI_perc = 1/STX_stocks -PR = as.vector(0,0,0,0,0,0,0,0,0,0,0,0,PR_perc) -PR_perc -PR = as.vector(0,0,0,0,0,0,0,0,0,0,0,0,3) -PR = c(0,0,0,0,0,0,0,0,0,0,0,0,PR_perc) -PR -# Tally number of stocks from Maria's table: -PR_stocks = 34 -STTSTJ_stocks = 20 -STX_stocks = 20 -PR_perc = (1/PR_stocks)*100 -USVI_perc = (1/STX_stocks)*100 -PR = c(0,0,0,0,0,0,0,0,0,0,0,0,PR_perc) -PR -USVI = c(0,0,0,0,0,0,0,0,0,0,0,0,USVI_perc) -USVI -# save as indicator object ---------------------- -datdata <- 2011:2023 -inddata <- data.frame(cbind(PR, USVI)) -labs <- c("Stocks/complexes with Tier 3 designation" , "Percent", "Puerto Rico", -"Stocks/complexes with Tier 3 designation" , "Percent", "USVI") -indnames <- data.frame(matrix(labs, nrow = 3, byrow = F)) -ind <- list(labels = indnames, indicators = inddata, datelist = datdata) -class(inddata) <- "indicatordata" # plot and save ---------------------------------- plotIndicatorTimeSeries(ind, plotrownum = 2, coltoplot = 1:2, sublabel = TRUE, dateformat = "%Y%b", trendAnalysis = T) library(plotTimeSeries) @@ -510,3 +172,341 @@ require(tidyverse) require(ggplot2) url2 <- 'https://usviber.org/wp-content/uploads/2023/12/Tourism-indicator-2022-12-28-23-1.pdf' raw_text <- map(url2, pdf_text) +url1<-'https://www.bde.pr.gov/BDE/PREDDOCS/I_CRUISE.XLS' +library(readxl) +library(httr) +packageVersion("readxl") +GET(url1, write_disk(tf <- tempfile(fileext = ".xls"))) +df <- read_excel(tf, 2L) +str(df) +df2 = df[c(52,63),] +df2t = as.data.frame(t(df2)) +df2t = df2t[-c(1,2,12),] +yrs_PR = as.integer(df2t$V1) +cruise_PR = df2t$V2 +yrs_PR +cruise_PR +require(pdftools) +require(tidyverse) +require(ggplot2) +url2 <- 'https://usviber.org/wp-content/uploads/2023/12/Tourism-indicator-2022-12-28-23-1.pdf' +raw_text <- map(url2, pdf_text) +clean_table1 <- function(raw) { +#Split the single pages +raw <- map(raw, ~str_split(.x, "\\n") %>% unlist()) +#Concatenate the split pages +raw <- reduce(raw, c) +#Specify the start and end of the table data +table_start <- stringr::str_which(tolower(raw), 'total visitors') +table_end <- stringr:str_which(tolower(raw), 'number of cruise ships') +table_end <- table_end[min(which(table_end > table_start))] +#Build the table and remove special characters +table <- raw[(table_start):(table_end)] +table <- str_replace_all(table, "\\s{2,}","|") +text_con <- textConnection(table) +data_table <- read.csv(text_con, sep = "|") +#Create a list of column names +colnames(data_table) <- c("x","xx","1990","2000","2008","2009","2010","2011","2012","2013","2014","2015","2016","2017","2018","2019","2020","2021","2022") +data_table +} +results <- map_df(raw_text, clean_table1) +require(stringr) +url2 <- 'https://usviber.org/wp-content/uploads/2023/12/Tourism-indicator-2022-12-28-23-1.pdf' +raw_text <- map(url2, pdf_text) +clean_table1 <- function(raw) { +#Split the single pages +raw <- map(raw, ~str_split(.x, "\\n") %>% unlist()) +#Concatenate the split pages +raw <- reduce(raw, c) +#Specify the start and end of the table data +table_start <- stringr::str_which(tolower(raw), 'total visitors') +table_end <- stringr:str_which(tolower(raw), 'number of cruise ships') +table_end <- table_end[min(which(table_end > table_start))] +#Build the table and remove special characters +table <- raw[(table_start):(table_end)] +table <- str_replace_all(table, "\\s{2,}","|") +text_con <- textConnection(table) +data_table <- read.csv(text_con, sep = "|") +#Create a list of column names +colnames(data_table) <- c("x","xx","1990","2000","2008","2009","2010","2011","2012","2013","2014","2015","2016","2017","2018","2019","2020","2021","2022") +data_table +} +results <- map_df(raw_text, clean_table1) +raw_text +# Load necessary libraries +library(pdftools) +library(dplyr) +library(tidyr) +# Load the PDF +pdf_file <- "../../../Downloads/Tourism-indicator-2022-12-28-23-1.pdf" +# Extract text from the PDF +pdf_text <- pdf_text(pdf_file) +# Extract the relevant page (assuming the table is on the first page) +page_text <- pdf_text[1] +# Split the text into lines +lines <- strsplit(page_text, "\n")[[1]] +# Identify the lines containing the Visitor Arrivals table +# (assuming the table starts with "VISITOR ARRIVALS" and ends before "VISITOR EXPENDITURES") +start_line <- grep("VISITOR ARRIVALS", lines) +end_line <- grep("VISITOR EXPENDITURES", lines) +table_lines <- lines[(start_line + 1):(end_line - 1)] +# Combine lines into a single text block and split by spaces +table_text <- paste(table_lines, collapse = " ") +table_data <- strsplit(table_text, " +")[[1]] +table_data +# Extract the years and visitor numbers +years <- c(1990:2022) # Adjust the range based on actual column positions +years +# Extract the years and visitor numbers +years <- c(1990,2000,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022) # Adjust the range based on actual column positions +table_data +cruise_passengers <- as.numeric(table_data[85:101]) # Adjust the range based on actual column positions +table_data[85:101] +# Create years column +years <- c(1990,2000,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022) +# Extract and clean the total visitors data by removing commas +cruise_passengers <- gsub(",", "", table_data[85:101]) +cruise_passengers <- as.numeric(cruise_passengers) +cruise_passengers +# Create years column +years <- c(1990,2000,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022) +# Extract and clean the total visitors data by removing commas +passengers <- gsub(",", "", table_data[85:101]) +passengers <- as.numeric(passengers) +# Create a data frame +USVI_cruise <- data.frame(Year = years, Cruise_passengers = passengers) +# Print the data frame +print(USVI_cruise) +# Specify the URL of the PDF +url <- "https://usviber.org/wp-content/uploads/2023/12/Tourism-indicator-2022-12-28-23-1.pdf" +# Specify the destination file path +destfile <- "indicator_data/Cruise-indicator-2022-12-28-23-1.pdf" +# Download the PDF +download.file(url, destfile, mode = "wb") +# Check if the file has been downloaded +file.exists(destfile) +url1<-'https://www.bde.pr.gov/BDE/PREDDOCS/I_CRUISE.XLS' +library(readxl) +library(httr) +packageVersion("readxl") +GET(url1, write_disk(tf <- tempfile(fileext = ".xls"))) +df <- read_excel(tf, 2L) +str(df) +df2 = df[c(52,63),] +df2t = as.data.frame(t(df2)) +df2t = df2t[-c(1,2,12),] +yrs_PR = as.integer(df2t$V1) +cruise_PR = df2t$V2 +yrs_PR +cruise_PR +yrs_PR = as.integer(df2t$V1) +cruise_PR = as.numeric(df2t$V2) +cruise_PR +# Load necessary libraries +library(pdftools) +library(dplyr) +library(tidyr) +# Specify the URL of the PDF +url2 <- "https://usviber.org/wp-content/uploads/2023/12/Tourism-indicator-2022-12-28-23-1.pdf" +# Specify the destination file path +destfile <- "indicator_data/Cruise-indicator-2022-12-28-23-1.pdf" +# Download the PDF +download.file(url2, destfile, mode = "wb") +# Check if the file has been downloaded +file.exists(destfile) +# Load the PDF +pdf_file <- "indicator_data/Cruise-indicator-2022-12-28-23-1.pdf" +# Extract text from the PDF +pdf_text <- pdf_text(pdf_file) +# Extract the relevant page (assuming the table is on the first page) +page_text <- pdf_text[1] +# Split the text into lines +lines <- strsplit(page_text, "\n")[[1]] +# Identify the lines containing the Visitor Arrivals table +# (assuming the table starts with "VISITOR ARRIVALS" and ends before "VISITOR EXPENDITURES") +start_line <- grep("VISITOR ARRIVALS", lines) +end_line <- grep("VISITOR EXPENDITURES", lines) +table_lines <- lines[(start_line + 1):(end_line - 1)] +# Combine lines into a single text block and split by spaces +table_text <- paste(table_lines, collapse = " ") +table_data <- strsplit(table_text, " +")[[1]] +# Create years column +years <- c(1990,2000,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022) +# Extract and clean the total visitors data by removing commas +passengers <- gsub(",", "", table_data[85:101]) +passengers <- as.numeric(passengers) +# Create a data frame +USVI_cruise <- data.frame(Year = years, Cruise_passengers = passengers) +# Print the data frame +print(USVI_cruise) +yrs_USVI = as.integer(USVI_cruise$Year) +cruise_USVI = as.numeric(USVI_cruise$Cruise_passengers) +yrs <- min(yrs_PR, yrs_USVI) : max(yrs_PR, yrs_USVI) +yrs +# Combine lines into a single text block and split by spaces +table_text <- paste(table_lines, collapse = " ") +table_data <- strsplit(table_text, " +")[[1]] +# Create years column +years <- c(2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022) +# Extract and clean the total visitors data by removing commas +passengers <- gsub(",", "", table_data[87:101]) +passengers <- as.numeric(passengers) +# Create a data frame +USVI_cruise <- data.frame(Year = years, Cruise_passengers = passengers) +# Print the data frame +print(USVI_cruise) +yrs_USVI = as.integer(USVI_cruise$Year) +cruise_USVI = as.numeric(USVI_cruise$Cruise_passengers) +yrs <- min(yrs_PR, yrs_USVI) : max(yrs_PR, yrs_USVI) +yrs +yrs <- min(yrs_PR, yrs_USVI) : max(yrs_PR, yrs_USVI) +mat <- data.frame(matrix(data = NA, nrow = length(yrs), ncol = 2)) +rownames(mat) <- yrs +mat[which(yrs %in% cruise_PR), 1] <- prtab[, 2] +yrs_PR +# Create a data frame +PR_cruise <- data.frame(Year = yrs_PR, Cruise_passengers = cruise_PR) +all_years <- min(PR_cruise$Year, USVI_cruise$Year) : max(PR_cruise$Cruise_passengers, USVI_cruise$Cruise_passengers) +# Merge the data frames with the complete data frame to fill in missing years with NA +combined_df <- merge(all_years, PR_cruise, by = "Year", all.x = TRUE) +combined_df <- merge(combined_df, USVI_cruise, by = "Year", all.x = TRUE) +all_years <- min(PR_cruise$Year, USVI_cruise$Year) : max(PR_cruise$Cruise_passengers, USVI_cruise$Cruise_passengers) +# Merge the data frames with the complete data frame to fill in missing years with NA +combined_df <- merge(all_years, PR_cruise, by = "Year", all.x = TRUE) +PR_cruise +all_years <- data.frame(Year = min(PR_cruise$Year, USVI_cruise$Year) : max(PR_cruise$Cruise_passengers, USVI_cruise$Cruise_passengers)) +all_years +all_years <- data.frame(Year = min(PR_cruise$Year, USVI_cruise$Year) : max(PR_cruise$Year, USVI_cruise$Year)) +all_years +# Merge the data frames with the complete data frame to fill in missing years with NA +combined_df <- merge(all_years, PR_cruise, by = "Year", all.x = TRUE) +combined_df <- merge(combined_df, USVI_cruise, by = "Year", all.x = TRUE) +combined_df +# save as indicator object ---------------------- +datdata <- all_years +inddata <- data.frame(cbind(PR_cruise$Cruise_passengers, USVI_cruise$Cruise_passengers)) +combined_df +# save as indicator object ---------------------- +datdata <- all_years +inddata <- data.frame(cbind(combined_df$Cruise_passengers.x, combined_df$Cruise_passengers.y)) +labs <- c("Cruise passengers" , "thousands of people", "Puerto Rico", +"Cruise passengers" , "thousands of people", "USVI") +indnames <- data.frame(matrix(labs, nrow = 3, byrow = F)) +inddata <- list(labels = indnames, indicators = inddata, datelist = datdata) +class(inddata) <- "indicatordata" +ind <- inddata +plotIndicatorTimeSeries(ind, coltoplot = 1:2, plotrownum = 2, sublabel = TRUE) +library(plotTimeSeries) +plotIndicatorTimeSeries(ind, coltoplot = 1:2, plotrownum = 2, sublabel = TRUE) +all_years +# specification file and libraries ----------------------------- +rm(list = ls()) +dev.off() +library(maps) +library(plotTimeSeries) +library(xml2) +library(rvest) +load("indicator_processing/spec_file.RData") +url <- paste0("https://ds.iris.edu/ieb/evtable.phtml?caller=IEB&st=1970-01-01&et=2025-01-01&minmag=3.5&maxmag=10&mindepth=0&xde=900&orderby=time-desc&src=usgs&limit=5000&", +"maxlat=", max_lat, "&minlat=", min_lat, "&maxlon=", max_lon, "&minlon=", min_lon, +"&sbl=1&zm=8&mt=ter&title=IEB%20export%3A%201033%20earthquakes%20as%20a%20sortable%20table.&stitle=from%", +"201970-01-01%20to%202025-01-01%2C%20with%20magnitudes%20from%203.5%20to%2010%2C%20depths%20from%200%20to", +"%20900%20km%2C%20with%20priority%20for%20most%20recent%2C%20limited%20to%205000%2C%20%20showing%20data%20from%20USGS%2C%20") +page <- read_html(url) #Creates an html document from URL +table <- html_table(page, fill = TRUE) #Parses tables into data frames +table +dat <- data.frame(table[[1]]) +head(dat) +dim(dat) +min(dat$Year) +min(dat$Year) +max(dat$Year) +hist(dat$Month) +hist(dat$Depth) +hist(dat$Mag) +dat <- dat[which(dat$Year >= 2000), ] +dat <- dat[which(dat$Year <= terminal_year), ] +# check data download ----------------------------------------- +map('world', fill = 1, interior=F, col = gray(0.95), add=F, xlim = c(-80, -60), ylim = c(10, 30)) +points(dat$Lon, dat$Lat, pch=19, cex=0.5, col = 1) +dev.off() +tot_num <- table(dat$Year) +tot_st <- tapply(dat$Mag, dat$Year, sum, na.rm = T) +barplot(tot_num) +barplot(tot_st) +plot(as.numeric(tot_num), as.numeric(tot_st)) +datdata <- min(dat$Year):max(dat$Year) +inddata <- data.frame(as.numeric(tot_num)) +labs <- c("Earthquake activity", "number of events per year", "") +indnames <- data.frame(matrix(labs, nrow = 3, byrow = F)) +url1<-'https://www.bde.pr.gov/BDE/PREDDOCS/I_CRUISE.XLS' +library(readxl) +library(httr) +packageVersion("readxl") +GET(url1, write_disk(tf <- tempfile(fileext = ".xls"))) +df <- read_excel(tf, 2L) +str(df) +df2 = df[c(52,63),] +df2t = as.data.frame(t(df2)) +df2t = df2t[-c(1,2,12),] +yrs_PR = as.integer(df2t$V1) +cruise_PR = as.numeric(df2t$V2) +# Create a data frame +PR_cruise <- data.frame(Year = yrs_PR, Cruise_passengers = cruise_PR) +# Load necessary libraries +library(pdftools) +library(dplyr) +library(tidyr) +# Specify the URL of the PDF +url2 <- "https://usviber.org/wp-content/uploads/2023/12/Tourism-indicator-2022-12-28-23-1.pdf" +# Specify the destination file path +destfile <- "indicator_data/Cruise-indicator-2022-12-28-23-1.pdf" +# Download the PDF +download.file(url2, destfile, mode = "wb") +# Check if the file has been downloaded +file.exists(destfile) +# Load the PDF +pdf_file <- "indicator_data/Cruise-indicator-2022-12-28-23-1.pdf" +# Extract text from the PDF +pdf_text <- pdf_text(pdf_file) +# Extract the relevant page (assuming the table is on the first page) +page_text <- pdf_text[1] +# Split the text into lines +lines <- strsplit(page_text, "\n")[[1]] +# Identify the lines containing the Visitor Arrivals table +# (assuming the table starts with "VISITOR ARRIVALS" and ends before "VISITOR EXPENDITURES") +start_line <- grep("VISITOR ARRIVALS", lines) +end_line <- grep("VISITOR EXPENDITURES", lines) +table_lines <- lines[(start_line + 1):(end_line - 1)] +# Combine lines into a single text block and split by spaces +table_text <- paste(table_lines, collapse = " ") +table_data <- strsplit(table_text, " +")[[1]] +# Create years column +years <- c(2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022) +# Extract and clean the total visitors data by removing commas +passengers <- gsub(",", "", table_data[87:101]) +passengers <- as.numeric(passengers) +# Create a data frame +USVI_cruise <- data.frame(Year = years, Cruise_passengers = passengers) +# Print the data frame +print(USVI_cruise) +all_years <- data.frame(Year = min(PR_cruise$Year, USVI_cruise$Year) : max(PR_cruise$Year, USVI_cruise$Year)) +# Extract and clean the total visitors data by removing commas +passengers <- gsub(",", "", table_data[87:101]) +all_years <- data.frame(Year = min(PR_cruise$Year, USVI_cruise$Year) : max(PR_cruise$Year, USVI_cruise$Year)) +# Merge the data frames with the complete data frame to fill in missing years with NA +combined_df <- merge(all_years, PR_cruise, by = "Year", all.x = TRUE) +combined_df <- merge(combined_df, USVI_cruise, by = "Year", all.x = TRUE) +# save as indicator object ---------------------- +datdata <- all_years +# save as indicator object ---------------------- +datdata <- all_years$Year +inddata <- data.frame(cbind(combined_df$Cruise_passengers.x, combined_df$Cruise_passengers.y)) +labs <- c("Cruise passengers" , "thousands of people", "Puerto Rico", +"Cruise passengers" , "thousands of people", "USVI") +indnames <- data.frame(matrix(labs, nrow = 3, byrow = F)) +inddata <- list(labels = indnames, indicators = inddata, datelist = datdata) +class(inddata) <- "indicatordata" +ind <- inddata +plotIndicatorTimeSeries(ind, coltoplot = 1:2, plotrownum = 2, sublabel = TRUE) +save(ind, file = "indicator_objects/cruise.RData") diff --git a/indicator_processing/non_automated/outreach/2010 Sea Grant_ PIER_ Annual Report.pdf b/indicator_data/outreach/2010 Sea Grant_ PIER_ Annual Report.pdf similarity index 100% rename from indicator_processing/non_automated/outreach/2010 Sea Grant_ PIER_ Annual Report.pdf rename to indicator_data/outreach/2010 Sea Grant_ PIER_ Annual Report.pdf diff --git a/indicator_processing/non_automated/outreach/2011 Sea Grant_ PIER_ Annual Report.pdf b/indicator_data/outreach/2011 Sea Grant_ PIER_ Annual Report.pdf similarity index 100% rename from indicator_processing/non_automated/outreach/2011 Sea Grant_ PIER_ Annual Report.pdf rename to indicator_data/outreach/2011 Sea Grant_ PIER_ Annual Report.pdf diff --git a/indicator_processing/non_automated/outreach/2012 Sea Grant_ PIER_ Annual Report.pdf b/indicator_data/outreach/2012 Sea Grant_ PIER_ Annual Report.pdf similarity index 100% rename from indicator_processing/non_automated/outreach/2012 Sea Grant_ PIER_ Annual Report.pdf rename to indicator_data/outreach/2012 Sea Grant_ PIER_ Annual Report.pdf diff --git a/indicator_processing/non_automated/outreach/2013 Sea Grant_ PIER_ Annual Report.pdf b/indicator_data/outreach/2013 Sea Grant_ PIER_ Annual Report.pdf similarity index 100% rename from indicator_processing/non_automated/outreach/2013 Sea Grant_ PIER_ Annual Report.pdf rename to indicator_data/outreach/2013 Sea Grant_ PIER_ Annual Report.pdf diff --git a/indicator_processing/non_automated/outreach/2014 Sea Grant_ PIER_ Annual Report.pdf b/indicator_data/outreach/2014 Sea Grant_ PIER_ Annual Report.pdf similarity index 100% rename from indicator_processing/non_automated/outreach/2014 Sea Grant_ PIER_ Annual Report.pdf rename to indicator_data/outreach/2014 Sea Grant_ PIER_ Annual Report.pdf diff --git a/indicator_processing/non_automated/outreach/2015 Sea Grant_ PIER_ Annual Report.pdf b/indicator_data/outreach/2015 Sea Grant_ PIER_ Annual Report.pdf similarity index 100% rename from indicator_processing/non_automated/outreach/2015 Sea Grant_ PIER_ Annual Report.pdf rename to indicator_data/outreach/2015 Sea Grant_ PIER_ Annual Report.pdf diff --git a/indicator_processing/non_automated/outreach/2016 Sea Grant_ PIER_ Annual Report.pdf b/indicator_data/outreach/2016 Sea Grant_ PIER_ Annual Report.pdf similarity index 100% rename from indicator_processing/non_automated/outreach/2016 Sea Grant_ PIER_ Annual Report.pdf rename to indicator_data/outreach/2016 Sea Grant_ PIER_ Annual Report.pdf diff --git a/indicator_processing/non_automated/outreach/2017 Sea Grant_ PIER_ Annual Report.pdf b/indicator_data/outreach/2017 Sea Grant_ PIER_ Annual Report.pdf similarity index 100% rename from indicator_processing/non_automated/outreach/2017 Sea Grant_ PIER_ Annual Report.pdf rename to indicator_data/outreach/2017 Sea Grant_ PIER_ Annual Report.pdf diff --git a/indicator_processing/non_automated/outreach/2018 Sea Grant_ PIER_ Annual Report.pdf b/indicator_data/outreach/2018 Sea Grant_ PIER_ Annual Report.pdf similarity index 100% rename from indicator_processing/non_automated/outreach/2018 Sea Grant_ PIER_ Annual Report.pdf rename to indicator_data/outreach/2018 Sea Grant_ PIER_ Annual Report.pdf diff --git a/indicator_processing/non_automated/outreach/2019 Sea Grant_ PIER_ Annual Report.pdf b/indicator_data/outreach/2019 Sea Grant_ PIER_ Annual Report.pdf similarity index 100% rename from indicator_processing/non_automated/outreach/2019 Sea Grant_ PIER_ Annual Report.pdf rename to indicator_data/outreach/2019 Sea Grant_ PIER_ Annual Report.pdf diff --git a/indicator_processing/non_automated/outreach/2020 Sea Grant_ PIER_ Annual Report.pdf b/indicator_data/outreach/2020 Sea Grant_ PIER_ Annual Report.pdf similarity index 100% rename from indicator_processing/non_automated/outreach/2020 Sea Grant_ PIER_ Annual Report.pdf rename to indicator_data/outreach/2020 Sea Grant_ PIER_ Annual Report.pdf diff --git a/indicator_objects/outreach.RData b/indicator_objects/outreach.RData new file mode 100644 index 0000000000000000000000000000000000000000..c1de894b4df4b534bd0ba8b1152a765fae1cc230 GIT binary patch literal 488 zcmV&N7#LWXfE-2! z76wj`g3P=WASn!13!y>M0ua4mB~rdYt^o=_b*056MU@H#iA5!u$(aR-c_qbA6(Y%{ zxurRYC7ETZ3h70ODW!=esW3SPG}F|AQxn~bfSMJ`^NX^JGx7`cb5m1GGV{{0nXG_h za$-qIYF#XEQV=- z338<*mL%$>6#>mcQ_fkGU#5q_*W_Sk;X_!vFGf{{Rb0kQx;Kz>+juPGVAO4lK!X0dq=ba$-q-5ln;w e=$F)-%;FLpe&vPh0C^hfn129}r