merging shellcatch database

Gulf-IEA · Jun 2, 2023 · 127fb44 · 127fb44
1 parent b3d9303
commit 127fb44
Show file tree

Hide file tree

Showing 6 changed files with 762 additions and 20 deletions.
diff --git a/indicator_processing/fishery_dependent/INDICATOR_gini.R b/indicator_processing/fishery_dependent/INDICATOR_gini.R
@@ -14,11 +14,11 @@ calcGini <- function(vec) {
 
 # input data for Puerto Rico ---------------------------
 setwd("C:/Users/mandy.karnauskas/Desktop/Caribbean-ESR/indicator_processing/fishery_dependent/")
-dat <- read.csv("C:/Users/mandy.karnauskas/Desktop/CONFIDENTIAL/CaribbeanData/Jun2022/PR_landings_83_20.csv")
+dat <- read.csv("C:/Users/mandy.karnauskas/Desktop/CONFIDENTIAL/CaribbeanData/Jun2022/PR_landings_83_20_wSC.csv")
 
 # define start and end years ---------------------------
 styear <- 2012
-enyear <- 2020
+enyear <- 2021
 
 d <- dat[which(dat$YEAR_LANDED >= styear & dat$YEAR_LANDED <= enyear), ]
 

diff --git a/indicator_processing/fishery_dependent/INDICATOR_total_landings.R b/indicator_processing/fishery_dependent/INDICATOR_total_landings.R
@@ -5,18 +5,17 @@ rm(list = ls())
 
 # input data for Puerto Rico ---------------------------
 setwd("C:/Users/mandy.karnauskas/Desktop/Caribbean-ESR/indicator_processing/fishery_dependent/")
-dat <- read.csv("C:/Users/mandy.karnauskas/Desktop/CONFIDENTIAL/CaribbeanData/Jun2022/PR_landings_83_20.csv")
+dat <- read.csv("C:/Users/mandy.karnauskas/Desktop/CONFIDENTIAL/CaribbeanData/Jun2022/PR_landings_83_20_wSC.csv")
 
 table(dat$YEAR_LANDED)
 
 # check multiplier adjustments -------------------------
 
 dat$xADJ <- dat$POUNDS_LANDED * 1/dat$CORRECTION_FACTOR
-table(round(dat$xADJ) == round(dat$ADJUSTED_POUNDS))
-#plot(dat$xADJ, dat$ADJUSTED_POUNDS)
-table(round(dat$xADJ) == round(dat$ADJUSTED_POUNDS), dat$YEAR_LANDED)
-
-dat$ADJUSTED_POUNDS <- dat$xADJ
+hist(dat$ADJUSTED_POUNDS - dat$xADJ)
+max(abs(dat$ADJUSTED_POUNDS - dat$xADJ), na.rm = T)
+table(round(dat$ADJUSTED_POUNDS) - round(dat$xADJ))
+#dat$ADJUSTED_POUNDS <- dat$xADJ
 
 # define start and end years ---------------------------
 styear <- 1990
@@ -47,7 +46,7 @@ table(d$ITIS_COMMON_NAME, d$sppgrp)
 totland_pr <- tapply(d$ADJUSTED_POUNDS, list(d$YEAR_LANDED, d$sppgrp), sum, na.rm = T) / 10^3
 dim(totland_pr)
 totland_pr
-matplot(totland_pr, type = "l")
+matplot(totland_pr, type = "l", lty = 1, lwd = 2)
 
 ls()
 
@@ -107,10 +106,9 @@ class(s) <- "indicatordata"
 setwd("C:/Users/mandy.karnauskas/Desktop/Caribbean-ESR/indicator_plots/")
 
 plotIndicatorTimeSeries(s, coltoplot = 1:6, plotrownum = 2, sublabel = T, sameYscale = F, 
-                        widadj = 0.8, hgtadj = 0.7,
-                        outtype = "png")
+                        widadj = 0.8, hgtadj = 0.7, trendAnalysis = F)   # outtype = "png")
 
-inddata <- s
-save(inddata, file = "C:/Users/mandy.karnauskas/Desktop/Caribbean-ESR/indicator_objects/landings.RData")
+#inddata <- s
+#save(inddata, file = "C:/Users/mandy.karnauskas/Desktop/Caribbean-ESR/indicator_objects/landings.RData")
 
 
diff --git a/indicator_processing/fishery_dependent/PREPROCESS_build_reference_file.R b/indicator_processing/fishery_dependent/PREPROCESS_build_reference_file.R
@@ -5,13 +5,15 @@ library(rfishbase)
 
 setwd("C:/Users/mandy.karnauskas/Desktop/CONFIDENTIAL/CaribbeanData/Jun2022/")
 
-# concatenate landings data files -----------------------
+# concatenate PR logbook data files  -----------------------
+# ONLY NEED TO DO THIS ONCE! 
+
 lis <- dir()[grep("PR_landings", dir())]
 lis
-d1 <- read.csv(lis[1])
-d2 <- read.csv(lis[2])
-d3 <- read.csv(lis[3])
-d4 <- read.csv(lis[4])
+#d1 <- read.csv(lis[1])
+#d2 <- read.csv(lis[2])
+#d3 <- read.csv(lis[3])
+#d4 <- read.csv(lis[4])
 
 table(names(d1) == names(d2))
 table(names(d1) == names(d3))
@@ -21,7 +23,150 @@ d <- rbind(d3, d4, d1, d2)
 names(d)
 head(d)
 
-write.table(d, file = "PR_landings_83_20.csv", sep = ",", col.names = T, row.names = F)
+#write.table(d, file = "PR_landings_83_20.csv", sep = ",", col.names = T, row.names = F)
+
+
+
+# create file to match names in logbook and shellcatch ---------------
+# DONE 06/02/2023 - don't redo until new shellcatch pull 
+
+sc <- read.csv("C:/Users/mandy.karnauskas/Desktop/CONFIDENTIAL/CaribbeanData/Jun2022/shellcatch_pr_data_req_02152023_C.csv")   # original shellcatch data
+apply(sc[1:4], 2, table)
+
+dat <- read.csv("C:/Users/mandy.karnauskas/Desktop/CONFIDENTIAL/CaribbeanData/Jun2022/PR_landings_83_20.csv")
+
+head(sc)
+head(dat)
+names(sc) %in% names(dat)
+d <- data.frame(as.matrix(table(dat$ITIS_COMMON_NAME)))  # data frame for matching
+names(d) <- "N"
+d$V2 <- rownames(d)
+d$V3 <- NA
+d$V4 <- NA
+head(d)
+
+for (i in seq_len(nrow(d))) {    # reverse names for matching
+  # split on commas and rejoin the parts in reverse order with single spaces,
+  # i.e. "A,B" becomes "B A"; handles any number of parts, so a name with 4+
+  # parts no longer silently inherits the previous row's value
+  b <- unlist(strsplit(d$V2[i], ","))
+  d$V3[i] <- paste(rev(b), collapse = " ")  }
+
+sclis <- as.character(unique(sc$SPECIES_NM))
+head(d)
+dim(d)
+length(unique(d$V2))
+length(sclis)
+
+d$V4 <- sclis[match(d$V3, sclis)]  # exact match of reversed logbook name to shellcatch name
+sort(d$V4)
+# logical negation instead of -which(): -which() drops everything when nothing matches
+mis <- sclis[!(sclis %in% d$V4)]   # shellcatch names with no exact match
+length(mis)
+for (i in seq_along(mis)) {      # partial matching: shellcatch name contained in logbook name
+  d$V4[grep(mis[i], d$V3, fixed = TRUE)] <- mis[i]  }
+
+summary(is.na(d$V4))
+length(sclis)
+unmatched <- sclis[!(sclis %in% d$V4)]   # still unmatched; listed for manual editing
+unmatched
+length(unmatched)
+d$V5 <- c(unmatched, rep(NA, nrow(d) - length(unmatched)))
+names(d) <- c("N", "logbook", "adj", "shellcatch", "unmatched")
+
+# csv file for manual edits to match names ------------------
+#write.table(d, file = "C:/Users/mandy.karnauskas/Desktop/Caribbean-ESR/indicator_processing/fishery_dependent/name_matches.csv", 
+#            sep = ",", col.names = T, row.names = F)
+
+# output and then manually match up remaining names - this takes time
+# any species not in logbook, need to look up and insert into spp_ref file
+
+
+# merge shellcatch with logbook -----------------------
+# DONE 06/02/2023
+
+rm(list = ls())
+setwd("C:/Users/mandy.karnauskas/Desktop/Caribbean-ESR/indicator_processing/fishery_dependent")
+
+sc <- read.csv("C:/Users/mandy.karnauskas/Desktop/CONFIDENTIAL/CaribbeanData/Jun2022/shellcatch_pr_data_req_02152023_C.csv")   # original shellcatch data
+sc$POUNDS_LANDED2 <- as.numeric(as.vector(sc$POUNDS_LANDED))
+sc$ADJUSTED_POUNDS2 <- as.numeric(as.vector(sc$ADJUSTED_POUNDS))
+sc$CORRECTION_FACTOR <- as.numeric(as.vector(sc$CORRECTION_FACTOR))
+sc$PRICE <- as.numeric(as.vector(sc$PRICE))
+summary(sc$POUNDS_LANDED == sc$POUNDS_LANDED2)
+summary(sc$ADJUSTED_POUNDS == sc$ADJUSTED_POUNDS2)
+
+apply(sc[1:4], 2, table, useNA = "always")
+table(sc$LANDING_AREA_COUNTY_OR_MUNICIPALITY)
+table(sc$COAST)
+table(sc$GEAR_NAME)
+hist(sc$POUNDS_LANDED2)
+hist(sc$ADJUSTED_POUNDS2)
+table(sc$CORRECTION_FACTOR)
+co <- sc$POUNDS_LANDED2 / sc$CORRECTION_FACTOR
+table(round(co- sc$ADJUSTED_POUNDS2))
+hist(sc$PRICE)
+table(sc$AREA_CD1)
+
+sc$SPECIES_NM <- as.character(sc$SPECIES_NM)
+
+# replace duplicate names -----------------------
+sc$SPECIES_NM[which(sc$SPECIES_NM == "CROAKERS")] <- "CROAKER"
+sc$SPECIES_NM[which(sc$SPECIES_NM == "HERRING")] <- "THREAD HERRING"
+sc$SPECIES_NM[which(sc$SPECIES_NM == "SMOOTHTAIL SPINY LOBSTER")] <- "SPINY LOBSTER"
+sc$SPECIES_NM[which(sc$SPECIES_NM == "KING MACKAREL, KINGFISH")] <- "KINGFISH MACKEREL"
+sc$SPECIES_NM[which(sc$SPECIES_NM == "WENCHMAN")] <- "CARDINAL"
+sc$SPECIES_NM[which(sc$SPECIES_NM == "SPOTTED TRUNKFISH")] <- "TRUNKFISH"
+sc$SPECIES_NM[which(sc$SPECIES_NM == "SMOOTH TRUNKFISH")] <- "TRUNKFISH"
+sc$SPECIES_NM[which(sc$SPECIES_NM == "SEA BASSES")] <- "GROUPERS"
+
+nam <- read.csv("name_matches_edited.csv")
+
+dim(sc)
+summary(sc$SPECIES_NM %in% nam$shellcatch)
+sc[!(sc$SPECIES_NM %in% nam$shellcatch), ]   # records whose species has no entry in nam$shellcatch
+# drop placeholder entries; %in% keeps all rows when the value is absent (-which() would drop every row)
+sc <- sc[!(sc$SPECIES_NM %in% "SELECT TO MANUALLY INPUT"), ]
+summary(sc$SPECIES_NM %in% nam$shellcatch)
+sc[which(sc$SPECIES_NM == "WARMOUTH BASS"),]   # inspect before removing
+sc <- sc[!(sc$SPECIES_NM %in% "WARMOUTH BASS"),]
+dim(sc)
+
+match(sc$SPECIES_NM, nam$shellcatch)
+table(match(sc$SPECIES_NM, nam$shellcatch))
+sc$logname <- as.character(nam$logbook[match(sc$SPECIES_NM, nam$shellcatch)])
+cbind(sc$logname, sc$SPECIES_NM)
+
+# merge with logbook -------------------------------
+dat <- read.csv("C:/Users/mandy.karnauskas/Desktop/CONFIDENTIAL/CaribbeanData/Jun2022/PR_landings_83_20.csv")
+
+head(dat)
+head(sc)
+
+#                   VESSEL     YEAR_LANDED  MONTH_LANDED    DAY_LANDED FISHING_CENTER_ED   FISHING_CENTER_NAME       MUNICIPALITY  
+sclog <- data.frame(sc$ISLAND, sc$TRIP_YEAR, sc$TRIP_MONTH, sc$TRIP_DAY, NA, sc$LANDING_AREA_COUNTY_OR_MUNICIPALITY, sc$LANDING_AREA_COUNTY_OR_MUNICIPALITY, 
+#AREA_FISHED1 AREA_FISHED2 AREA_FISHED3 AREA_FISHED4 FIN_GEAR_CODE  FIN_GEAR_NAME PR_ID_CODE_ED        
+ sc$AREA_CD1, sc$AREA_CD1, sc$AREA_CD1, sc$AREA_CD1, NA,            sc$GEAR_NAME, sc$LICENSE, 
+#NUMBER_OF_TRIPS_ED   GEAR_QTY_ED   GEAR_HOURS_ED   MINIMUM_DEPTH_ED   MAXIMUM_DEPTH_ED  SPECIES_ITIS  ITIS_COMMON_NAME
+NA, NA, NA, NA, NA, NA, sc$logname, 
+#ITIS_SCIENTIFIC_NAME  POUNDS_LANDED      VALUE_IN_DOLLARS  CORRECTION_FACTOR     ADJUSTED_POUNDS PRICE_PER_LB  DISTANCE DISTANCE_DESCRIBE TRIP_TICKET_NUMBER_ED            
+              NA,      sc$POUNDS_LANDED2, sc$PRICE,         sc$CORRECTION_FACTOR, sc$ADJUSTED_POUNDS2, sc$PRICE/sc$ADJUSTED_POUNDS2, NA, NA, sc$FISHING_TRIP_ID)
+
+dim(dat)
+dim(sclog)
+
+cbind(names(dat), names(sclog))
+names(sclog) <- names(dat)
+
+dat2 <- rbind(dat, sclog)
+head(dat2)
+dim(sclog) + dim(dat)
+dim(dat2)
+
+#write.table(dat2, file = "C:/Users/mandy.karnauskas/Desktop/CONFIDENTIAL/CaribbeanData/Jun2022/PR_landings_83_20_wSC.csv", 
+#            sep = ",", col.names = T, row.names = F)
+
+d <- dat2
 
 # take a look at data fields ----------------------------
 table(d$VESSEL, useNA = "always")
@@ -229,4 +374,4 @@ ref$USVI[which(ref$COMname %in% sp)] <- 1
 ref$newnames <- NA
 ref$newnames[1:length(sp[-which(sp %in% ref$COMname)])] <- sp[-which(sp %in% ref$COMname)]
 
-write.csv(ref, file = "spp_ref_STX.csv", row.names = F)
+write.csv(ref, file = "spp_ref_STX.csv", row.names = F)