Introduction

This R Markdown document transforms data that is not in CWP format into CWP format. It first changes the format of the data and then maps the data so that it adheres to CWP standards. This markdown is generated from a function, so the documentation keeps the format of a roxygen2 skeleton. A summary of the mapping process is provided. The path to the dataset is specified below; the first lines of each dataset can be found in this same repository on GitHub. The datasets are named after the historical names provided by the tRFMOs when exporting, and these names may change. The information provided in this Rmd makes it possible to identify which dataset should be used with this markdown. Additional operations are then performed to verify other aspects of the data, such as the consistency of the geolocation, the values, and the reported catches in numbers and tons. If you are interested in further details, the results and code are available for review.

# Both raw IATTC files are read from the same project-relative data directory.
raw_data_dir <- here::here('tunaatlas_scripts/pre-harmonization', 'iattc', 'catch', 'data')

# Catch records (one row per record/species).
path_to_raw_dataset_catch <- file.path(raw_data_dir, 'PublicCatchOrigFormatTunaBillfish.csv')
# Effort records, read from the same directory as the catch file.
path_to_raw_dataset_effort <- file.path(raw_data_dir, 'PublicEffortOrigFormatTunaBillfish.csv')

Harmonize IATTC Longline Catch Datasets

This function processes and harmonizes Inter-American Tropical Tuna Commission (IATTC) longline catch datasets for shark and tuna_billfish species. It prepares the data for integration into the Tuna Atlas database, ensuring compliance with data standardization requirements and optionally including metadata if the dataset is intended for database loading.

@return None; the function outputs files directly, including harmonized datasets, optional metadata, and code lists for integration within the Tuna Atlas database.

@details The function restructures the dataset to include only essential fields, performs any necessary calculations for catch units, and standardizes the format for date fields and geographical identifiers. Metadata integration is contingent on the final use of the dataset within the Tuna Atlas database.

@importFrom readr read_csv write_csv
@importFrom dplyr filter mutate left_join
@importFrom tidyr gather
@importFrom reshape2 melt
@seealso for specific data structuring operations.
@export
@keywords IATTC, tuna, fisheries, data harmonization, longline catch
@author Paul Taconet, IRD
@author Bastien Grasset, IRD

Catch input data sample:

| Record | Spp | DTypeID | Number | Weight |
|--------|-----|---------|--------|--------|
| 11407  | ALB | 2       | 17     | NA     |
| 11407  | BUM | 2       | 4      | NA     |
| 11407  | BLM | 2       | 2      | NA     |
| 11407  | SWO | 2       | 10     | NA     |
| 11407  | BET | 2       | 403    | NA     |
| 11407  | BIL | 2       | 1      | NA     |

Catch: final data sample:

| FishingFleet | Gear | time_start | time_end   | AreaName | School | Species | CatchType | CatchUnits | Catch |
|--------------|------|------------|------------|----------|--------|---------|-----------|------------|-------|
| USA          | LL   | 1992-07-01 | 1992-08-01 | 6425135  | ALL    | BSH     | ALL       | NO         | 4     |
| USA          | LL   | 1993-04-01 | 1993-05-01 | 6425135  | ALL    | BSH     | ALL       | NO         | 75    |
| USA          | LL   | 1993-04-01 | 1993-05-01 | 6430135  | ALL    | BSH     | ALL       | NO         | 15    |
| USA          | LL   | 1993-05-01 | 1993-06-01 | 6425135  | ALL    | BSH     | ALL       | NO         | 24    |
| USA          | LL   | 1994-03-01 | 1994-04-01 | 6425135  | ALL    | BSH     | ALL       | NO         | 14    |
| USA          | LL   | 1994-03-01 | 1994-04-01 | 6430135  | ALL    | BSH     | ALL       | NO         | 4     |

packages

# Attach the packages used by this script, installing any that are missing.
# `library()` is used for the final attach so that a failed installation stops
# the script immediately instead of only returning FALSE with a warning.
# The attach order (reshape, dplyr, tidyr) is kept so that name masking between
# the packages is unchanged.
for (pkg in c("reshape", "dplyr", "tidyr")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}

Historical name for the catch dataset at source: PublicCatchOrigFormatTunaBillfish.csv or PublicCatchOrigFormatShark.csv.
Historical name for the effort dataset at source: PublicEffortOrigFormatTunaBillfish.csv or PublicEffortOrigFormatShark.csv (keep the same order as for catch).

# Snapshot all current global options (presumably so they can be restored
# afterwards -- no restore is visible in this section), then set the session's
# default `encoding` option to UTF-8.
opts <- options()
options(encoding = "UTF-8")

Catches

# Load the raw catch and effort tables from the paths defined earlier in this
# document. `FALSE` is spelled out because `F` is an ordinary variable that can
# be reassigned.
catches <- read.csv(path_to_raw_dataset_catch, stringsAsFactors = FALSE)
efforts <- read.csv(path_to_raw_dataset_effort, stringsAsFactors = FALSE)

# Reshape to long format: every column other than Record, Spp and DTypeID
# (Number and Weight in the documented input sample) becomes one row, with the
# former column name in `variable` and its content in `value`.
catches <- catches %>%
  tidyr::gather(variable, value, -c("Record", "Spp", "DTypeID"))

Remove rows whose value is 0 or missing (NA)

# Keep only rows that carry an actual quantity: discard missing values and zeros.
catches <- dplyr::filter(catches, !is.na(value), value != 0)

Set the catch unit values. DType code 1 means that the data was submitted in both weight and number for the same catch; DType code 2 means that the data was submitted as number only; DType code 3 means that the data was submitted as weight only. Initialisation:

# Every row starts with the literal string 'NA' as its unit; only the
# DTypeID/variable combinations listed below receive a real unit code.
# NOTE(review): the default is the character string 'NA', not a missing value --
# confirm this is what the downstream harmonization helper expects.
catches$CatchUnits <- 'NA'

# Row positions for each reporting case (rows with NA comparisons are skipped).
index.CatchunitMT   <- with(catches, which(DTypeID == 3 & variable == "Weight"))  # weight only
index.CatchunitNO   <- with(catches, which(DTypeID == 2 & variable == "Number"))  # number only
index.CatchunitMTNO <- with(catches, which(DTypeID == 1 & variable == "Weight"))  # both reported, weight row
index.CatchunitNOMT <- with(catches, which(DTypeID == 1 & variable == "Number"))  # both reported, number row

catches$CatchUnits[index.CatchunitMT]   <- "t"
catches$CatchUnits[index.CatchunitNO]   <- "no"
catches$CatchUnits[index.CatchunitMTNO] <- "MTNO"
catches$CatchUnits[index.CatchunitNOMT] <- "NOMT"

Merge catches and efforts to have the strata in the catch file

# Attach the strata columns from the effort table to each catch row,
# matching on the record identifier.
catches <- dplyr::left_join(catches, efforts, by = "Record")

# Columns to keep (values) and the names they carry afterwards (names).
# The species code takes over the `variable` name and the flag abbreviation
# becomes `FishingFleet`.
strata_cols <- c(
  variable     = "Spp",
  value        = "value",
  CatchUnits   = "CatchUnits",
  Year         = "Year",
  Month        = "Month",
  FishingFleet = "FlagAbv",
  Lat          = "Lat",
  Lon          = "Lon"
)
catches <- catches[unname(strata_cols)]
names(catches) <- names(strata_cols)

# Constant attributes applied to every row of this dataset.
catches$SquareSize <- 5
catches$CodeSquareSize <- 6
catches$Gear <- "LL"
catches$SetType <- "ALL"

# Normalise the mixed-case species code "BuM" to "BUM".
catches$variable[catches$variable %in% "BuM"] <- "BUM"

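Convert the catches to the harmonized DSD using the helper function IATTC_CE_catches_pivotDSD_to_harmonizedDSD, which is sourced below (referred to in earlier documentation as part of IATTC_functions.R)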

# Column names passed to the helper below as its second argument
# (presumably the columns it keeps, in this order -- confirm against the helper).
colToKeep_captures <- c("FishingFleet","Gear","time_start","time_end","AreaName","School","Species","CatchType","CatchUnits","Catch")
# NOTE(review): this executes code fetched from the `master` branch at run time,
# so the result depends on network access and on the remote file's current state.
source("https://raw.githubusercontent.com/firms-gta/geoflow-tunaatlas/master/R/sardara_functions/IATTC_CE_catches_pivotDSD_to_harmonizedDSD.R")
# Replace `catches` with the output of the sourced harmonization helper.
catches<-IATTC_CE_catches_pivotDSD_to_harmonizedDSD(catches,colToKeep_captures)

# Rename the ten harmonized columns, by position, to the CWP-style names.
cwp_names <- c(
  "fishing_fleet", "gear_type", "time_start", "time_end",
  "geographic_identifier", "fishing_mode", "species",
  "measurement_type", "measurement_unit", "measurement_value"
)
names(catches) <- cwp_names

# Constant descriptors for this dataset.
catches$source_authority <- "IATTC"
catches$measurement_type <- "RC" # Retained catches
catches$measurement <- "catch"

# Store both period bounds as Date objects.
date_cols <- c("time_start", "time_end")
catches[date_cols] <- lapply(catches[date_cols], as.Date)

# Temporal extent spans whole calendar years: 1 January of the earliest start
# year to 31 December of the latest end year, written as "start/end".
first_year <- format(min(catches$time_start), "%Y")
last_year <- format(max(catches$time_end), "%Y")
dataset_temporal_extent <- paste0(first_year, "-01-01", "/", last_year, "-12-31")

# Write the harmonized table to disk and keep a copy for the mapping step.
output_name_dataset <- "Dataset_harmonized.csv"
write.csv(catches, output_name_dataset, row.names = FALSE)
georef_dataset <- catches

Load pre-harmonization scripts and apply mappings

# Download the code-list mapping helper into the working directory, then load it.
mapping_script_url <- 'https://raw.githubusercontent.com/firms-gta/geoflow-tunaatlas/master/R/tunaatlas_scripts/pre-harmonization/map_codelists_no_DB.R'
mapping_script_local <- 'local_map_codelists_no_DB.R'
download.file(mapping_script_url, destfile = mapping_script_local)
source(mapping_script_local)

# Map the harmonized dataset's codes using the remote mapping table.
fact <- "catch"
mapping_codelist <- map_codelists_no_DB(
  fact,
  mapping_dataset = "https://raw.githubusercontent.com/fdiwg/fdi-mappings/main/global/firms/gta/codelist_mapping_rfmos_to_global.csv",
  dataset_to_map = georef_dataset,
  mapping_keep_src_code = FALSE,
  summary_mapping = TRUE,
  source_authority_to_map = c("IATTC", "CCSBT", "WCPFC")
)
## 
##  mapping dimension gear_type with code list mapping
##  mapping dimension species with code list mapping
##  mapping dimension fishing_fleet with code list mapping
##  mapping dimension fishing_mode with code list mapping
##  mapping dimension measurement_type with code list mapping

Handle unmapped values and save the results

# Replace the 'UNK' placeholder in the mapped dataset with the codes used for
# unknown values: 'NEI' for fishing fleets and '99.9' for gear types.
georef_dataset <- mapping_codelist$dataset_mapped %>%
  dplyr::mutate(
    fishing_fleet = ifelse(fishing_fleet == 'UNK', 'NEI', fishing_fleet),
    gear_type = ifelse(gear_type == 'UNK', '99.9', gear_type)
  )

# Write the mapping summary, the list of unmapped codes and the final dataset.
# `fwrite` is namespace-qualified because data.table is never attached in this
# script; the unqualified call only works if something else attached it first.
data.table::fwrite(mapping_codelist$recap_mapping, 'recap_mapping.csv')
data.table::fwrite(mapping_codelist$not_mapped_total, 'not_mapped_total.csv')
data.table::fwrite(georef_dataset, 'CWP_dataset.csv')

Display the first few rows of the mapping summaries

# Show the first rows of the source-to-target code mapping summary.
recap_preview <- head(mapping_codelist$recap_mapping)
print(recap_preview)
## # A tibble: 6 × 5
##   src_code trg_code src_codingsystem trg_codingsystem   source_authority
##   <chr>    <chr>    <chr>            <chr>              <chr>           
## 1 BLZ      BLZ      flag_iattc       fishingfleet_firms IATTC           
## 2 CHN      CHN      flag_iattc       fishingfleet_firms IATTC           
## 3 ESP      EUESP    flag_iattc       fishingfleet_firms IATTC           
## 4 JPN      JPN      flag_iattc       fishingfleet_firms IATTC           
## 5 KOR      KOR      flag_iattc       fishingfleet_firms IATTC           
## 6 MEX      MEX      flag_iattc       fishingfleet_firms IATTC
# Show the first rows of the codes that could not be mapped.
not_mapped_preview <- head(mapping_codelist$not_mapped_total)
print(not_mapped_preview)
##   Value source_authority        Dimension
## 1   ALL            IATTC     fishing_mode
## 2    RC            IATTC measurement_type