Introduction

This R Markdown document is designed to transform data that is not in CWP format into CWP format. It first changes the format of the data and then maps the data to adhere to CWP standards. This markdown is automatically created from the function: https://raw.githubusercontent.com/eblondel/geoflow-tunaatlas/master/R/tunaatlas_scripts/pre-harmonization/east_pacific_ocean_catch_1deg_1m_ps_tunaatlasiattc_level0__byflag.R. The documentation keeps the format of the roxygen2 skeleton.

A summary of the mapping process is provided. The path to the dataset is specified. The first lines of each dataset are available in this same GitHub repository. The datasets are named after the historical names used by the tRFMOs at export time, and these names may change. The information provided in this Rmd makes it clear which dataset should be used with this markdown.

Additional operations are performed next to verify other aspects of the data, such as the consistency of the geolocation, the values, and the reported catches in numbers and tons.

If you are interested in further details, the results and codes are available for review.

Each .Rmd script requires the user to knit the dataset at the beginning of the script in order to execute the harmonization process correctly. It is also possible to run the code chunk by chunk but be sure to be in the correct working directory (i.e., the one of the .Rmd).

# Location of the raw IATTC purse-seine "by flag" CSV, resolved with
# here::here() from the path components below.
path_to_raw_dataset <- here::here(
  'R/tunaatlas_scripts/pre-harmonization',
  'iattc',
  'catch',
  'data',
  'PublicPSTunaFlag.csv'
)

Harmonize IATTC PS ByFlag Catch Datasets

Harmonizes the structure of IATTC PS (Purse Seine) catch datasets by flag. This function is designed to adjust catch data to fit standardized formats required for integration into the Tuna Atlas database. The process involves reformatting the data and possibly integrating metadata and code lists if they will be loaded into the Tuna Atlas database. This script works with any data that has the first 6 columns named and ordered as follows: {Year|Month|Flag|LatC1|LonC1|NumSets}

@details The function reads raw data, processes it according to specified stratifications such as ‘PublicPSBillfishFlag’, ‘PublicPSTunaFlag’, and ‘PublicPSSharkFlag’, and then outputs a harmonized dataset. It can conditionally include metadata and code lists based on whether the data is intended for database loading.

@return Does not return anything; it outputs files directly to the specified location within the geoflow system.

@importFrom dplyr filter mutate @importFrom readr read_csv write_csv @seealso for other conversions related to IATTC data, to standardize time formatting for database integration. @export @author Paul Taconet, IRD @author Bastien Grasset, IRD @keywords IATTC, tuna, fisheries, data harmonization, catch data

  # Load the remote helper script; the script later calls
  # FUN_catches_IATTC_CE_Flag_or_SetType(), presumably defined by this file.
  source(
    file = "https://raw.githubusercontent.com/firms-gta/geoflow-tunaatlas/master/R/sardara_functions/FUN_catches_IATTC_CE_Flag_or_SetType.R"
  )

packages

  # Attach the packages this script relies on, installing any that are missing.
  # library() performs the final attach so that a failed installation stops the
  # script right here; require() would only return FALSE and let the failure
  # surface later as a "could not find function" error.
  # The attach order (data.table, dplyr, reshape2) is kept as in the original
  # because it determines which package masks which function.
  for (pkg in c("data.table", "dplyr", "reshape2")) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      install.packages(pkg)
    }
    library(pkg, character.only = TRUE)
  }

Historical name for the dataset at source PublicPSTunaFlag.csv or PublicPSBillfishFlag.csv

# Snapshot all current options into `opts` before changing anything.
# NOTE(review): no options(opts) restore is visible in this part of the
# document - confirm whether the snapshot is used later.
opts <- options()
# Set the `encoding` option to UTF-8 for the rest of the session.
options(encoding = "UTF-8")

Catches

# Read the raw file through the IATTC helper sourced earlier in this script.
# NOTE(review): "Flag" and "PS" presumably select the by-flag stratification
# and the purse-seine gear - confirm against the helper's definition.
catches_pivot_IATTC <- FUN_catches_IATTC_CE_Flag_or_SetType(
  path_to_raw_dataset,
  "Flag",
  "PS"
)
# The NumSets column is dropped before the catch conversion.
catches_pivot_IATTC$NumSets <- NULL

# Columns handed to the pivot-to-harmonized conversion helper.
colToKeep_captures <- c(
  "FishingFleet", "Gear", "time_start", "time_end", "AreaName",
  "School", "Species", "CatchType", "CatchUnits", "Catch"
)
source(
  file = "https://raw.githubusercontent.com/firms-gta/geoflow-tunaatlas/master/R/sardara_functions/IATTC_CE_catches_pivotDSD_to_harmonizedDSD.R"
)
catches <- IATTC_CE_catches_pivotDSD_to_harmonizedDSD(
  catches_pivot_IATTC,
  colToKeep_captures
)

# Rename the columns by position to the target names used downstream; this
# relies on the conversion helper returning its columns in this order.
colnames(catches) <- c(
  "fishing_fleet",
  "gear_type",
  "time_start",
  "time_end",
  "geographic_identifier",
  "fishing_mode",
  "species",
  "measurement_type",
  "measurement_unit",
  "measurement_value"
)

# Both time bounds are converted to Date.
catches$time_start <- as.Date(catches$time_start)
catches$time_end <- as.Date(catches$time_end)

# Constant descriptive columns, identical for every row.
catches$source_authority <- "IATTC"
catches$measurement_type <- "RC" # Retained catches
catches$measurement <- "catch"
catches$measurement_processing_level <- "unknown"

# Temporal extent as "<first year>-01-01/<last year>-12-31".
dataset_temporal_extent <- paste(
  paste0(
    c(
      format(min(catches$time_start), "%Y"),
      format(max(catches$time_end), "%Y")
    ),
    c("-01-01", "-12-31")
  ),
  collapse = "/"
)

output in same folder as path_to_raw_dataset

# Path of the harmonized CSV, built with here::here().
output_name_dataset <- here::here(
  'R/tunaatlas_scripts/pre-harmonization',
  'iattc',
  'catch',
  'data',
  'PublicPSTunaFlag_harmonized.csv'
)

# Keep the harmonized table under the name used by the mapping step, then
# write it to disk without row names.
georef_dataset <- catches
write.csv(x = catches, file = output_name_dataset, row.names = FALSE)

@ Load pre-harmonization scripts and apply mappings

# Download the mapping helper into the working directory and source it.
# download.file() reports failure through a non-zero status code rather than
# always raising an error, so the status is checked before sourcing; otherwise
# a stale or missing local copy could be sourced.
download_status <- download.file(
  'https://raw.githubusercontent.com/firms-gta/geoflow-tunaatlas/master/R/tunaatlas_scripts/pre-harmonization/map_codelists_no_DB.R',
  destfile = 'local_map_codelists_no_DB.R'
)
if (download_status != 0) {
  stop(
    "Download of map_codelists_no_DB.R failed (status ", download_status, ")",
    call. = FALSE
  )
}
source('local_map_codelists_no_DB.R')

# Map the harmonized dataset using the mapping table below. Later code reads
# the `dataset_mapped`, `recap_mapping` and `not_mapped_total` elements of
# the result.
fact <- "catch"
mapping_codelist <- map_codelists_no_DB(
  fact,
  mapping_dataset = "https://raw.githubusercontent.com/fdiwg/fdi-mappings/main/global/firms/gta/codelist_mapping_rfmos_to_global.csv",
  dataset_to_map = georef_dataset,
  mapping_keep_src_code = FALSE,
  summary_mapping = TRUE,
  source_authority_to_map = c("IATTC", "CCSBT", "WCPFC")
)
## 
##  mapping dimension gear_type with code list mapping
## 
##  mapping dimension species with code list mapping
## 
##  mapping dimension fishing_fleet with code list mapping
## 
##  mapping dimension fishing_mode with code list mapping

@ Handle unmapped values and save the results

# Recode the leftover "UNK" placeholders: fishing fleet becomes "NEI" and
# gear type becomes "99.9"; every other value is kept as is.
georef_dataset <- mapping_codelist$dataset_mapped %>%
  dplyr::mutate(
    fishing_fleet = ifelse(fishing_fleet == "UNK", "NEI", fishing_fleet),
    gear_type = ifelse(gear_type == "UNK", "99.9", gear_type)
  )

# Write the mapping recap, the unmapped totals and the mapped dataset, in
# that order, to the data folder.
invisible(Map(
  function(tbl, file_name) {
    data.table::fwrite(
      tbl,
      here::here(
        "R/tunaatlas_scripts/pre-harmonization",
        "iattc", "catch", "data", file_name
      )
    )
  },
  list(
    mapping_codelist$recap_mapping,
    mapping_codelist$not_mapped_total,
    georef_dataset
  ),
  c(
    "PublicPSTunaFlag_recap_mapping.csv",
    "PublicPSTunaFlag_not_mapped_total.csv",
    "PublicPSTunaFlag_CWP_dataset.csv"
  )
))

Display the first few rows of the mapping summaries

# Show the first six rows of the mapping recap table.
print(utils::head(mapping_codelist$recap_mapping, n = 6L))
## # A tibble: 6 × 5
##   src_code trg_code src_codingsystem trg_codingsystem   source_authority
##   <chr>    <chr>    <chr>            <chr>              <chr>           
## 1 MEX      MEX      flag_iattc       fishingfleet_firms IATTC           
## 2 USA      USA      flag_iattc       fishingfleet_firms IATTC           
## 3 ALB      ALB      species_iattc    species_asfis      IATTC           
## 4 BZX      BZX      species_iattc    species_asfis      IATTC           
## 5 PBF      PBF      species_iattc    species_asfis      IATTC           
## 6 SKJ      SKJ      species_iattc    species_asfis      IATTC