Introduction

This R Markdown document transforms data that is not in CWP format into CWP format. It first restructures the data and then maps it so that it adheres to CWP standards. This document is generated automatically from the function at https://raw.githubusercontent.com/eblondel/geoflow-tunaatlas/master/R/tunaatlas_scripts/pre-harmonization/east_pacific_ocean_catch_1deg_1m_ps_tunaatlasiattc_level0__shark_byflag.R, and its documentation keeps the format of the roxygen2 skeleton.

A summary of the mapping process is provided. The path to the dataset is specified. The first line of each dataset is available in this same GitHub repository. The datasets are named after the historical names given by the tRFMOs at export time, and these names may change. The information provided in this Rmd makes it possible to identify which dataset should be used with this markdown.

Additional operations are performed next to verify other aspects of the data, such as the consistency of the geolocation, the values, and the reported catches in numbers and tons.

If you are interested in further details, the results and codes are available for review.

Each .Rmd script requires the user to knit the dataset at the beginning of the script in order to execute the harmonization process correctly. It is also possible to run the code chunk by chunk but be sure to be in the correct working directory (i.e., the one of the .Rmd).

# Location of the raw IATTC purse-seine shark-by-flag CSV, built with
# here::here() from the pre-harmonization data folder.
path_to_raw_dataset <- here::here(
  "R/tunaatlas_scripts/pre-harmonization",
  "iattc",
  "catch",
  "data",
  "PublicPSSharkFlag.csv"
)

Harmonize IATTC PS Shark ByFlag Catch Datasets

This function harmonizes the structure of IATTC PS (Purse Seine) shark catch datasets by flag, preparing them for integration into the Tuna Atlas database. It ensures that only the essential fields are retained and that metadata is included if the dataset is destined for the database.

@return None; this function outputs files directly, including harmonized datasets, optional metadata, and code lists for integration within the Tuna Atlas database.

@details The function processes the data based on flag categories specifically for shark species. It involves cleaning, restructuring, and harmonizing data fields to meet specified standards. Metadata integration is optional and contingent on the destination of the dataset.

@importFrom dplyr select mutate
@importFrom readr read_csv write_csv
@seealso FUN_catches_IATTC_CE_Flag_or_SetType_Shark for the specific processing of shark catches by flag, IATTC_CE_catches_pivotDSD_to_harmonizedDSD for general data structuring.
@export
@author Paul Taconet, IRD
@keywords IATTC, tuna, fisheries, data harmonization, shark catch

This script works with any data that has the first 6 columns named and ordered as follows: {Year|Month|Flag|LatC1|LonC1|NumSets}

Packages

  # Attach the packages used by this script, installing any that are missing.
  # library() is used for the final attach (instead of require()) so that a
  # failed installation stops the script immediately rather than returning
  # FALSE and letting later code fail with an unrelated error.
  # The attach order (data.table, dplyr, reshape2) is kept so that function
  # masking between the packages is unchanged.
  invisible(lapply(
    c("data.table", "dplyr", "reshape2"),
    function(pkg) {
      if (!requireNamespace(pkg, quietly = TRUE)) {
        install.packages(pkg)
      }
      library(pkg, character.only = TRUE)
    }
  ))

Historical name of the dataset at source: PublicPSSharkFlag.csv

# Snapshot the current global options into `opts` before changing any of them.
# NOTE(review): no options(opts) restore is visible in this section; confirm
# that the snapshot is restored later in the document.
opts <- options()
# Set the global `encoding` option to UTF-8.
options(encoding = "UTF-8")

Catches

# Load the IATTC shark catch-and-effort reader from the firms-gta repository.
source(
  "https://raw.githubusercontent.com/firms-gta/geoflow-tunaatlas/master/R/sardara_functions/FUN_catches_IATTC_CE_Flag_or_SetType_Shark.R"
)

# Read the raw file through the sourced helper, passing "Flag" and "PS".
catches_pivot_IATTC <- FUN_catches_IATTC_CE_Flag_or_SetType_Shark(
  path_to_raw_dataset,
  "Flag",
  "PS"
)

# Remove the NumSets column from the result.
catches_pivot_IATTC$NumSets <- NULL

# Columns handed to the pivot-to-harmonized conversion helper.
colToKeep_captures <- c(
  "FishingFleet",
  "Gear",
  "time_start",
  "time_end",
  "AreaName",
  "School",
  "Species",
  "CatchType",
  "CatchUnits",
  "Catch"
)

# Load the conversion helper from the firms-gta repository and apply it.
source(
  "https://raw.githubusercontent.com/firms-gta/geoflow-tunaatlas/master/R/sardara_functions/IATTC_CE_catches_pivotDSD_to_harmonizedDSD.R"
)
catches <- IATTC_CE_catches_pivotDSD_to_harmonizedDSD(
  catches_pivot_IATTC,
  colToKeep_captures
)

# Rename the ten columns positionally to the target column names.
names(catches) <- c(
  "fishing_fleet",
  "gear_type",
  "time_start",
  "time_end",
  "geographic_identifier",
  "fishing_mode",
  "species",
  "measurement_type",
  "measurement_unit",
  "measurement_value"
)

# Cast both period bounds to Date.
catches$time_start <- as.Date(catches$time_start)
catches$time_end <- as.Date(catches$time_end)

# Constant columns; measurement_type is overwritten for every row.
catches$source_authority <- "IATTC"
catches$measurement_type <- "RC" # Retained catches
catches$measurement <- "catch"
catches$measurement_processing_level <- "original_sample" # as collected by observers

# Temporal extent as "<first year>-01-01/<last year>-12-31".
dataset_temporal_extent <- sprintf(
  "%s-01-01/%s-12-31",
  format(min(catches$time_start), "%Y"),
  format(max(catches$time_end), "%Y")
)

The output is written to the same folder as path_to_raw_dataset.

# Destination of the harmonized CSV, in the same data folder as the raw file.
output_name_dataset <- here::here(
  "R/tunaatlas_scripts/pre-harmonization",
  "iattc",
  "catch",
  "data",
  "PublicPSSharkFlag_harmonized.csv"
)

# Keep the harmonized table under the name used by the mapping step, and
# write it to disk without row names.
georef_dataset <- catches
write.csv(x = catches, file = output_name_dataset, row.names = FALSE)

Load pre-harmonization scripts and apply mappings

# Download the code-list mapping helper into the working directory and load it.
download.file(
  url = "https://raw.githubusercontent.com/firms-gta/geoflow-tunaatlas/master/R/tunaatlas_scripts/pre-harmonization/map_codelists_no_DB.R",
  destfile = "local_map_codelists_no_DB.R"
)
source("local_map_codelists_no_DB.R")

# Run the mapping for the "catch" fact against the FIRMS global mapping table.
fact <- "catch"
mapping_codelist <- map_codelists_no_DB(
  fact,
  mapping_dataset = "https://raw.githubusercontent.com/fdiwg/fdi-mappings/main/global/firms/gta/codelist_mapping_rfmos_to_global.csv",
  dataset_to_map = georef_dataset,
  mapping_keep_src_code = FALSE,
  summary_mapping = TRUE,
  source_authority_to_map = c("IATTC", "CCSBT", "WCPFC")
)
## 
##  mapping dimension gear_type with code list mapping
## 
##  mapping dimension species with code list mapping
## 
##  mapping dimension fishing_fleet with code list mapping
## 
##  mapping dimension fishing_mode with code list mapping

Handle unmapped values and save the results

# Replace the "UNK" placeholder in fishing_fleet and gear_type with the codes
# "NEI" and "99.9" respectively.
georef_dataset <- dplyr::mutate(
  mapping_codelist$dataset_mapped,
  fishing_fleet = ifelse(fishing_fleet == "UNK", "NEI", fishing_fleet),
  gear_type = ifelse(gear_type == "UNK", "99.9", gear_type)
)

# Write the mapping recap, the unmapped totals and the mapped dataset to the
# data folder.
data.table::fwrite(
  mapping_codelist$recap_mapping,
  here::here(
    "R/tunaatlas_scripts/pre-harmonization",
    "iattc", "catch", "data",
    "PublicPSSharkFlag_recap_mapping.csv"
  )
)
data.table::fwrite(
  mapping_codelist$not_mapped_total,
  here::here(
    "R/tunaatlas_scripts/pre-harmonization",
    "iattc", "catch", "data",
    "PublicPSSharkFlag_not_mapped_total.csv"
  )
)
data.table::fwrite(
  georef_dataset,
  here::here(
    "R/tunaatlas_scripts/pre-harmonization",
    "iattc", "catch", "data",
    "PublicPSSharkFlag_CWP_dataset.csv"
  )
)

Display the first few rows of the mapping summaries

# Show the first six rows of the mapping recap table.
print(
  head(mapping_codelist$recap_mapping, n = 6L)
)
## # A tibble: 6 × 5
##   src_code trg_code src_codingsystem trg_codingsystem   source_authority
##   <chr>    <chr>    <chr>            <chr>              <chr>           
## 1 COL      COL      flag_iattc       fishingfleet_firms IATTC           
## 2 ECU      ECU      flag_iattc       fishingfleet_firms IATTC           
## 3 MEX      MEX      flag_iattc       fishingfleet_firms IATTC           
## 4 OTR      NEI      flag_iattc       fishingfleet_firms IATTC           
## 5 PAN      PAN      flag_iattc       fishingfleet_firms IATTC           
## 6 USA      USA      flag_iattc       fishingfleet_firms IATTC