Introduction

This R Markdown document transforms data that is not in CWP format into CWP format. It first changes the format of the data and then maps the data to adhere to CWP standards. This markdown is automatically generated from the function at https://raw.githubusercontent.com/firms-gta/geoflow-tunaatlas/refs/heads/master/R/tunaatlas_scripts/pre-harmonization/indian_ocean_effort_tunaatlasiotc_level0_EF_RAW_2026.R; the documentation keeps the format of the roxygen2 skeleton.

A summary of the mapping process is provided, and the path to the dataset is specified. The first line of each dataset is available in this same GitHub repository. The datasets are named after the historical names given by the tRFMOs at export time, and these names may change. The information provided in this Rmd makes it clear which dataset should be used.

Additional operations are performed next to verify other aspects of the data, such as the consistency of the geolocation, the values, and the reported catches in numbers and tons.

If you are interested in further details, the results and codes are available for review.

Each .Rmd script requires the user to knit the dataset at the beginning of the script in order to execute the harmonization process correctly. It is also possible to run the code chunk by chunk but be sure to be in the correct working directory (i.e., the one of the .Rmd).

# Location of the raw IOTC effort file (EF_RAW.csv) inside the repository.
path_to_raw_dataset <- here::here(
  "R/tunaatlas_scripts/pre-harmonization",
  "iotc",
  "effort",
  "data",
  "EF_RAW.csv"
)

Harmonize IOTC Effort Dataset (standardized formatting)

This geoflow step reads an IOTC effort raw dataset and formats it into the harmonized Tuna Atlas structure (one record per stratum with standard fields).

@return None; the function outputs files directly and registers them as geoflow resources.

@details The formatting logic is implemented inline:

@importFrom dplyr mutate select case_when

@export

@keywords IOTC, tuna, fisheries, data harmonization, effort data

@author Bastien Grasset, IRD

  # Harmonize the raw IOTC effort table into one row per stratum with the
  # columns source_authority, gear_type, fishing_fleet, fishing_mode,
  # fishing_fleet_label, time_start, time_end, geographic_identifier, gridtype,
  # measurement_value, measurement_unit, measurement_processing_level and
  # measurement. Reads `path_to_raw_dataset`, writes EF_RAW_harmonized.csv and
  # leaves the result in `effort` / `georef_dataset`.
  if (!requireNamespace("dplyr", quietly = TRUE)) {
    install.packages("dplyr")
  }
  library(dplyr)

  # Use UTF-8 as the default connection encoding for read.csv()/write.csv().
  # on.exit() is only dependable inside a function body; at the top level of a
  # script or knitted chunk its timing is not guaranteed, so the previous value
  # is kept in `opts` and restored explicitly once the file has been written.
  opts <- options(encoding = "UTF-8")

  # ---- read raw ----
  effort_raw <- read.csv(path_to_raw_dataset, stringsAsFactors = FALSE)

  # Missing gear codes are recoded to "UNCL" (the code for Other/Unknown).
  effort_raw <- effort_raw %>%
    dplyr::mutate(GEAR_CODE = ifelse(is.na(GEAR_CODE), "UNCL", GEAR_CODE))

  # ---- format inline ----
  effort <- effort_raw %>%
    dplyr::mutate(
      # period covered: first day of MONTH_START to last day of MONTH_END
      time_start = as.Date(sprintf("%d-%02d-01", YEAR, MONTH_START)),
      # The first of a month plus 32 days always lands in the following month;
      # snapping that date to day 01 and stepping back one day gives the last
      # day of MONTH_END.
      time_end = as.Date(sprintf("%d-%02d-01", YEAR, MONTH_END)) + 32,
      time_end = as.Date(format(time_end, "%Y-%m-01")) - 1,

      # geography + gridtype (first character of the fishing ground code)
      geographic_identifier = as.character(FISHING_GROUND_CODE),
      gridtype = case_when(
        startsWith(geographic_identifier, "5") ~ "1deg_x_1deg",
        startsWith(geographic_identifier, "6") ~ "5deg_x_5deg",
        TRUE ~ "unknown"
      ),

      # core dimensions
      source_authority = "IOTC",
      gear_type        = as.character(GEAR_CODE),
      fishing_fleet    = as.character(FLEET_CODE),
      # blank or whitespace-only school types become NA
      fishing_mode     = dplyr::na_if(
        trimws(as.character(SCHOOL_TYPE_CODE)),
        ""
      ),

      # measure
      measurement_value = EFFORT,
      measurement_unit  = as.character(EFFORT_UNIT_CODE),

      # optional label carried along
      fishing_fleet_label = as.character(FLEET)
    ) %>%
    dplyr::select(
      source_authority, gear_type, fishing_fleet, fishing_mode,
      fishing_fleet_label, time_start, time_end, geographic_identifier,
      gridtype, measurement_value, measurement_unit
    ) %>%
    # constant descriptors, appended after the selected columns
    dplyr::mutate(
      measurement_processing_level = "unknown",
      measurement = "effort"
    )

  # Temporal extent widened to whole calendar years: "<first>-01-01/<last>-12-31".
  dataset_temporal_extent <- paste(
    paste0(format(min(effort$time_start, na.rm = TRUE), "%Y"), "-01-01"),
    paste0(format(max(effort$time_end, na.rm = TRUE), "%Y"), "-12-31"),
    sep = "/"
  )

  # output in same folder as path_to_raw_dataset
  output_name_dataset <- here::here(
    "R/tunaatlas_scripts/pre-harmonization",
    "iotc",
    "effort",
    "data",
    "EF_RAW_harmonized.csv"
  )

  write.csv(effort, output_name_dataset, row.names = FALSE)

  # Put the encoding option back to the value it had before this chunk.
  options(opts)

  georef_dataset <- effort

Load pre-harmonization scripts and apply mappings

# Download the code-list mapping helper script to the working directory and
# load it so that map_codelists_no_DB() is available.
download.file(
  "https://raw.githubusercontent.com/firms-gta/geoflow-tunaatlas/master/R/tunaatlas_scripts/pre-harmonization/map_codelists_no_DB.R",
  destfile = "local_map_codelists_no_DB.R"
)
source("local_map_codelists_no_DB.R")

# Apply the RFMO-to-global code-list mapping file to the harmonized dataset.
fact <- "effort"
mapping_codelist <- map_codelists_no_DB(
  fact,
  mapping_dataset = "https://raw.githubusercontent.com/fdiwg/fdi-mappings/main/global/firms/gta/codelist_mapping_rfmos_to_global.csv",
  dataset_to_map = georef_dataset,
  mapping_keep_src_code = FALSE,
  summary_mapping = TRUE,
  source_authority_to_map = c("IATTC", "CCSBT", "WCPFC", "ICCAT", "IOTC")
)
## 
##  mapping dimension gear_type with code list mapping
## 
##  mapping dimension fishing_fleet with code list mapping
## 
##  mapping dimension fishing_mode with code list mapping
## 
##  mapping dimension measurement_unit with code list mapping

Handle unmapped values and save the results

# Start from the mapped dataset and recode the remaining "UNK" codes:
# fishing_fleet "UNK" becomes "NEI" and gear_type "UNK" becomes "99.9".
georef_dataset <- dplyr::mutate(
  mapping_codelist$dataset_mapped,
  fishing_fleet = ifelse(fishing_fleet == "UNK", "NEI", fishing_fleet),
  gear_type = ifelse(gear_type == "UNK", "99.9", gear_type)
)

# Write the mapping recap, the unmapped codes and the final dataset to the
# effort data folder.
data.table::fwrite(
  mapping_codelist$recap_mapping,
  here::here(
    "R/tunaatlas_scripts/pre-harmonization",
    "iotc", "effort", "data",
    "EF_RAW_recap_mapping.csv"
  )
)
data.table::fwrite(
  mapping_codelist$not_mapped_total,
  here::here(
    "R/tunaatlas_scripts/pre-harmonization",
    "iotc", "effort", "data",
    "EF_RAW_not_mapped_total.csv"
  )
)
data.table::fwrite(
  georef_dataset,
  here::here(
    "R/tunaatlas_scripts/pre-harmonization",
    "iotc", "effort", "data",
    "EF_RAW_CWP_dataset.csv"
  )
)

Display the first few rows of the mapping summaries

# Show the first six rows of the mapping recap table.
print(utils::head(mapping_codelist$recap_mapping, n = 6L))
## # A tibble: 6 × 5
##   src_code trg_code src_codingsystem trg_codingsystem source_authority
##   <chr>    <chr>    <chr>            <chr>            <chr>           
## 1 DAYS     DAYS     effortunit_iotc  effortunit_rfmos IOTC            
## 2 FDAYS    FDAYS    effortunit_iotc  effortunit_rfmos IOTC            
## 3 FHOURS   FHOURS   effortunit_iotc  effortunit_rfmos IOTC            
## 4 HOURS    HOURS    effortunit_iotc  effortunit_rfmos IOTC            
## 5 HRSRH    HRSRH    effortunit_iotc  effortunit_rfmos IOTC            
## 6 SETS     SETS     effortunit_iotc  effortunit_rfmos IOTC