This R Markdown document is designed to transform data that is not in CWP format into CWP format. Initially, it changes the format of the data; subsequently, it maps the data to adhere to CWP standards. This markdown is automatically created from the function at https://raw.githubusercontent.com/eblondel/geoflow-tunaatlas/master/R/tunaatlas_scripts/pre-harmonization/east_pacific_ocean_nominal_catch_tunaatlasiattc_level0.R (the documentation keeps the format of the roxygen2 skeleton).
A summary of the mapping process is provided. The path to the dataset is specified. The first line of each dataset is available in this same GitHub repository. The datasets are named after the historical names given by the tRFMOs at export time, and these names may change. The information provided in this Rmd makes clear which dataset should be used with this markdown.
Additional operations are performed next to verify other aspects of the data, such as the consistency of the geolocation, the values, and the reported catches in numbers and tons.
If you are interested in further details, the results and codes are available for review.
Each .Rmd script requires the user to set the path to the
dataset at the beginning of the script and to knit the document in order
to execute the harmonization process correctly. It is also possible to run
the code chunk by chunk, but be sure to be in the correct working directory
(i.e., the one of the .Rmd).
# Location of the raw IATTC nominal catch export, built with here::here().
path_to_raw_dataset <- here::here(
  "R/tunaatlas_scripts/pre-harmonization",
  "iattc",
  "nominal",
  "data",
  "CatchByFlagGear1918-2024.csv"
)
Harmonize IATTC Nominal Catch Datasets
This function harmonizes the IATTC nominal catch datasets, preparing them for integration into the Tuna Atlas database, according to specified format requirements.
@return None; the function outputs files directly, including harmonized datasets, optional metadata, and code lists for integration within the Tuna Atlas database.
@details This function modifies the dataset to ensure compliance with the standardized format, including renaming, reordering, and recalculating specific fields as necessary. Metadata integration is contingent on the intended use within the Tuna Atlas database.
@import dplyr @import readr @importFrom stringr str_replace @seealso \code{\link{format_time_db_format}} for converting time format, @export @keywords data harmonization, fisheries, IATTC, tuna @author Paul Taconet, IRD @author Bastien Grasset, IRD
# Input data sample:
# AnoYear BanderaFlag ArteGear EspeciesSpecies t
# 1918 OTR LP SKJ 1361
# 1918 OTR LP YFT 0
# 1919 OTR LP SKJ 3130
# 1919 OTR LP YFT 136
# 1920 OTR LP SKJ 3583
# 1920 OTR LP YFT 907
# Catch: final data sample:
# FishingFleet Gear time_start time_end AreaName School Species CatchType CatchUnits Catch
# BLZ LL 2001-01-01 2002-01-01 IATTC ALL ALB ALL MT 4854
# BLZ LL 2001-01-01 2002-01-01 IATTC ALL BET ALL MT 1987
# BLZ LL 2001-01-01 2002-01-01 IATTC ALL BIL ALL MT 122
# BLZ LL 2001-01-01 2002-01-01 IATTC ALL PBF ALL MT 131
# BLZ LL 2001-01-01 2002-01-01 IATTC ALL SFA ALL MT 93
# BLZ LL 2001-01-01 2002-01-01 IATTC ALL SKH ALL MT 1326
source("https://raw.githubusercontent.com/firms-gta/geoflow-tunaatlas/master/R/sardara_functions/format_time_db_format.R")
packages
# Install dplyr only when it is missing, then attach it. library() is used
# instead of require() so that a failed installation stops the script here
# rather than surfacing later as a missing-function error.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)
# Keep a copy of the current options, then switch the session encoding to UTF-8.
opts <- options()
options(encoding = "UTF-8")

# Read the raw export; strip.white = TRUE trims white space around unquoted
# fields.
NC <- read.csv(
  path_to_raw_dataset,
  header = TRUE,
  stringsAsFactors = FALSE,
  strip.white = TRUE
)

# Keep the five source columns and give them shorter names.
colToKeep_NC <- c("AnoYear", "BanderaFlag", "ArteGear", "EspeciesSpecies", "t")
NC_harm_IATTC <- NC[colToKeep_NC]
names(NC_harm_IATTC) <- c("Year", "Flag", "Gear", "Species", "Catch")

# Constant descriptors applied to every row.
NC_harm_IATTC[["AreaName"]] <- "EPO"
NC_harm_IATTC[["AreaCWPgrid"]] <- NA
NC_harm_IATTC[["School"]] <- "UNK"
NC_harm_IATTC[["CatchType"]] <- "NC" # measurement_type
NC_harm_IATTC[["CatchUnits"]] <- "t"
NC_harm_IATTC[["RFMO"]] <- "IATTC"
NC_harm_IATTC[["Ocean"]] <- "PAC_E"

# Presumably read by format_time_db_format() as a 12-month period starting in
# January - TODO confirm against the helper.
NC_harm_IATTC[["MonthStart"]] <- 1
NC_harm_IATTC[["Period"]] <- 12
Format inputDataset time to have the time format of the DB, which is one column time_start and one time_end
# Build the time_start / time_end columns with the sourced helper.
NC_harm_IATTC <- format_time_db_format(NC_harm_IATTC)

# Drop zero catches. The NA test is explicit because `Catch != 0` evaluates to
# NA for a missing catch, and indexing rows with NA yields rows filled with NA
# instead of dropping them.
NC <- NC_harm_IATTC[!is.na(NC_harm_IATTC$Catch) & NC_harm_IATTC$Catch != 0, ]
rm(NC_harm_IATTC)

# Rename Flag, then keep only the columns of the catch layout, in this order.
colnames(NC)[colnames(NC) == "Flag"] <- "FishingFleet"
colToKeep_captures <- c(
  "FishingFleet", "Gear", "time_start", "time_end", "AreaName",
  "School", "Species", "CatchType", "CatchUnits", "Catch"
)
NC <- NC[, colToKeep_captures]
remove 0 and NA values
# Keep only rows whose catch is present and non-zero.
NC <- NC[!is.na(NC$Catch) & NC$Catch != 0, ]

# Sum the catch over every combination of the nine descriptive columns.
NC <- aggregate(
  NC$Catch,
  by = as.list(NC[c(
    "FishingFleet", "Gear", "time_start", "time_end", "AreaName",
    "School", "Species", "CatchType", "CatchUnits"
  )]),
  FUN = sum
)

# Rename the nine grouping columns and the summed value column.
names(NC) <- c(
  "fishing_fleet", "gear_type", "time_start", "time_end",
  "geographic_identifier", "fishing_mode", "species",
  "measurement_type", "measurement_unit", "measurement_value"
)

# Constant descriptors for the harmonized dataset.
NC[["source_authority"]] <- "IATTC"
NC[["measurement_type"]] <- "NC"
NC[["measurement"]] <- "catch"
NC[["measurement_processing_level"]] <- "raised"

NC[["time_start"]] <- as.Date(NC[["time_start"]])
NC[["time_end"]] <- as.Date(NC[["time_end"]])

# Temporal extent: 1 January of the earliest start year to 31 December of the
# latest end year.
# NOTE(review): if format_time_db_format() returns an exclusive time_end (first
# day of the following year), the end year here is one too large - confirm.
dataset_temporal_extent <- paste0(
  format(min(NC[["time_start"]]), "%Y"), "-01-01",
  "/",
  format(max(NC[["time_end"]]), "%Y"), "-12-31"
)
output in same folder as path_to_raw_dataset
# Write the harmonized table next to the raw file, without row names.
output_name_dataset <- here::here(
  "R/tunaatlas_scripts/pre-harmonization",
  "iattc",
  "nominal",
  "data",
  "CatchByFlagGear1918-2024_harmonized.csv"
)
write.csv(NC, file = output_name_dataset, row.names = FALSE)

# The code-list mapping step works on this copy.
georef_dataset <- NC
Load pre-harmonization scripts and apply mappings
# Fetch the mapping helper into the working directory, then load it.
download.file(
  url = "https://raw.githubusercontent.com/firms-gta/geoflow-tunaatlas/master/R/tunaatlas_scripts/pre-harmonization/map_codelists_no_DB.R",
  destfile = "local_map_codelists_no_DB.R"
)
source("local_map_codelists_no_DB.R")

# Map the dataset with the RFMO-to-global code-list mapping; source codes are
# not kept and a mapping summary is requested.
fact <- "catch"
mapping_codelist <- map_codelists_no_DB(
  fact,
  mapping_dataset = "https://raw.githubusercontent.com/fdiwg/fdi-mappings/main/global/firms/gta/codelist_mapping_rfmos_to_global.csv",
  dataset_to_map = georef_dataset,
  mapping_keep_src_code = FALSE,
  summary_mapping = TRUE,
  source_authority_to_map = c("IATTC", "CCSBT", "WCPFC")
)
##
## mapping dimension gear_type with code list mapping
##
## mapping dimension species with code list mapping
##
## mapping dimension fishing_fleet with code list mapping
##
## mapping dimension fishing_mode with code list mapping
Handle unmapped values and save the results
# Recode the "UNK" placeholder to "NEI" for fleets and "99.9" for gear types.
georef_dataset <- mapping_codelist$dataset_mapped %>%
  dplyr::mutate(
    fishing_fleet = ifelse(fishing_fleet == "UNK", "NEI", fishing_fleet),
    gear_type = ifelse(gear_type == "UNK", "99.9", gear_type)
  )

# Save the mapping summary, the unmapped codes and the mapped dataset next to
# the raw file.
data.table::fwrite(
  mapping_codelist$recap_mapping,
  file = here::here(
    "R/tunaatlas_scripts/pre-harmonization", "iattc", "nominal", "data",
    "CatchByFlagGear1918-2024_recap_mapping.csv"
  )
)
data.table::fwrite(
  mapping_codelist$not_mapped_total,
  file = here::here(
    "R/tunaatlas_scripts/pre-harmonization", "iattc", "nominal", "data",
    "CatchByFlagGear1918-2024_not_mapped_total.csv"
  )
)
data.table::fwrite(
  georef_dataset,
  file = here::here(
    "R/tunaatlas_scripts/pre-harmonization", "iattc", "nominal", "data",
    "CatchByFlagGear1918-2024_CWP_dataset.csv"
  )
)
Display the first few rows of the mapping summaries
# Show the first six rows of the mapping summary.
print(head(mapping_codelist$recap_mapping, n = 6L))
## # A tibble: 6 × 5
## src_code trg_code src_codingsystem trg_codingsystem source_authority
## <chr> <chr> <chr> <chr> <chr>
## 1 CAN CAN flag_iattc fishingfleet_firms IATTC
## 2 CHL CHL flag_iattc fishingfleet_firms IATTC
## 3 COL COL flag_iattc fishingfleet_firms IATTC
## 4 ECU ECU flag_iattc fishingfleet_firms IATTC
## 5 JPN JPN flag_iattc fishingfleet_firms IATTC
## 6 MEX MEX flag_iattc fishingfleet_firms IATTC