## Stickiness analysis on Trase's v2.4 soy data (all the CI and WPi calculations).
# Here we use the latest Trase soy dataset, made available on 30/01/2020.
# However, the analysis originally conducted for the research article used version 2.3.
# The code is the same, nevertheless; the data is slightly different.
# This script prepares the new network data for the stickiness analysis through
# Ci, Cm and C, and also extracts the volumes of soy produced and traded.
# We aggregate annual trade flows into rows of
# municipalities -> exporters (traders) -> destination countries.
# We correct mistakes in companies' names and merge some well-known subsidiaries
# with their owners using the Bloomberg private company database:
# https://www.bloomberg.com/research/stocks/%5Cprivate/snapshot.asp?privcapId=160210
# We prepare annual and aggregated edge lists and node lists, and create unique
# IDs for each node and each edge.
## Note that this was prepared for Brazil's exporting traders between 2003-2017
## found in Trase data. For other datasets, please adapt accordingly.
# The line where municipalities are aggregated was changed to logistics hubs.

# Show the current working directory, then move to the project folder.
# "C:/Users/..." is a placeholder: replace it with the folder that contains the
# relative data paths used further down in this script.
getwd()
setwd("C:/Users/...")

# Set UTF-8 encoding
# ... this is a good idea on Windows machines, not sure about Macs.
options(encoding = "utf8")
options(stringsAsFactors = FALSE)

# Data manipulation and plotting.
# Run an install.packages() line once if the package is missing, then keep it
# commented out.
#install.packages("tidyverse")
#install.packages("sna")
#install.packages("network")
#install.packages("miscTools")
library(tidyverse) # After the install, then load the package
library(network) # After the install, then load the package
library(sna) # After the install, then load the package
library(dplyr)

# ---- Helpers ----------------------------------------------------------------

# Ordered rewrite rules for exporter/trader names: any name that matches
# `pattern` is replaced by `canonical`. The rules are applied one after the
# other, so a later rule sees the output of the earlier ones (AGROGRAIN is first
# collapsed to "AGROGRAIN" and later mapped to "ADM"; URUGRAIN ends up as
# "LOUIS DREYFUS"). Do not reorder the rows without checking those interactions.
# The patterns are regular expressions (str_detect() default), so the dots in
# "A.W.B" match any character.
exporter_rules <- tribble(
  ~pattern, ~canonical,
  "CARGILL", "CARGILL", # making sure that all CARGILL names become a single CARGILL
  "GLENCORE", "GLENCORE",
  "AGROGRAIN", "AGROGRAIN",
  "BUNGE", "BUNGE",
  "AGRENCO", "AGRENCO",
  "DREYFUS", "LOUIS DREYFUS",
  "NETHGRAIN", "NETHGRAIN",
  "NETH", "NETHGRAIN",
  "NETG", "NETHGRAIN",
  "AWB", "AWB",
  "CEVAL", "CEVAL",
  "CONCORDIA", "CONCORDIA TRADING",
  "CROWN MARKETING LIMITED", "CROWN MARKETING LIMITED",
  "DEAF SMITH COUNTRY GRAIN", "DEAF SMITH COUNTRY GRAIN",
  "DUNFOLL", "DUNFOLL COMPANY",
  "EUROSOY", "EUROSOY",
  "GELNCORE", "GLENCORE",
  "GLANCORE", "GLENCORE",
  "GEBANA", "GEBANA",
  "GRANELES", "GRANELES DE CHILE",
  "GS-AGRI", "GS AGRI EG",
  "HOLBUD", "HOLBUD LTD",
  "KUOK OILS AND GRAINS", "KUOK OILS AND GRAINS LTD",
  "LAR T", "LAR TRADING",
  "NATUMY", "NATUMY AG",
  "NOBLE", "NOBLE",
  "NORTHLAND ORGANIC", "NORTHLAND ORGANIC",
  "NOSKEN", "NOSKEN",
  "PROJECTO AGROPECUARIO", "PROJECTO AGROPECUARIO RIBEIRA JULIAO",
  "RIO VERDE", "RIO VERDE",
  "SATWAY COMERCIAL", "SATWAY COMERCIAL",
  "SKRETTING", "SKRETTING",
  "SMEG", "SMEG",
  "SOYA-M", "SOYA MILLS SA",
  "SOYAL LIMITED", "SOYAL LIMITED",
  "SOYCO", "SOYCO",
  "SOYLENT", "SOYLENT",
  "SUNLEY TRADING", "SUNLEY TRADING",
  "VIGANEGO", "VIGANEGO",
  "UNILEVER", "UNILEVER",
  "TRADE HOUSE", "TRADE HOUSE SODRUGESTIVO",
  "A.W.B", "AWB",
  "URUGRAIN", "URUGRAIN",
  "AGROGRAIN", "ADM",
  "MULTIGRAIN", "MULTIGRAIN",
  "URUGRAIN", "LOUIS DREYFUS",
  "ALFRED C", "ALFRED C. TOEPFER",
  "INLOGS", "INLOGS",
  "CEBEC", "CEFETRA BV",
  "IMCOPA", "IMCOPA",
  "INTERGRAIN", "INTERGRAIN",
  "GRANEX", "GRANEX",
  "NIDERA", "COFCO",
  "GAVILON", "MARUBENI",
  "AGREX", "MITSUBISHI CORPORATION",
  "MITSUBISHI CORPORATION", "MITSUBISHI CORPORATION",
  "MITSUI", "MITSUI",
  "COAMO", "COAMO",
  "ALIMPORT", "ALIMPORT CUBA",
  "LARENT", "LARENT",
  "FUENTESAUCO", "FUENTESAUCO",
  "FUENTAUSO", "FUENTESAUCO",
  "NATURALLE", "ITOCHU",
  "COFCO", "COFCO"
)

# Apply the rewrite rules, in order, to a vector of exporter names.
#   x     : vector of names (NA entries stay NA).
#   rules : table with columns `pattern` and `canonical`, applied top to bottom.
# Returns a vector of the same length with the standardized names.
standardize_exporter <- function(x, rules = exporter_rules) {
  for (i in seq_len(nrow(rules))) {
    x <- ifelse(str_detect(x, rules$pattern[[i]]), rules$canonical[[i]], x)
  }
  x
}

# Sum SUM_SOY_TONS for each combination of the grouping columns given in `...`
# and order the result from the largest to the smallest volume.
sum_soy_by <- function(flows, ...) {
  flows %>%
    group_by(...) %>%
    summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>%
    ungroup() %>% # ungroup() makes R forget that you were grouping
    arrange(desc(SUM_SOY_TONS))
}

# Same as sum_soy_by() for exactly two grouping columns, with the columns
# renamed to from / to / weight (names needed for igraph later).
edge_table <- function(flows, ...) {
  edges <- sum_soy_by(flows, ...)
  colnames(edges) <- c("from", "to", "weight")
  edges
}

# Build every edge table for one year.
#   flows : aggregated flows with YEAR, GEOCODE, EXPORTER, COUNTRY, SUM_SOY_TONS.
#   nodes : node table with columns `id` and `label`.
#   year  : year to extract.
# Returns a list: the flows of that year, the municipality->exporter and
# exporter->country edge tables, their row-bound edge list, the edge list joined
# to the node ids (id.x = from, id.y = to) and the numeric-only edge list.
build_annual_edges <- function(flows, nodes, year) {
  # which() drops rows whose YEAR is NA, as subset() did in the per-year code
  flows_year <- flows[which(flows$YEAR == year), ]
  mun_exp <- edge_table(flows_year, GEOCODE, EXPORTER)
  exp_cou <- edge_table(flows_year, EXPORTER, COUNTRY)
  edges <- rbind(mun_exp, exp_cou)
  edges_ids <- edges %>%
    inner_join(nodes, by = c("from" = "label")) %>%
    inner_join(nodes, by = c("to" = "label"))
  edges_num <- edges_ids %>% select(id.x, id.y, weight)
  colnames(edges_num) <- c("from", "to", "weight")
  list(
    flows = flows_year,
    mun_exp = mun_exp,
    exp_cou = exp_cou,
    edges = edges,
    edges_ids = edges_ids,
    edges_num = edges_num
  )
}

# ---- 1) Reading the data ----------------------------------------------------

# The same Trase table read in two ways (RDS and ";"-delimited CSV).
# Note: `c` and `d` are kept as object names because later code may rely on them.
c <- readRDS("RawData/trase_v24/trase_v24.rds") # reading dataframe for the aggregated dfs later
d <- read_delim("RawData/trase_v24/trase_v24.csv", delim = ";") # reading dataframe for the aggregated dfs later
MunRef <- readRDS("IntermediateData/NewAnalysesTrase2.3/MunicipalitiesAnalyses/Library_munComplete") # reading dataframe for the aggregated dfs later
## This list of municipalities has to be replaced for Paraguayan departments,
## for example, to join with other variables.
## I recommend using the same codes used by Trase, which should be according to
## the national coding system too.

# This is just to take out the UNKNOWNs: a logistics hub containing "UNKNOWN"
# is replaced by the municipality of the same row.
d$`LOGISTICS HUB` <- ifelse(
  str_detect(d$`LOGISTICS HUB`, "UNKNOWN"),
  d$MUNICIPALITY,
  d$`LOGISTICS HUB`
)

## Standardizing traders'/exporters' names, because sometimes Cargill appeared
## with various spellings after "CARGILL", for example.
# NOTE(review): `c` is not used again in the part of the script visible here;
# the aggregation below starts from `d` and standardizes ADF$EXPORTER again.
c$EXPORTER <- standardize_exporter(c$EXPORTER)

# ---- 2) Aggregated flows ----------------------------------------------------

# Creating the aggregate table: total soy for each combination of
# YEAR, STATE, LOGISTICS HUB, GEOCODE_LH, TRADER and COUNTRY.
ADF <- d %>%
  group_by(YEAR, STATE, `LOGISTICS HUB`, GEOCODE_LH, TRADER, COUNTRY) %>%
  summarize(SUM_SOY_TONS = sum(`SOY (TONS)`)) %>% # Calculate the sum
  ungroup() %>% # ungroup() makes R forget that you were grouping
  arrange(desc(SUM_SOY_TONS)) # Order from top to bottom
head(ADF) # Look at the top six rows
if (interactive()) View(ADF)

# Here LOGISTICS HUB and GEOCODE_LH are renamed to MUNICIPALITY and GEOCODE (and
# TRADER to EXPORTER), so the rest of the code runs exactly the same, but on
# logistics hubs (LHs) and not on what Trase originally called municipalities.
# The state is kept for the unknowns so that we get "unknown from state x"
# rather than simply "unknown".
colnames(ADF) <- c("YEAR", "STATE", "MUNICIPALITY", "GEOCODE", "EXPORTER", "COUNTRY", "SUM_SOY_TONS")

any(is.na(ADF$EXPORTER)) # This checks if there are still any NAs
any(is.na(ADF)) # This checks if there are still any NAs

# Testing and fixing different names for the same companies
ADF$EXPORTER <- standardize_exporter(ADF$EXPORTER)

# Removing the BR prefix from the GEOCODEs: keep everything from the 4th
# character onwards. Codes containing "XXXXXXX" are replaced by the
# MUNICIPALITY value of the same row.
#library(stringr)
ADF$GEOCODE <- str_sub(ADF$GEOCODE, start = 4)
ADF$GEOCODE <- ifelse(str_detect(ADF$GEOCODE, "XXXXXXX"), ADF$MUNICIPALITY, ADF$GEOCODE)

# Now re-aggregating the ADF dataframe with the companies' names fixed
ADF2 <- sum_soy_by(ADF, YEAR, STATE, MUNICIPALITY, GEOCODE, EXPORTER, COUNTRY)
# The number of rows reduced from 50245 to 50101, meaning that there were not so
# many wrong or repeated companies' names. Perhaps this reduction was mostly due
# to merging subsidiaries with owners.
# I did merging between subsidiaries and parent companies and also between old
# and new companies.

# 2.2 Aggregate table with only municipality (GEOCODE), exporter and soy tons
#     (weight), without years, to make the first pair of edges ----
adfMunExp <- sum_soy_by(ADF2, GEOCODE, EXPORTER)

# 2.3 Aggregate table with only exporter, destination country and soy tons
#     (weight), without years ----
adfExpCou <- sum_soy_by(ADF2, EXPORTER, COUNTRY)

# Correcting column names for igraph later
colnames(adfMunExp) <- c("from", "to", "weight")
colnames(adfExpCou) <- c("from", "to", "weight")

# Binding these two dataframes by row to make the unique edge list for all years
UniqueEdges <- rbind(adfMunExp, adfExpCou)

# ---- 3) Nodes dataframe (id and label) for the static aggregated network ----

node_labels <- unique(c(UniqueEdges$from, UniqueEdges$to))
# One row per distinct label, numbered 1..n in order of first appearance.
Nodes <- data.frame(
  id = as.numeric(seq_along(node_labels)),
  label = as.character(node_labels),
  stringsAsFactors = FALSE
)
head(Nodes)
if (interactive()) View(Nodes)

# ---- 4) Numeric edge list ---------------------------------------------------

# Transform UniqueEdges into numeric variables with the ids from Nodes: it seems
# more reliable to input an edge table that is totally numeric.
# Join types: left_join keeps every row of the first table; inner_join keeps
# only rows that match; anti_join keeps only rows that do not match.
joined_df <- inner_join(UniqueEdges, Nodes, by = c("from" = "label"))
if (interactive()) View(joined_df)
joined_df <- inner_join(joined_df, Nodes, by = c("to" = "label"))
head(joined_df) # Check it looks OK
dim(joined_df); dim(UniqueEdges) # check the number of rows match
any(is.na(joined_df$id.y)) # should be false
if (interactive()) View(joined_df)

# Keep only the ids (id.x = from, id.y = to) and the weight, then rename
UniqueEdges_num <- joined_df %>% select(id.x, id.y, weight)
colnames(UniqueEdges_num) <- c("from", "to", "weight")
str(UniqueEdges_num) # check the type of the columns
if (interactive()) View(UniqueEdges_num)

# ---- 5) Adding "Municipality" and "State" to the nodes ----------------------

# 5) Adding the columns "Municipality" and "State" to NodesTrase to have the
# municipality names as well in the nodes df.
# Serious problem here with lots of municipalities appearing several times in
# different states. I will have to fix this on ADF2 and re-aggregate into
# Library_Mun. But the others are ok because the Geocodes are ok.
#Library_munCorrect <- readRDS("IntermediateData/3rdRound/Library_mun")
Library_munCorrect <- readRDS("IntermediateData/NewAnalysesTrase2.3/Library_mun")
Library_munCorrect$GEOCODE <- as.character(Library_munCorrect$GEOCODE)
# Ok, this old library mun is correct, I will use this. It does not matter for
# the aggregation because this is done on the basis of the GEOCODE anyway and
# GEOCODE is all right.
#Library_mun <- readRDS("IntermediateData/NewAnalysesTrase2.3/Library_mun")
if (interactive()) View(Library_munCorrect)
if (interactive()) View(Nodes)

joining2 <- left_join(Nodes, Library_munCorrect, by = c("label" = "GEOCODE"))
dim(joining2); dim(Nodes) # check the number of rows match
any(is.na(joining2$id)) # should be false
# NOTE(review): the original note says this "should be false", but it runs
# before the NA fill below, so any node without a match in the library makes it
# TRUE - confirm what was intended.
any(is.na(joining2))

# Nodes without a match in the library have NA in MUNICIPALITY and STATE after
# the left join; give them their own label in both columns.
joining2$MUNICIPALITY <- ifelse(is.na(joining2$MUNICIPALITY), joining2$label, joining2$MUNICIPALITY)
joining2$STATE <- ifelse(is.na(joining2$STATE), joining2$label, joining2$STATE)

# Checking for duplicated rows. any(check) is TRUE as soon as one row is
# duplicated (wrapping the vector in isTRUE() always gave FALSE).
check <- duplicated(joining2, incomparables = FALSE)
any(check) # should be false
Nodes_Labels <- joining2

# Here this part finishes; the basics are set.
# Next: annual edge lists and matrices, coefficient of variation on edge lists
# with annual volumes in columns, CIs on matrices added to the nodes, etc.

# ---- 6) Annual edge lists ---------------------------------------------------

# From here on `d` is the corrected aggregated table (it no longer holds the raw CSV).
d <- ADF2

annual_edges_dir <- "NewAnalysesTrase2.4/AnnualEdges"

for (yr in 2003:2011) {
  annual <- build_annual_edges(d, Nodes, yr)
  message("Annual edges for ", yr)

  saveRDS(annual$edges, file = file.path(annual_edges_dir, paste0("Edges", yr))) # saving in RDS as well to guarantee
  print(dim(annual$edges_ids)); print(dim(annual$edges)) # check the number of rows match
  print(any(is.na(annual$edges_ids$id.y))) # should be false
  str(annual$edges_num) # check the type of the columns
  saveRDS(annual$edges_num, file = file.path(annual_edges_dir, paste0("EdgesNum", yr)))

  # Keep the per-year objects under the names the hand-written blocks used
  # (d2003, adf2003ME, adf2003EC, Edges2003, EdgesN2003, EdgesNum2003, ...), so
  # code further down that refers to them keeps working.
  assign(paste0("d", yr), annual$flows)
  assign(paste0("adf", yr, "ME"), annual$mun_exp)
  assign(paste0("adf", yr, "EC"), annual$exp_cou)
  assign(paste0("Edges", yr), annual$edges)
  assign(paste0("EdgesN", yr), annual$edges_ids)
  assign(paste0("EdgesNum", yr), annual$edges_num)
}

#-----------
# The 2012 block starts here; its pipeline continues past this point in the file.
d2012 <- subset(d, d$YEAR == 2012)
adf2012ME <- d2012 %>% # ...
the %>% symbol shows that you are going to do something to the data group_by(GEOCODE, EXPORTER) %>% # For each combination of MUNICIPALITY, GEOCODE, EXPORTER summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom colnames(adf2012ME) <- c("from","to", "weight") adf2012EC <- d2012 %>% # ... the %>% symbol shows that you are going to do something to the data group_by(EXPORTER, COUNTRY) %>% # For each combination of GEOCODE, EXPORTER summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom colnames(adf2012EC) <- c("from","to", "weight") Edges2012 <- rbind(adf2012ME, adf2012EC) saveRDS(Edges2012, file = "NewAnalysesTrase2.4/AnnualEdges/Edges2012") # saving in RDS as well to guarantee EdgesN2012 <- inner_join(Edges2012, Nodes, # which two df to join by = c("from" = "label")) # specify the cols to match by EdgesN2012 <- inner_join(EdgesN2012, Nodes, # which two df to join by = c("to" = "label") # specify the cols to match by ) dim(EdgesN2012); dim(Edges2012) # check the number of rows match any(is.na(EdgesN2012$id.y))# should be false EdgesNum2012 <- EdgesN2012 %>% select(id.x,id.y, weight) colnames(EdgesNum2012) <- c("from", "to", "weight") str(EdgesNum2012) #check the category of columns saveRDS(EdgesNum2012, file = "NewAnalysesTrase2.4/AnnualEdges/EdgesNum2012") #------------ d2013 <- subset(d, d$YEAR == 2013) adf2013ME <- d2013 %>% # ... 
the %>% symbol shows that you are going to do something to the data group_by(GEOCODE, EXPORTER) %>% # For each combination of MUNICIPALITY, GEOCODE, EXPORTER summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom colnames(adf2013ME) <- c("from","to", "weight") adf2013EC <- d2013 %>% # ... the %>% symbol shows that you are going to do something to the data group_by(EXPORTER, COUNTRY) %>% # For each combination of GEOCODE, EXPORTER summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom colnames(adf2013EC) <- c("from","to", "weight") Edges2013 <- rbind(adf2013ME, adf2013EC) saveRDS(Edges2013, file = "NewAnalysesTrase2.4/AnnualEdges/Edges2013") # saving in RDS as well to guarantee EdgesN2013 <- inner_join(Edges2013, Nodes, # which two df to join by = c("from" = "label")) # specify the cols to match by EdgesN2013 <- inner_join(EdgesN2013, Nodes, # which two df to join by = c("to" = "label") # specify the cols to match by ) dim(EdgesN2013); dim(Edges2013) # check the number of rows match any(is.na(EdgesN2013$id.y))# should be false EdgesNum2013 <- EdgesN2013 %>% select(id.x,id.y, weight) colnames(EdgesNum2013) <- c("from", "to", "weight") str(EdgesNum2013) #check the category of columns saveRDS(EdgesNum2013, file = "NewAnalysesTrase2.4/AnnualEdges/EdgesNum2013") #--------------- d2014 <- subset(d, d$YEAR == 2014) adf2014ME <- d2014 %>% # ... 
the %>% symbol shows that you are going to do something to the data group_by(GEOCODE, EXPORTER) %>% # For each combination of MUNICIPALITY, GEOCODE, EXPORTER summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom colnames(adf2014ME) <- c("from","to", "weight") adf2014EC <- d2014 %>% # ... the %>% symbol shows that you are going to do something to the data group_by(EXPORTER, COUNTRY) %>% # For each combination of GEOCODE, EXPORTER summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom colnames(adf2014EC) <- c("from","to", "weight") Edges2014 <- rbind(adf2014ME, adf2014EC) saveRDS(Edges2014, file = "NewAnalysesTrase2.4/AnnualEdges/Edges2014") # saving in RDS as well to guarantee EdgesN2014 <- inner_join(Edges2014, Nodes, # which two df to join by = c("from" = "label")) # specify the cols to match by EdgesN2014 <- inner_join(EdgesN2014, Nodes, # which two df to join by = c("to" = "label") # specify the cols to match by ) dim(EdgesN2014); dim(Edges2014) # check the number of rows match any(is.na(EdgesN2014$id.y))# should be false EdgesNum2014 <- EdgesN2014 %>% select(id.x,id.y, weight) colnames(EdgesNum2014) <- c("from", "to", "weight") str(EdgesNum2014) #check the category of columns saveRDS(EdgesNum2014, file = "NewAnalysesTrase2.4/AnnualEdges/EdgesNum2014") #----------------- d2015 <- subset(d, d$YEAR == 2015) adf2015ME <- d2015 %>% # ... 
the %>% symbol shows that you are going to do something to the data group_by(GEOCODE, EXPORTER) %>% # For each combination of MUNICIPALITY, GEOCODE, EXPORTER summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom colnames(adf2015ME) <- c("from","to", "weight") adf2015EC <- d2015 %>% # ... the %>% symbol shows that you are going to do something to the data group_by(EXPORTER, COUNTRY) %>% # For each combination of GEOCODE, EXPORTER summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom colnames(adf2015EC) <- c("from","to", "weight") Edges2015 <- rbind(adf2015ME, adf2015EC) saveRDS(Edges2015, file = "NewAnalysesTrase2.4/AnnualEdges/Edges2015") # saving in RDS as well to guarantee EdgesN2015 <- inner_join(Edges2015, Nodes, # which two df to join by = c("from" = "label")) # specify the cols to match by EdgesN2015 <- inner_join(EdgesN2015, Nodes, # which two df to join by = c("to" = "label") # specify the cols to match by ) dim(EdgesN2015); dim(Edges2015) # check the number of rows match any(is.na(EdgesN2015$id.y))# should be false EdgesNum2015 <- EdgesN2015 %>% select(id.x,id.y, weight) colnames(EdgesNum2015) <- c("from", "to", "weight") str(EdgesNum2015) #check the category of columns saveRDS(EdgesNum2015, file = "NewAnalysesTrase2.4/AnnualEdges/EdgesNum2015") #-------------- d2016 <- subset(d, d$YEAR == 2016) adf2016ME <- d2016 %>% # ... 
the %>% symbol shows that you are going to do something to the data group_by(GEOCODE, EXPORTER) %>% # For each combination of MUNICIPALITY, GEOCODE, EXPORTER summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom colnames(adf2016ME) <- c("from","to", "weight") adf2016EC <- d2016 %>% # ... the %>% symbol shows that you are going to do something to the data group_by(EXPORTER, COUNTRY) %>% # For each combination of GEOCODE, EXPORTER summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom colnames(adf2016EC) <- c("from","to", "weight") Edges2016 <- rbind(adf2016ME, adf2016EC) saveRDS(Edges2016, file = "NewAnalysesTrase2.4/AnnualEdges/Edges2016") # saving in RDS as well to guarantee EdgesN2016 <- inner_join(Edges2016, Nodes, # which two df to join by = c("from" = "label")) # specify the cols to match by EdgesN2016 <- inner_join(EdgesN2016, Nodes, # which two df to join by = c("to" = "label") # specify the cols to match by ) dim(EdgesN2016); dim(Edges2016) # check the number of rows match any(is.na(EdgesN2016$id.y))# should be false EdgesNum2016 <- EdgesN2016 %>% select(id.x,id.y, weight) colnames(EdgesNum2016) <- c("from", "to", "weight") str(EdgesNum2016) #check the category of columns saveRDS(EdgesNum2016, file = "NewAnalysesTrase2.4/AnnualEdges/EdgesNum2016") #-------------- d2017 <- subset(d, d$YEAR == 2017) adf2017ME <- d2017 %>% # ... 
the %>% symbol shows that you are going to do something to the data group_by(GEOCODE, EXPORTER) %>% # For each combination of MUNICIPALITY, GEOCODE, EXPORTER summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom colnames(adf2017ME) <- c("from","to", "weight") adf2017EC <- d2017 %>% # ... the %>% symbol shows that you are going to do something to the data group_by(EXPORTER, COUNTRY) %>% # For each combination of GEOCODE, EXPORTER summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom colnames(adf2017EC) <- c("from","to", "weight") Edges2017 <- rbind(adf2017ME, adf2017EC) saveRDS(Edges2017, file = "NewAnalysesTrase2.4/AnnualEdges/Edges2017") # saving in RDS as well to guarantee EdgesN2017 <- inner_join(Edges2017, Nodes, # which two df to join by = c("from" = "label")) # specify the cols to match by EdgesN2017 <- inner_join(EdgesN2017, Nodes, # which two df to join by = c("to" = "label") # specify the cols to match by ) dim(EdgesN2017); dim(Edges2017) # check the number of rows match any(is.na(EdgesN2017$id.y))# should be false EdgesNum2017 <- EdgesN2017 %>% select(id.x,id.y, weight) colnames(EdgesNum2017) <- c("from", "to", "weight") str(EdgesNum2017) #check the category of columns saveRDS(EdgesNum2017, file = "NewAnalysesTrase2.4/AnnualEdges/EdgesNum2017") ##saving things that I will need later saveRDS(UniqueEdges_num, "NewAnalysesTrase2.4/UniqueEdges_num") saveRDS(UniqueEdges, "NewAnalysesTrase2.4/UniqueEdges") saveRDS(Nodes, "NewAnalysesTrase2.4/Nodes") saveRDS(Nodes_Labels, "NewAnalysesTrase2.4/Nodes_Labels") saveRDS(ADF2, "NewAnalysesTrase2.4/Trase24_clean") #----------- #Here it finishes, now I will move to creating annual matrices of equal dimensions to calculate Ci - library(igraph) EdgesNum <- 
readRDS("NewAnalysesTrase2.4/UniqueEdges_num") # value of the `EdgesNum <-` assignment started on the previous line
EdgesRef <- readRDS("NewAnalysesTrase2.4/UniqueEdges")
Nodes <- readRDS("NewAnalysesTrase2.4/Nodes")

# First every annual edge list is joined with the all-years reference edge list
# so that all years end up with exactly the same set of edges (and therefore
# adjacency matrices of equal dimensions).

# Minimum traded volume that counts as a true link. 27 is the value used in the
# analysis; change it here to test other cut-offs.
min_link_weight <- 27

# Put one year's volumes on the reference edge list and convert them to 0 / 1.
#
# reference_edges : from / to / weight edge list holding every edge of all years
# year_edges      : from / to / weight edge list of a single year
# min_weight      : a link is 1 when its yearly volume is strictly above this
#
# Returns one row per reference edge with columns from, to and weight, where
# weight is 1 for links above the threshold and 0 otherwise (edges absent in
# that year are 0).
equalise_year_edges <- function(reference_edges, year_edges, min_weight = 27) {
  # The join key is the (from, to) pair. The former spelling
  # c("from", "to" = "from", "to") is a three-element vector whose middle entry
  # additionally requests x$to == y$from, which is not what is meant here.
  joined <- left_join(reference_edges, year_edges, by = c("from", "to")) %>%
    select(from, to, weight = weight.y) # keep the yearly volume only
  joined$weight[is.na(joined$weight)] <- 0 # edge not traded in that year
  joined$weight <- ifelse(joined$weight > min_weight, 1, 0)
  joined
}

for (matrix_year in 2003:2017) {
  year_edges <- readRDS(
    file.path("NewAnalysesTrase2.4/AnnualEdges", paste0("EdgesNum", matrix_year))
  )
  year_binary <- equalise_year_edges(EdgesNum, year_edges, min_link_weight)
  saveRDS(
    year_binary,
    file.path("NewAnalysesTrase2.4/AnnualEdges/EdgesDim",
              paste0("Ed", matrix_year, "dim"))
  )

  year_graph <- graph_from_data_frame(year_binary, directed = TRUE)
  year_matrix <- as_adjacency_matrix(
    year_graph,
    names = TRUE, sparse = FALSE, attr = "weight"
  )
  # saving the matrix with equalized dimension
  saveRDS(
    year_matrix,
    file.path("NewAnalysesTrase2.4/AnnualMatrices",
              paste0("matrix", matrix_year, "_dim"))
  )

  # Keep the per-year objects under their historical names (edges2003, Ed2003,
  # igraph_edges2003, matrix2003, ...) in case later code refers to them.
  assign(paste0("edges", matrix_year), year_edges)
  assign(paste0("Ed", matrix_year), year_binary)
  assign(paste0("igraph_edges", matrix_year), year_graph)
  assign(paste0("matrix", matrix_year), year_matrix)
}

# this is testing, keep here for future purposes
matrix2003["57", "523"] # it should be 0
matrix2003["1327", "2217"] # it should be 1
# For LHs below
matrix2003["1", "509"] # it should be 0

# Now I create a reference matrix for all unique edges
#-------- Reference now
UniqueEdges <- readRDS("NewAnalysesTrase2.4/UniqueEdges_num")
# Every edge with a positive total volume becomes 1, everything else 0.
UniqueEdges$weight2 <- ifelse(UniqueEdges$weight > 0, 1, 0)
UniqueEdges$weight <- NULL
colnames(UniqueEdges) <- c("from", "to", "weight")
UniqueEdges$weight <- as.numeric(UniqueEdges$weight)
saveRDS(UniqueEdges, "NewAnalysesTrase2.4/AnnualEdges/EdgesDim/UniqueEdgesdim")

igraph_UniqueEdges <- graph_from_data_frame(UniqueEdges, directed = TRUE)
matrixUniqueEdges <- as_adjacency_matrix(
  igraph_UniqueEdges,
  names = TRUE, sparse = FALSE, attr = "weight"
)
# saving the matrix with equalized dimension
saveRDS(matrixUniqueEdges, "NewAnalysesTrase2.4/AnnualMatrices/MatrixUniqueEdges")

#This part is to create an
unique edges list only with municipalities -> countries to conduct the same edges analyses options(encoding = "utf8") options(stringsAsFactors = FALSE) library(tidyverse) library(igraph) d <- readRDS("NewAnalysesTrase2.4/Trase24_clean") # reading dataframe for the aggregated dfs later ADF <- d %>% # ... the %>% symbol shows that you are going to do something to the data group_by(YEAR, GEOCODE, COUNTRY) %>% # For each combination of YEAR, GEOCODE, EXPORTER, COUNTRY summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom ADF2 <- d %>% # ... the %>% symbol shows that you are going to do something to the data group_by(GEOCODE, COUNTRY) %>% # For each combination of YEAR, GEOCODE, EXPORTER, COUNTRY summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom ADF$GEOCODE <- as.character(ADF$GEOCODE) ADF$GEOCODE <- ifelse(is.na(ADF$GEOCODE), ADF$MUNICIPALITY, ADF$GEOCODE) # This searches for NAs in GEOCODE and replaces them with the same value from MUNICIPALITY ADF2$GEOCODE <- as.character(ADF2$GEOCODE) ADF2$GEOCODE <- ifelse(is.na(ADF2$GEOCODE), ADF2$MUNICIPALITY, ADF2$GEOCODE) # This searches for NAs in GEOCODE and replaces them with the same value from MUNICIPALITY #fixing columns names for igraph later colnames(ADF) <- c("year", "from","to", "weight") colnames(ADF2) <- c("from","to", "weight") saveRDS(ADF,"NewAnalysesTrase2.4/MunCountry_EdAnalyses/MunCountry_Years") saveRDS(ADF2,"NewAnalysesTrase2.4/MunCountry_EdAnalyses/MunCountry_UnEdges") #Transforming the two edges lists in numeric according to Nodes IDs to facilitate later operations Nodes <- readRDS("NewAnalysesTrase2.4/Nodes") joined_df <- inner_join(ADF, Nodes, # which two df to join by = c("from" = "label")) # specify the cols to match by View(joined_df) 
# Second join: attach the ID of the destination node. Both tables now carry an
# `id` column, so dplyr names them id.x (origin) and id.y (destination).
joined_df <- inner_join(joined_df, Nodes, by = c("to" = "label"))

head(joined_df) # Check it looks OK
dim(joined_df); dim(ADF) # check the number of rows match
any(is.na(joined_df$id.y)) # should be false
if (nrow(joined_df) != nrow(ADF)) {
  warning("Some annual flows were dropped while matching node labels to IDs.", call. = FALSE)
}
if (interactive()) View(joined_df)

# Annual edges list expressed with numeric node IDs.
MCYear <- joined_df %>%
  select(year, from = id.x, to = id.y, weight)
str(MCYear) # check the category of columns
if (interactive()) View(MCYear)
saveRDS(MCYear, "NewAnalysesTrase2.4/MunCountry_EdAnalyses/MunCountry_YearsNum")

# Same thing for the all-years (unique) edges list.
joined_df2 <- inner_join(ADF2, Nodes, by = c("from" = "label"))
if (interactive()) View(joined_df2)
joined_df2 <- inner_join(joined_df2, Nodes, by = c("to" = "label"))

head(joined_df2) # Check it looks OK
dim(joined_df2); dim(ADF2) # check the number of rows match
any(is.na(joined_df2$id.y)) # should be false
if (nrow(joined_df2) != nrow(ADF2)) {
  warning("Some unique edges were dropped while matching node labels to IDs.", call. = FALSE)
}
if (interactive()) View(joined_df2)

MCUnEd <- joined_df2 %>%
  select(from = id.x, to = id.y, weight)
str(MCUnEd) # check the category of columns
if (interactive()) View(MCUnEd)
saveRDS(MCUnEd, "NewAnalysesTrase2.4/MunCountry_EdAnalyses/MunCountry_UnEdgesNum")

# Creating one unique ID for each unique pair of nodes
MCUnEd$id <- seq.int(nrow(MCUnEd))

# Saving this reference edges list with the unique ID
saveRDS(MCUnEd, "NewAnalysesTrase2.4/MunCountry_EdAnalyses/MCUnEdNumID")

# Restrict the annual list to edges present in the reference list. The join key
# is simply c("from", "to"); the former c("from", "to" = "from", "to") also
# paired `to` with `from` and only worked through legacy de-duplication of `by`.
MCYearID <- MCYear %>%
  inner_join(MCUnEd, by = c("from", "to")) %>%
  select(year, from, to, weight = weight.x)
saveRDS(MCYearID, "NewAnalysesTrase2.4/MunCountry_EdAnalyses/MCYearsNum")

# Subsetting annual edges lists (one per year) to later join each year's weight
# to MCUnEd. For every year this:
#   1. keeps the year's rows in d<year>,
#   2. saves the unfiltered from/to/weight list to AnnualEdges/Ed<year>,
#   3. keeps in Ed<year> only the edges with at least 27 tons.
# The per-year objects (d2003, Ed2003, ...) are created by name because the
# rest of the script refers to them individually.
mc_annual_dir <- "NewAnalysesTrase2.4/MunCountry_EdAnalyses/AnnualEdges/"
min_tons <- 27

for (yr in 2003:2017) {
  flows_yr <- subset(MCYearID, year == yr)
  edges_yr <- flows_yr %>% select(from, to, weight)
  saveRDS(edges_yr, file = paste0(mc_annual_dir, "Ed", yr)) # saving in RDS as well to guarantee

  assign(paste0("d", yr), flows_yr)
  # Edges below the threshold are removed here; they will show up as zeros once
  # the years are joined back onto the reference list.
  assign(paste0("Ed", yr), subset(edges_yr, weight >= min_tons))
}
# Build one wide table: a row per unique municipality -> country edge, its
# total volume, its ID and one volume column per year (2003-2017).
analysis_years <- 2003:2017
year_cols <- as.character(analysis_years)

join <- MCUnEd
for (yr in analysis_years) {
  annual_edges <- get(paste0("Ed", yr))
  # Name the volume column after its year before joining, so the joins never
  # have to disambiguate repeated `weight` columns with suffixes.
  names(annual_edges)[names(annual_edges) == "weight"] <- as.character(yr)
  # Plain c("from", "to") key: the former c("from", "to" = "from", "to") also
  # paired `to` with `from` and relied on legacy de-duplication of `by`.
  join <- left_join(join, annual_edges, by = c("from", "to"))
}
colnames(join) <- c("from", "to", "total_weight", "id", year_cols)

# Replaces NA, NaN and infinite entries with 0 in every column of `tbl`.
zero_missing <- function(tbl) {
  tbl[] <- lapply(tbl, function(column) {
    column[is.infinite(column) | is.na(column)] <- 0
    column
  })
  tbl
}

# Cleaning NAs: a year without trade on an edge counts as zero volume.
join <- zero_missing(join)

saveRDS(join, "NewAnalysesTrase2.4/MunCountry_EdAnalyses/MCEdAnalyses")
# This second copy is overwritten further down with the frequency > 0 subset.
saveRDS(join, "NewAnalysesTrase2.4/MunCountry_EdAnalyses/MCEdAnalysesGreater0")

# Calculating SD with R function just to compare the results
join$SD <- apply(join[, year_cols], 1, FUN = sd)
join$mean <- apply(join[, year_cols], 1, FUN = mean)
join$cv <- join$SD / join$mean # NaN when the mean is 0; zeroed below

# Counting the frequency now: number of years in which each edge carried at
# least 27 tons.
MCYear <- subset(MCYear, weight >= 27)
MCYearID <- MCYear %>%
  inner_join(MCUnEd, by = c("from", "to")) %>%
  select(year, from, to, weight = weight.x, id)
saveRDS(MCYearID, "NewAnalysesTrase2.4/MunCountry_EdAnalyses/MCYearsNumID")

# The result keeps the name `count` used by the rest of the script; the
# function is called with its namespace to make clear which one is meant.
count <- dplyr::count(MCYearID, id)
colnames(count) <- c("id", "frequency")

join <- left_join(join, count, by = "id")

# Cleaning NAs: edges that never reached the threshold get frequency 0, and
# undefined coefficients of variation become 0.
join <- zero_missing(join)

saveRDS(join, "NewAnalysesTrase2.4/MunCountry_EdAnalyses/MCEdAnalyses_all")

# Keep a copy of the table as it was before the statistics were added.
test <- readRDS("NewAnalysesTrase2.4/MunCountry_EdAnalyses/MCEdAnalyses")
saveRDS(test, "NewAnalysesTrase2.4/MunCountry_EdAnalyses/MCEdAnalyses_original")

MCEd_all <- readRDS("NewAnalysesTrase2.4/MunCountry_EdAnalyses/MCEdAnalyses_all")
MCEd_Greater0 <- readRDS("NewAnalysesTrase2.4/MunCountry_EdAnalyses/MCEdAnalysesGreater0")
MCEd_original <- readRDS("NewAnalysesTrase2.4/MunCountry_EdAnalyses/MCEdAnalyses_original")

# Only edges that reached the threshold in at least one year.
MCEd_Greater02 <- subset(MCEd_all, frequency > 0)
saveRDS(MCEd_Greater02, "NewAnalysesTrase2.4/MunCountry_EdAnalyses/MCEdAnalysesGreater0")

# Statistics recomputed on the filtered table (kept in memory only).
MCEd_Greater03 <- MCEd_Greater02
MCEd_Greater03$SD <- apply(MCEd_Greater03[, year_cols], 1, FUN = sd)
MCEd_Greater03$mean <- apply(MCEd_Greater03[, year_cols], 1, FUN = mean)
MCEd_Greater03$cv <- MCEd_Greater03$SD / MCEd_Greater03$mean

# Now putting municipalities labels and states
MCEdAnalyses <- readRDS("NewAnalysesTrase2.4/MunCountry_EdAnalyses/MCEdAnalysesGreater0")
LibraryMun <- readRDS("IntermediateData/NewAnalysesTrase2.3/Library_mun")
MCEdAnalyses_orig <- readRDS("NewAnalysesTrase2.4/MunCountry_EdAnalyses/MCEdAnalyses_original")
NodesLabels <- readRDS("NewAnalysesTrase2.4/Nodes_Labels")

# Attach the label of the origin node (label.x) and of the destination node
# (label.y).
MCEdAnalyses_Labels <- left_join(MCEdAnalyses, NodesLabels, by = c("from" = "id"))
MCEdAnalyses_Labels <- left_join(MCEdAnalyses_Labels, NodesLabels, by = c("to" = "id"))

# fixing column names
MCEdAnalyses_Labels$labelfrom <- MCEdAnalyses_Labels$label.x
MCEdAnalyses_Labels$labelto <- MCEdAnalyses_Labels$label.y
MCEdAnalyses_Labels$label.x <- NULL
MCEdAnalyses_Labels$label.y <- NULL

# Add municipality name and state by matching the origin label to GEOCODE.
MCEdAnalyses_Labels <- left_join(MCEdAnalyses_Labels, LibraryMun, by = c("labelfrom" = "GEOCODE"))

# Origins without a match in the municipality library keep their own label as
# MUNICIPALITY and as STATE.
MCEdAnalyses_Labels$MUNICIPALITY <- ifelse(
  is.na(MCEdAnalyses_Labels$MUNICIPALITY),
  MCEdAnalyses_Labels$labelfrom,
  MCEdAnalyses_Labels$MUNICIPALITY
)
MCEdAnalyses_Labels$STATE <- ifelse(
  is.na(MCEdAnalyses_Labels$STATE),
  MCEdAnalyses_Labels$labelfrom,
  MCEdAnalyses_Labels$STATE
)

saveRDS(MCEdAnalyses_Labels, "NewAnalysesTrase2.4/MunCountry_EdAnalyses/MCEdAnalyses_Labels")

# Here it finishes the creation of MunCountry_EdAnalysis

# Now I will create matrices for the relationships between municipalities and
# countries to calculate the Cis (Mun -> Countries).
EdgesNum <- readRDS("NewAnalysesTrase2.4/MunCountry_EdAnalyses/MunCountry_UnEdgesNum")

# Aligns one year's edges with the reference list of unique edges and turns the
# volume into a binary flag.
#   reference_edges: all unique edges (columns from, to, weight)
#   annual_edges:    that year's edges (columns from, to, weight)
#   min_tons:        volume an edge must exceed to be flagged
# Returns one row per reference edge with columns from, to and weight, where
# weight is 1 if the year's volume is above `min_tons` and 0 otherwise
# (including edges that do not appear in that year).
# NOTE(review): the test is strict (> 27) whereas the volume filters in this
# file keep >= 27 -- confirm which boundary is intended.
binarize_annual_edges <- function(reference_edges, annual_edges, min_tons = 27) {
  reference_edges %>%
    # Plain c("from", "to") key: the former c("from", "to" = "from", "to") also
    # paired `to` with `from` and relied on legacy de-duplication of `by`.
    left_join(annual_edges, by = c("from", "to")) %>%
    select(from, to, weight = weight.y) %>%
    mutate(weight = as.numeric(!is.na(weight) & weight > min_tons))
}

mc_dir <- "NewAnalysesTrase2.4/MunCountry_EdAnalyses/"

# One binary edges list and one adjacency matrix per year. The per-year objects
# (edges2003, Ed2003, igraph_edges2003, matrix2003, ...) are created by name
# because the rest of the script refers to them individually.
for (yr in 2003:2016) {
  annual_edges <- readRDS(paste0(mc_dir, "AnnualEdges/Ed", yr))
  binary_edges <- binarize_annual_edges(EdgesNum, annual_edges)
  saveRDS(binary_edges, paste0(mc_dir, "AnnualEdges/EdgesDim/Ed", yr, "dim"))

  # graph_from_data_frame() is the current name of the deprecated
  # graph.data.frame().
  annual_graph <- graph_from_data_frame(binary_edges, directed = TRUE)
  annual_matrix <- as_adjacency_matrix(
    annual_graph,
    names = TRUE, sparse = FALSE, attr = "weight"
  )
  saveRDS(annual_matrix, paste0(mc_dir, "AnnualMatrices/matrix", yr, "_dim")) # saving the matrix with equalized dimension

  assign(paste0("edges", yr), annual_edges)
  assign(paste0("Ed", yr), binary_edges)
  assign(paste0("igraph_edges", yr), annual_graph)
  assign(paste0("matrix", yr), annual_matrix)
}

# this is testing, keep here for future purposes (the node IDs are specific to
# this dataset)
matrix2003["2", "2216"] # it should be 1
matrix2003["21", "2217"] # it should be 0
# Test ok - go on
# Test on LHs
matrix2003["2", "2294"] # it should be 0
# Test ok - go on

# -------------- 2017: binary edges list (saved and converted right after) ----
edges2017 <- readRDS(paste0(mc_dir, "AnnualEdges/Ed2017"))
Ed2017 <- binarize_annual_edges(EdgesNum, edges2017)
saveRDS(Ed2017, "NewAnalysesTrase2.4/MunCountry_EdAnalyses/AnnualEdges/EdgesDim/Ed2017dim") igraph_edges2017 <- graph.data.frame(Ed2017,directed=TRUE) matrix2017 <- as_adjacency_matrix(igraph_edges2017,names=TRUE,sparse=FALSE,attr="weight") saveRDS(matrix2017, "NewAnalysesTrase2.4/MunCountry_EdAnalyses/AnnualMatrices/matrix2017_dim") #saving the matrix with equalized dimension Ed2017 <- readRDS("NewAnalysesTrase2.4/MunCountry_EdAnalyses/AnnualEdges/EdgesDim/Ed2017dim") #Now I create a reference matrix for all unique edges #-------- Reference now UniqueEdges <- readRDS("NewAnalysesTrase2.4/MunCountry_EdAnalyses/MunCountry_UnEdgesNum") UniqueEdges$weight2 <- 0 UniqueEdges$weight2 <- as.numeric(UniqueEdges$weight2) UniqueEdges$weight2 <- ifelse(UniqueEdges$weight>0, 1, 0) UniqueEdges$weight <- NULL colnames(UniqueEdges) <- c("from", "to", "weight") UniqueEdges$weight <- as.numeric(UniqueEdges$weight) saveRDS(UniqueEdges, "NewAnalysesTrase2.4/MunCountry_EdAnalyses/AnnualEdges/EdgesDim/UniqueEdgesdim") UniqueEdges <- readRDS("NewAnalysesTrase2.4/MunCountry_EdAnalyses/AnnualEdges/EdgesDim/UniqueEdgesdim") igraph_UniqueEdges <- graph.data.frame(UniqueEdges,directed=TRUE) matrixUniqueEdges <- as_adjacency_matrix(igraph_UniqueEdges,names=TRUE,sparse=FALSE,attr="weight") saveRDS(matrixUniqueEdges, "NewAnalysesTrase2.4/MunCountry_EdAnalyses/AnnualMatrices/MatrixUniqueEdges") #saving the matrix with equalized dimension #Here this finishes preparation for MunCountry Analyses. 
# Next steps: creating the weighted edge lists for WPi and calculating CIs on
# binary matrices.
# This code prepares an edge list with all unique edges, the total volume that
# flowed over the whole period and one column per annual volume, so that the
# standard deviation, mean and coefficient of variation can be calculated.
options(encoding = "utf8")
options(stringsAsFactors = FALSE)

library(tidyverse)
library(igraph)

years <- 2003:2017
year_labels <- as.character(years)
min_tons <- 27  # volumes below this many tons are set to 0
edge_key <- c("from", "to")
annual_dir <- "NewAnalysesTrase2.4/AnnualEdges"
weighted_dir <- "NewAnalysesTrase2.4/AnnualEdges/EdgesDimWei"

EdgesNum <- readRDS("NewAnalysesTrase2.4/UniqueEdges_num")

# Annual edge lists with the same rows as the reference ------------------------
# (the author notes n rows = 24442 for this dataset)

# Pad one year's edge list to the reference edge list: every reference edge is
# kept, and edges that are absent in that year or carry less than `min_weight`
# get weight 0. Both inputs need the columns `from`, `to` and `weight`.
pad_annual_edges <- function(reference, annual, min_weight) {
  # Join on both endpoints; a plain character vector keeps the key unambiguous.
  padded <- left_join(reference, annual, by = c("from", "to"))
  padded <- padded %>% select(from, to, weight.y)
  colnames(padded) <- c("from", "to", "weight")
  padded$weight[is.na(padded$weight)] <- 0
  padded$weight <- ifelse(padded$weight >= min_weight, padded$weight, 0)
  padded
}

annual_raw <- lapply(setNames(years, year_labels), function(yr) {
  readRDS(file.path(annual_dir, paste0("EdgesNum", yr)))
})
annual_edges <- lapply(annual_raw, function(annual) {
  pad_annual_edges(EdgesNum, annual, min_tons)
})
for (yr in year_labels) {
  saveRDS(annual_edges[[yr]], file.path(weighted_dir, paste0("Ed", yr, "dimWei")))
}

# Keep the per-year objects available under their historical names
# (edges2003 ... edges2017 and Ed2003 ... Ed2017).
invisible(list2env(setNames(annual_raw, paste0("edges", years)), envir = environment()))
invisible(list2env(setNames(annual_edges, paste0("Ed", years)), envir = environment()))

# No weighted adjacency matrices are built here because Ci is not calculated on
# weighted values.

# Reference edge list ----------------------------------------------------------
# Edges whose aggregate volume over the whole period is below the threshold get
# total weight 0.
UniqueEdges <- readRDS("NewAnalysesTrase2.4/UniqueEdges_num")
UniqueEdges$weight <- ifelse(UniqueEdges$weight >= min_tons, UniqueEdges$weight, 0)
saveRDS(UniqueEdges, file.path(weighted_dir, "UniqueEdgesDimWei"))

# Manual check: EdgesNum keeps the original values, UniqueEdges has every
# weight below the threshold as 0.
test1 <- subset(EdgesNum, weight >= min_tons)
test2 <- subset(UniqueEdges, weight >= min_tons)

# One unique id per pair of nodes.
UniqueEdgesDimWei <- UniqueEdges
UniqueEdgesDimWei$id <- seq_len(nrow(UniqueEdgesDimWei))
saveRDS(UniqueEdgesDimWei, file.path(weighted_dir, "UniqueEdgesDimWei_id"))

# One row per edge, one column per year ----------------------------------------
# The weight column is renamed before each join so the result never depends on
# dplyr's suffixing of clashing column names.
join <- UniqueEdgesDimWei
colnames(join)[colnames(join) == "weight"] <- "total_weight"
for (yr in year_labels) {
  annual <- annual_edges[[yr]]
  colnames(annual)[colnames(annual) == "weight"] <- yr
  join <- inner_join(join, annual, by = edge_key)
}
join <- join[, c("from", "to", "total_weight", "id", year_labels)]

# `total_weight` is thresholded on the aggregate volume while each annual column
# is thresholded on its own, so `total_weight` can exceed the sum of the annual
# columns. `avrg` is based on the former, `mean` on the latter.
join$avrg <- join$total_weight / length(years)

EdgesAnalyses <- join
annual_volumes <- EdgesAnalyses[, year_labels]
EdgesAnalyses$SD <- apply(annual_volumes, 1, FUN = sd)
EdgesAnalyses$mean <- apply(annual_volumes, 1, FUN = mean)
EdgesAnalyses$cv <- EdgesAnalyses$SD / EdgesAnalyses$mean

# Frequency: number of years in which each edge has a volume > 0 --------------
# unname() keeps rbind() from prefixing the row names with the year.
AllyearsRowBound <- do.call(rbind, unname(annual_edges))
AllyearsRowBoundGreater0 <- subset(AllyearsRowBound, weight > 0)
UnEdgesID <- select(EdgesAnalyses, from, to, id)
joinID <- inner_join(AllyearsRowBoundGreater0, UnEdgesID, by = edge_key)
edge_frequency <- count(joinID, id)
colnames(edge_frequency) <- c("id", "frequency")
EdgesAnalyses <- left_join(EdgesAnalyses, edge_frequency, by = "id")

# Infinite values, NA frequencies (edge never active) and NaN cv (mean of 0)
# all become 0.
is.na(EdgesAnalyses) <- vapply(EdgesAnalyses, is.infinite, logical(nrow(EdgesAnalyses)))
EdgesAnalyses[is.na(EdgesAnalyses)] <- 0
saveRDS(EdgesAnalyses, "NewAnalysesTrase2.4/EdgesAnalyses/EdgesAnalyses")

EdgesAnalyses2 <- subset(EdgesAnalyses, frequency > 0)
saveRDS(EdgesAnalyses2, "NewAnalysesTrase2.4/EdgesAnalyses/EdgesAnalyses_FreqGreater0")

# Node labels for both endpoints -----------------------------------------------
NodesLabels <- readRDS("NewAnalysesTrase2.4/Nodes")
EdAnalyses_labels <- left_join(EdgesAnalyses, NodesLabels, by = c("from" = "id"))
EdAnalyses_labels <- left_join(EdAnalyses_labels, NodesLabels, by = c("to" = "id"))
# The two joins leave label.x (origin) and label.y (destination).
EdAnalyses_labels$labelfrom <- EdAnalyses_labels$label.x
EdAnalyses_labels$labelto <- EdAnalyses_labels$label.y
EdAnalyses_labels$label.x <- NULL
EdAnalyses_labels$label.y <- NULL
saveRDS(EdAnalyses_labels, "NewAnalysesTrase2.4/EdgesAnalyses/EdgesAnalyses_labels")

EdAnalyses_labels2 <- subset(EdAnalyses_labels, frequency > 0)
saveRDS(EdAnalyses_labels2, "NewAnalysesTrase2.4/EdgesAnalyses/EdgesAnalyses_labelsFreqGreater0")

# Municipality names -----------------------------------------------------------
LibraryMun <- readRDS("IntermediateData/NewAnalysesTrase2.3/Library_mun")

# Attach MUNICIPALITY and STATE to each edge through the origin label. Origins
# without a match in the municipality library get the label itself in both
# columns.
add_origin_municipality <- function(edges, library_mun) {
  out <- left_join(edges, library_mun, by = c("labelfrom" = "GEOCODE"))
  out$MUNICIPALITY <- ifelse(is.na(out$MUNICIPALITY), out$labelfrom, out$MUNICIPALITY)
  out$STATE <- ifelse(is.na(out$STATE), out$labelfrom, out$STATE)
  out
}

# Third labelled version: only edges with frequency > 0.
EdAnalyses_labels3 <- add_origin_municipality(EdAnalyses_labels2, LibraryMun)
saveRDS(EdAnalyses_labels3, "NewAnalysesTrase2.4/EdgesAnalyses/EdgesAnalyses_labelsMunFreqGreat0")

# Fourth labelled version: all edges.
EdAnalyses_labels4 <- add_origin_municipality(EdAnalyses_labels, LibraryMun)
saveRDS(EdAnalyses_labels4, "NewAnalysesTrase2.4/EdgesAnalyses/EdgesAnalyses_labelsMun")

EdAnalyses <- readRDS("NewAnalysesTrase2.4/EdgesAnalyses/EdgesAnalyses_labelsMun")
# Here it is done!
# Next step is analysing edges between municipalities and countries directly.
# That probably means starting over and doing the same calculations for the
# unique edges, but only for Mun -> Countries; the results can then be row-bound
# here or analysed separately. Plotting maps and panel data analysis come later.

EdAnalyses2 <- subset(EdAnalyses, frequency > 0)
saveRDS(EdAnalyses2, "NewAnalysesTrase2.4/EdgesAnalyses/EdgesAnalyses_labelsMun_SubsetFreqLarger0")
# Ok, this part is done.

# This script calculates the WPi (weighted persistence), according to Patrick's
# formula: absolute changes in the flows of node i between two snapshots /
# total volume of node i in the two snapshots.
# This is first calculated at Logistics Hubs.
options(encoding = "utf8")
options(stringsAsFactors = FALSE)

library(tidyverse)
library(igraph)

# The first step is retrieving the original annual edge lists with real weights
# (volumes of soy traded), because the matrices are then prepared with the
# actual weights instead of 0 or 1.
years <- 2003:2017
year_labels <- as.character(years)
weighted_edges_dir <- "NewAnalysesTrase2.4/AnnualEdges/EdgesDimWei"
weighted_matrix_dir <- "NewAnalysesTrase2.4/AnnualMatrices/MatricesDimWei"

# Weighted adjacency matrix for every year ------------------------------------
annual_edges <- lapply(setNames(years, year_labels), function(yr) {
  readRDS(file.path(weighted_edges_dir, paste0("Ed", yr, "dimWei")))
})
# graph_from_data_frame() is the current name of the deprecated graph.data.frame().
annual_graphs <- lapply(annual_edges, graph_from_data_frame, directed = TRUE)
annual_matrices <- lapply(
  annual_graphs,
  as_adjacency_matrix,
  names = TRUE, sparse = FALSE, attr = "weight"
)
for (yr in year_labels) {
  # saving the matrix with equalized dimension
  saveRDS(annual_matrices[[yr]], file.path(weighted_matrix_dir, paste0("matrix", yr, "_dimWei")))
}

# Keep the per-year objects available under their historical names
# (Ed2003, igraph_edges2003, matrix2003, ...).
invisible(list2env(setNames(annual_edges, paste0("Ed", years)), envir = environment()))
invisible(list2env(setNames(annual_graphs, paste0("igraph_edges", years)), envir = environment()))
invisible(list2env(setNames(annual_matrices, paste0("matrix", years)), envir = environment()))

# Spot checks kept for future purposes (expected values noted by the author).
matrix2007["9", "509"]      # it should be 111734.431
matrix2004["940", "2241"]   # it should be 0.0000
matrix2010["1697", "2217"]  # it should be 1006.5952

# WPi of sending nodes ---------------------------------------------------------
Nodes <- readRDS("NewAnalysesTrase2.4/Nodes")

# Weighted persistence between two snapshots: 1 - sum(|change in flows|) /
# sum(flows in both snapshots), taken per node with `margin_sum` (rowSums for
# sending nodes). A node with no volume in either snapshot yields NaN.
weighted_persistence <- function(m_start, m_end, margin_sum) {
  1 - margin_sum(abs(m_start - m_end)) / margin_sum(m_start + m_end)
}

# The matrices are combined element by element and the results are attached to
# the node table, so all of them must describe the same vertices. The matrix
# order is the one igraph derived from the edge list, hence results are
# re-ordered by vertex name to follow the row order of `Nodes`.
node_ids <- as.character(Nodes$id)
reference_names <- rownames(annual_matrices[[1]])
stopifnot(
  length(node_ids) == length(reference_names),
  setequal(node_ids, reference_names),
  all(vapply(
    annual_matrices,
    function(m) identical(rownames(m), reference_names),
    logical(1)
  ))
)

# Every pair of consecutive years plus the first year against the last one.
last_year <- length(year_labels)
pair_start <- c(year_labels[-last_year], year_labels[1])
pair_end <- c(year_labels[-1], year_labels[last_year])
wpi_names <- paste0("WPi", substr(pair_start, 3, 4), substr(pair_end, 3, 4), "S")
consecutive_names <- wpi_names[-length(wpi_names)]

wpi_sending <- Map(
  function(from_year, to_year) {
    wpi <- weighted_persistence(annual_matrices[[from_year]], annual_matrices[[to_year]], rowSums)
    wpi[node_ids]
  },
  pair_start, pair_end
)
names(wpi_sending) <- wpi_names
# WPi0304S ... WPi1617S and WPi0317S stay available as individual vectors.
invisible(list2env(wpi_sending, envir = environment()))

NodesWPiS <- do.call(
  data.frame,
  c(list(Nodes.id = Nodes$id, Nodes.label = Nodes$label), wpi_sending)
)

# NaN and infinite values are set to 0 so that the mean can be calculated.
# NOTE(review): this lowers the average of nodes that were inactive in some
# pairs of years - confirm that this is intended.
is.na(NodesWPiS) <- sapply(NodesWPiS, is.infinite)
NodesWPiS[is.na(NodesWPiS)] <- 0

# Average WPi over the pairs of consecutive years (WPi0317S is left out).
NodesWPiS$WPis_avrg <- apply(NodesWPiS[, consecutive_names], 1, FUN = mean)

# Volume sent by every node in each year, so that nodes can later be filtered
# by volume (e.g. higher than 27 tons).
for (yr in year_labels) {
  NodesWPiS[[paste0("v", yr)]] <- rowSums(annual_matrices[[yr]])[node_ids]
}
saveRDS(NodesWPiS, "NewAnalysesTrase2.4/NodesWPi/NodesWPiS")

# Adding the municipality library to identify Logistics Hubs. Labels without a
# match in the library get the label itself as MUNICIPALITY and STATE.
LibraryMun <- readRDS("IntermediateData/NewAnalysesTrase2.3/Library_mun")
NodesWPiS_labels <- left_join(NodesWPiS, LibraryMun, by = c("Nodes.label" = "GEOCODE"))
NodesWPiS_labels$MUNICIPALITY <- ifelse(
  is.na(NodesWPiS_labels$MUNICIPALITY),
  NodesWPiS_labels$Nodes.label,
  NodesWPiS_labels$MUNICIPALITY
)
NodesWPiS_labels$STATE <- ifelse(
  is.na(NodesWPiS_labels$STATE),
  NodesWPiS_labels$Nodes.label,
  NodesWPiS_labels$STATE
)
saveRDS(NodesWPiS_labels, "NewAnalysesTrase2.4/NodesWPi/NodesWPiS_labels")
#All volumes < 27 tons were already transformed into 0. Great.
############## Now everything for receiving nodes ------------
years <- 2003:2017
year_labels <- as.character(years)

# The weighted adjacency matrices matrix2003 ... matrix2017 and the node table
# `Nodes` are expected to be in the workspace already.
annual_matrices <- setNames(mget(paste0("matrix", years)), year_labels)

# Weighted persistence between two snapshots: 1 - sum(|change in flows|) /
# sum(flows in both snapshots), taken per node with `margin_sum` (colSums for
# receiving nodes). A node with no volume in either snapshot yields NaN.
weighted_persistence <- function(m_start, m_end, margin_sum) {
  1 - margin_sum(abs(m_start - m_end)) / margin_sum(m_start + m_end)
}

# The matrices are combined element by element and the results are attached to
# the node table, so all of them must describe the same vertices. The matrix
# order is the one igraph derived from the edge list, hence results are
# re-ordered by vertex name to follow the row order of `Nodes`.
node_ids <- as.character(Nodes$id)
reference_names <- colnames(annual_matrices[[1]])
stopifnot(
  length(node_ids) == length(reference_names),
  setequal(node_ids, reference_names),
  all(vapply(
    annual_matrices,
    function(m) identical(colnames(m), reference_names),
    logical(1)
  ))
)

# Every pair of consecutive years plus the first year against the last one.
last_year <- length(year_labels)
pair_start <- c(year_labels[-last_year], year_labels[1])
pair_end <- c(year_labels[-1], year_labels[last_year])
wpi_names <- paste0("WPi", substr(pair_start, 3, 4), substr(pair_end, 3, 4), "R")
consecutive_names <- wpi_names[-length(wpi_names)]

wpi_receiving <- Map(
  function(from_year, to_year) {
    wpi <- weighted_persistence(annual_matrices[[from_year]], annual_matrices[[to_year]], colSums)
    wpi[node_ids]
  },
  pair_start, pair_end
)
names(wpi_receiving) <- wpi_names
# WPi0304R ... WPi1617R and WPi0317R stay available as individual vectors.
invisible(list2env(wpi_receiving, envir = environment()))

NodesWPiR <- do.call(
  data.frame,
  c(list(Nodes.id = Nodes$id, Nodes.label = Nodes$label), wpi_receiving)
)

# NaN and infinite values are set to 0 so that the mean can be calculated.
# NOTE(review): this lowers the average of nodes that were inactive in some
# pairs of years - confirm that this is intended.
is.na(NodesWPiR) <- sapply(NodesWPiR, is.infinite)
NodesWPiR[is.na(NodesWPiR)] <- 0

# Average WPi over the pairs of consecutive years (WPi0317R is left out).
NodesWPiR$WPiR_avrg <- apply(NodesWPiR[, consecutive_names], 1, FUN = mean)

# Volume received by every node in each year, so that nodes can later be
# filtered by volume (e.g. higher than 27 tons).
for (yr in year_labels) {
  NodesWPiR[[paste0("v", yr)]] <- colSums(annual_matrices[[yr]])[node_ids]
}
saveRDS(NodesWPiR, "NewAnalysesTrase2.4/NodesWPi/NodesWPiR")

# Adding the municipality library to identify Logistics Hubs. Labels without a
# match in the library get the label itself as MUNICIPALITY and STATE.
LibraryMun <- readRDS("IntermediateData/NewAnalysesTrase2.3/Library_mun")
NodesWPiR_labels <- left_join(NodesWPiR, LibraryMun, by = c("Nodes.label" = "GEOCODE"))
NodesWPiR_labels$MUNICIPALITY <- ifelse(
  is.na(NodesWPiR_labels$MUNICIPALITY),
  NodesWPiR_labels$Nodes.label,
  NodesWPiR_labels$MUNICIPALITY
)
NodesWPiR_labels$STATE <- ifelse(
  is.na(NodesWPiR_labels$STATE),
  NodesWPiR_labels$Nodes.label,
  NodesWPiR_labels$STATE
)
saveRDS(NodesWPiR_labels, "NewAnalysesTrase2.4/NodesWPi/NodesWPiR_labels")

############### Now everything for Logistics Hubs directly to Countries for WPi
#First loading all edges lists, but here I need to put their actual weights as I only prepared binary EdLists
##Important to acknowledge that here the weights are already filtered so that any annual volume trade
##less than 27 already became 0.
# The filtered annual weights are already stored in the AllYears data frame.
AllYears <- readRDS("NewAnalysesTrase2.4/MunCountry_EdAnalyses/MCEdAnalyses")

snapshot_years <- 2003:2017
edges_dir <- "NewAnalysesTrase2.4/MunCountry_EdAnalyses/AnnualEdges/EdgesDimWei/"
matrices_dir <- "NewAnalysesTrase2.4/MunCountry_EdAnalyses/AnnualMatrices/MatricesDimWei/"

# For every year: extract the weighted edge list (from, to, weight), save it,
# turn it into a directed graph and then into a dense weighted adjacency
# matrix, and save that matrix as well. The per-year objects Ed<year>,
# igraph_edges<year> and matrix<year> are still created under their usual
# names because the rest of the script refers to them by name.
for (yr in snapshot_years) {
  year_edges <- AllYears %>%
    select(from, to, weight = all_of(as.character(yr)))
  saveRDS(year_edges, file = paste0(edges_dir, "Ed", yr, "DimWei"))

  # graph_from_data_frame() replaces the deprecated graph.data.frame()
  year_graph <- graph_from_data_frame(year_edges, directed = TRUE)
  year_matrix <- as_adjacency_matrix(
    year_graph,
    names = TRUE, sparse = FALSE, attr = "weight"
  )
  # saving the matrix with equalized dimension
  saveRDS(year_matrix, paste0(matrices_dir, "matrix", yr, "dimwei"))

  assign(paste0("Ed", yr), year_edges)
  assign(paste0("igraph_edges", yr), year_graph)
  assign(paste0("matrix", yr), year_matrix)
}

# Now moving to calculate WPi on LHs to Countries.
# The matrices are read back from disk so that this part can also be run on
# its own. The file names must match the ones written above exactly
# (all-lowercase "dimwei"); a different case fails on case-sensitive file
# systems.
for (yr in snapshot_years) {
  assign(
    paste0("matrix", yr),
    readRDS(paste0(matrices_dir, "matrix", yr, "dimwei"))
  )
}

NodesAll <- readRDS("NewAnalysesTrase2.4/Nodes")

# Subsetting only LHs and countries
# Nodes <- subset(NodesAll, id>0 & id<=2529 | id >=4239 & id <=4365)
## This is the subsetting for LHs
# NOTE(review): the id ranges are hard-coded; they presumably have to be
# adapted whenever the node list changes -- confirm.
Nodes <- subset(NodesAll, id > 0 & id <= 508 | id >= 2216)

# WPi of every sending node between two snapshots, computed from the row sums
# of the two weighted matrices: 1 - sum(|a - b|) / sum(a + b).
# A node with no flow in either snapshot gives 0/0 = NaN (set to 0 below).
wpi_sending <- function(m_a, m_b) {
  1 - rowSums(abs(m_a - m_b)) / rowSums(m_a + m_b)
}

WPi0304S <- wpi_sending(matrix2003, matrix2004)
WPi0405S <- wpi_sending(matrix2004, matrix2005)
WPi0506S <- wpi_sending(matrix2005, matrix2006)
WPi0607S <- wpi_sending(matrix2006, matrix2007)
WPi0708S <- wpi_sending(matrix2007, matrix2008)
WPi0809S <- wpi_sending(matrix2008, matrix2009)
WPi0910S <- wpi_sending(matrix2009, matrix2010)
WPi1011S <- wpi_sending(matrix2010, matrix2011)
WPi1112S <- wpi_sending(matrix2011, matrix2012)
WPi1213S <- wpi_sending(matrix2012, matrix2013)
WPi1314S <- wpi_sending(matrix2013, matrix2014)
WPi1415S <- wpi_sending(matrix2014, matrix2015)
WPi1516S <- wpi_sending(matrix2015, matrix2016)
WPi1617S <- wpi_sending(matrix2016, matrix2017)
WPi0317S <- wpi_sending(matrix2003, matrix2017) # first vs. last year

# NOTE(review): this places Nodes$id / Nodes$label next to values that follow
# the row order of the matrices; nothing here checks that both orders agree
# -- confirm.
NodesWPiS <- data.frame(
  Nodes$id, Nodes$label,
  WPi0304S, WPi0405S, WPi0506S, WPi0607S, WPi0708S, WPi0809S, WPi0910S,
  WPi1011S, WPi1112S, WPi1213S, WPi1314S, WPi1415S, WPi1516S, WPi1617S,
  WPi0317S
)

# Replace Inf, NaN and NA by 0 so that the mean can be calculated properly
is.na(NodesWPiS) <- vapply(NodesWPiS, is.infinite, logical(nrow(NodesWPiS)))
NodesWPiS[is.na(NodesWPiS)] <- 0

# Average WPi over the 14 consecutive-year pairs (columns 3 to 16); the
# 2003-2017 comparison in column 17 is deliberately left out.
# (The column is called "WPis_avrg" with a lowercase "s"; the name is kept
# as it is because saved outputs use it.)
NodesWPiS$WPis_avrg <- apply(as.matrix(NodesWPiS[, 3:16]), 1, mean)

# Volume sent by every node in each year, so that nodes can later be filtered
# by volume (threshold: 27 tons)
for (yr in snapshot_years) {
  NodesWPiS[[paste0("v", yr)]] <- rowSums(get(paste0("matrix", yr)))
}

saveRDS(NodesWPiS, "NewAnalysesTrase2.4/NodesWPi/WPi_MunCountry/NodesWPiS")

# Adding the municipality library to identify Logistics Hubs
LibraryMun <- readRDS("IntermediateData/NewAnalysesTrase2.3/Library_mun")
NodesWPiS_labels <- left_join(NodesWPiS, LibraryMun, by = c("Nodes.label" = "GEOCODE"))
# Nodes whose label has no GEOCODE match get NA in MUNICIPALITY and STATE;
# fall back to the node label itself in both columns.
NodesWPiS_labels$MUNICIPALITY <- ifelse(
  is.na(NodesWPiS_labels$MUNICIPALITY),
  NodesWPiS_labels$Nodes.label,
  NodesWPiS_labels$MUNICIPALITY
)
NodesWPiS_labels$STATE <- ifelse(
  is.na(NodesWPiS_labels$STATE),
  NodesWPiS_labels$Nodes.label,
  NodesWPiS_labels$STATE
)

saveRDS(NodesWPiS_labels, "NewAnalysesTrase2.4/NodesWPi/WPi_MunCountry/NodesWPiS_labels")

# All volumes < 27 tons were already transformed into 0. Great.
############## Now everything for receiving nodes ------------

# WPi of every receiving node between two snapshots, computed from the column
# sums of the two weighted matrices: 1 - sum(|a - b|) / sum(a + b).
# A node with no flow in either snapshot gives 0/0 = NaN (set to 0 below).
wpi_receiving <- function(m_a, m_b) {
  1 - colSums(abs(m_a - m_b)) / colSums(m_a + m_b)
}

WPi0304R <- wpi_receiving(matrix2003, matrix2004)
WPi0405R <- wpi_receiving(matrix2004, matrix2005)
WPi0506R <- wpi_receiving(matrix2005, matrix2006)
WPi0607R <- wpi_receiving(matrix2006, matrix2007)
WPi0708R <- wpi_receiving(matrix2007, matrix2008)
WPi0809R <- wpi_receiving(matrix2008, matrix2009)
WPi0910R <- wpi_receiving(matrix2009, matrix2010)
WPi1011R <- wpi_receiving(matrix2010, matrix2011)
WPi1112R <- wpi_receiving(matrix2011, matrix2012)
WPi1213R <- wpi_receiving(matrix2012, matrix2013)
WPi1314R <- wpi_receiving(matrix2013, matrix2014)
WPi1415R <- wpi_receiving(matrix2014, matrix2015)
WPi1516R <- wpi_receiving(matrix2015, matrix2016)
WPi1617R <- wpi_receiving(matrix2016, matrix2017)
WPi0317R <- wpi_receiving(matrix2003, matrix2017) # first vs. last year

# NOTE(review): this places Nodes$id / Nodes$label next to values that follow
# the column order of the matrices; nothing here checks that both orders
# agree -- confirm.
NodesWPiR <- data.frame(
  Nodes$id, Nodes$label,
  WPi0304R, WPi0405R, WPi0506R, WPi0607R, WPi0708R, WPi0809R, WPi0910R,
  WPi1011R, WPi1112R, WPi1213R, WPi1314R, WPi1415R, WPi1516R, WPi1617R,
  WPi0317R
)

# Replace Inf, NaN and NA by 0 so that the mean can be calculated properly
is.na(NodesWPiR) <- vapply(NodesWPiR, is.infinite, logical(nrow(NodesWPiR)))
NodesWPiR[is.na(NodesWPiR)] <- 0

# Average WPi over the 14 consecutive-year pairs (columns 3 to 16); the
# 2003-2017 comparison in column 17 is deliberately left out.
NodesWPiR$WPiR_avrg <- apply(as.matrix(NodesWPiR[, 3:16]), 1, mean)

# Volume received by every node in each year, so that nodes can later be
# filtered by volume (threshold: 27 tons)
for (yr in 2003:2017) {
  NodesWPiR[[paste0("v", yr)]] <- colSums(get(paste0("matrix", yr)))
}

saveRDS(NodesWPiR, "NewAnalysesTrase2.4/NodesWPi/WPi_MunCountry/NodesWPiR")

# Adding the municipality library to identify Logistics Hubs
LibraryMun <- readRDS("IntermediateData/NewAnalysesTrase2.3/Library_mun")
NodesWPiR_labels <- left_join(NodesWPiR, LibraryMun, by = c("Nodes.label" = "GEOCODE"))
# Nodes whose label has no GEOCODE match get NA in MUNICIPALITY and STATE;
# fall back to the node label itself in both columns.
NodesWPiR_labels$MUNICIPALITY <- ifelse(
  is.na(NodesWPiR_labels$MUNICIPALITY),
  NodesWPiR_labels$Nodes.label,
  NodesWPiR_labels$MUNICIPALITY
)
NodesWPiR_labels$STATE <- ifelse(
  is.na(NodesWPiR_labels$STATE),
  NodesWPiR_labels$Nodes.label,
  NodesWPiR_labels$STATE
)

saveRDS(NodesWPiR_labels, "NewAnalysesTrase2.4/NodesWPi/WPi_MunCountry/NodesWPiR_labels")

## Now calculating CIs
# This part calculates CIs on annual matrices with equal dimensions and
# weights binarized (0 or 1)

options(encoding = "utf8")
options(stringsAsFactors = FALSE)

library(tidyverse)
library(igraph)

# Load the binarized annual matrices; from here on matrix2003 ... matrix2017
# refer to these binary matrices.
for (yr in 2003:2017) {
  assign(
    paste0("matrix", yr),
    readRDS(paste0("NewAnalysesTrase2.4/AnnualMatrices/matrix", yr, "_dim"))
  )
}
binary_matrices <- mget(paste0("matrix", 2003:2017))

Nodes <- readRDS("NewAnalysesTrase2.4/Nodes")

# First, calculating Ci sending for all years together.
# The matrices are multiplied element-wise (*), not with %*%, so an entry of
# the product is non-zero only where the entry is non-zero in all 15 years.
# NOTE(review): the denominator takes the square root of a product of 15 row
# sums; for more than two snapshots this may not be the intended
# normalisation -- confirm against the reference formula.
Ciall_send <- rowSums(Reduce(`*`, binary_matrices)) /
  sqrt(Reduce(`*`, lapply(binary_matrices, rowSums)))

# Second, calculating Ci sending for pairs of years and for first vs. last
# year: shared entries of a row divided by the geometric mean of the two row
# sums. Rows that are empty in either year give 0/0 = NaN.
ci_sending <- function(m_a, m_b) {
  rowSums(m_a * m_b) / sqrt(rowSums(m_a) * rowSums(m_b))
}

Ci0304S <- ci_sending(matrix2003, matrix2004)
Ci0405S <- ci_sending(matrix2004, matrix2005)
Ci0506S <- ci_sending(matrix2005, matrix2006)
Ci0607S <- ci_sending(matrix2006, matrix2007)
Ci0708S <- ci_sending(matrix2007, matrix2008)
Ci0809S <- ci_sending(matrix2008, matrix2009)
Ci0910S <- ci_sending(matrix2009, matrix2010)
Ci1011S <- ci_sending(matrix2010, matrix2011)
Ci1112S <- ci_sending(matrix2011, matrix2012)
Ci1213S <- ci_sending(matrix2012, matrix2013)
Ci1314S <- ci_sending(matrix2013, matrix2014)
Ci1415S <- ci_sending(matrix2014, matrix2015)
Ci1516S <- ci_sending(matrix2015, matrix2016)
Ci1617S <- ci_sending(matrix2016, matrix2017)
Ci0317S <- ci_sending(matrix2003, matrix2017) # first vs. last year

# Putting everything in a data frame and saving the original version, which
# still contains the NaNs that are cleared for the Cm and C calculations
NodesCiS <- data.frame(
  Nodes$id, Nodes$label, Ciall_send,
  Ci0304S, Ci0405S, Ci0506S, Ci0607S, Ci0708S, Ci0809S, Ci0910S,
  Ci1011S, Ci1112S, Ci1213S, Ci1314S, Ci1415S, Ci1516S, Ci1617S,
  Ci0317S
)

saveRDS(NodesCiS, "NewAnalysesTrase2.4/NodesCi/NodesCiS_original")

# Before calculating the Ci average, replace Inf, NaN and NA by 0, because
# they would propagate into every sum and mean.
# It is the same to clean before or after building the data frame.
is.na(NodesCiS) <- vapply(NodesCiS, is.infinite, logical(nrow(NodesCiS)))
NodesCiS[is.na(NodesCiS)] <- 0

# Now saving the clean NodesCiS
saveRDS(NodesCiS, "NewAnalysesTrase2.4/NodesCi/NodesCiS_clean")

# Calculating Cm(tm, tm+1) according to Buttner (2016). This is the average
# topological overlap of the graph (not of the nodes) for two consecutive
# snapshots. So it is Cm0506 and so on.
# Before calculating Cm of the graph for each pair of years, maxA is needed
# (the number of nodes that are active in at least one of the two years).
# It is all in the formula: Cm = (1 / maxA) * sum of the cleaned node Ci.
cm_sending <- function(m_a, m_b, node_ci) {
  (1 / sum(rowSums(m_a + m_b) > 0)) * sum(node_ci)
}

Cm0304 <- cm_sending(matrix2003, matrix2004, NodesCiS$Ci0304S)
Cm0405 <- cm_sending(matrix2004, matrix2005, NodesCiS$Ci0405S)
Cm0506 <- cm_sending(matrix2005, matrix2006, NodesCiS$Ci0506S)
Cm0607 <- cm_sending(matrix2006, matrix2007, NodesCiS$Ci0607S)
Cm0708 <- cm_sending(matrix2007, matrix2008, NodesCiS$Ci0708S)
Cm0809 <- cm_sending(matrix2008, matrix2009, NodesCiS$Ci0809S)
Cm0910 <- cm_sending(matrix2009, matrix2010, NodesCiS$Ci0910S)
Cm1011 <- cm_sending(matrix2010, matrix2011, NodesCiS$Ci1011S)
Cm1112 <- cm_sending(matrix2011, matrix2012, NodesCiS$Ci1112S)
Cm1213 <- cm_sending(matrix2012, matrix2013, NodesCiS$Ci1213S)
Cm1314 <- cm_sending(matrix2013, matrix2014, NodesCiS$Ci1314S)
Cm1415 <- cm_sending(matrix2014, matrix2015, NodesCiS$Ci1415S)
Cm1516 <- cm_sending(matrix2015, matrix2016, NodesCiS$Ci1516S)
Cm1617 <- cm_sending(matrix2016, matrix2017, NodesCiS$Ci1617S)
Cm0317 <- cm_sending(matrix2003, matrix2017, NodesCiS$Ci0317S)

# The "original" frames are built from the raw Ci vectors (NaNs included);
# each Cm value is a single number repeated on every row.
NodesCiSCm <- data.frame(
  Nodes$id, Nodes$label, Ciall_send,
  Ci0304S, Ci0405S, Ci0506S, Ci0607S, Ci0708S, Ci0809S, Ci0910S,
  Ci1011S, Ci1112S, Ci1213S, Ci1314S, Ci1415S, Ci1516S, Ci1617S,
  Ci0317S,
  Cm0304, Cm0405, Cm0506, Cm0607, Cm0708, Cm0809, Cm0910,
  Cm1011, Cm1112, Cm1213, Cm1314, Cm1415, Cm1516, Cm1617,
  Cm0317
)

# Now saving the original NodesCiSCm
saveRDS(NodesCiSCm, "NewAnalysesTrase2.4/NodesCi/NodesCiSCm_original")
ci_cm_send_raw <- NodesCiSCm # uncleaned copy, reused for the wider frames

# And saving the clean version (Inf, NaN and NA replaced by 0)
is.na(NodesCiSCm) <- vapply(NodesCiSCm, is.infinite, logical(nrow(NodesCiSCm)))
NodesCiSCm[is.na(NodesCiSCm)] <- 0
saveRDS(NodesCiSCm, "NewAnalysesTrase2.4/NodesCi/NodesCiSCm_clean")

# Now calculating the average topological overlap of nodes Ci (Ci_avrg) over
# the 14 consecutive-year pairs, using the cleaned values in NodesCiS
consecutive_pairs <- c(
  "0304", "0405", "0506", "0607", "0708", "0809", "0910",
  "1011", "1112", "1213", "1314", "1415", "1516", "1617"
)
Ci_avrgS <- 1 / 14 * Reduce(`+`, NodesCiS[paste0("Ci", consecutive_pairs, "S")])

NodesCiSCmAvrg <- data.frame(ci_cm_send_raw, Ci_avrgS)

# Now saving the original NodesCiSCmAvrg
saveRDS(NodesCiSCmAvrg, "NewAnalysesTrase2.4/NodesCi/NodesCiSCmAvrg_original")
ci_cm_avrg_send_raw <- NodesCiSCmAvrg # uncleaned copy for the final frame

# And saving the clean version
is.na(NodesCiSCmAvrg) <- vapply(NodesCiSCmAvrg, is.infinite, logical(nrow(NodesCiSCmAvrg)))
NodesCiSCmAvrg[is.na(NodesCiSCmAvrg)] <- 0
saveRDS(NodesCiSCmAvrg, "NewAnalysesTrase2.4/NodesCi/NodesCiSCmAvrg_clean")

# Now calculating C for the whole graph: the mean of the 14 consecutive Cm
CS <- 1 / 14 * (Cm0304 + Cm0405 + Cm0506 + Cm0607 + Cm0708 + Cm0809 + Cm0910 +
  Cm1011 + Cm1112 + Cm1213 + Cm1314 + Cm1415 + Cm1516 + Cm1617)

NodesCiS_final <- data.frame(ci_cm_avrg_send_raw, CS)

# Now saving the original NodesCiS_final
saveRDS(NodesCiS_final, "NewAnalysesTrase2.4/NodesCi/NodesCiS_final_original")

# And saving the clean version
is.na(NodesCiS_final) <- vapply(NodesCiS_final, is.infinite, logical(nrow(NodesCiS_final)))
NodesCiS_final[is.na(NodesCiS_final)] <- 0
saveRDS(NodesCiS_final, "NewAnalysesTrase2.4/NodesCi/NodesCiS_final_clean")

# Adding the municipality library to identify municipalities
LibraryMun <- readRDS("NewAnalysesTrase2.4/Library_mun")
NodesCiS_labels <- left_join(NodesCiS_final, LibraryMun, by = c("Nodes.label" = "GEOCODE"))
# Nodes whose label has no GEOCODE match get NA in MUNICIPALITY and STATE;
# fall back to the node label itself in both columns.
NodesCiS_labels$MUNICIPALITY <- ifelse(
  is.na(NodesCiS_labels$MUNICIPALITY),
  NodesCiS_labels$Nodes.label,
  NodesCiS_labels$MUNICIPALITY
)
NodesCiS_labels$STATE <- ifelse(
  is.na(NodesCiS_labels$STATE),
  NodesCiS_labels$Nodes.label,
  NodesCiS_labels$STATE
)

saveRDS(NodesCiS_labels, "NewAnalysesTrase2.4/NodesCi/NodesCiS_final_cleanLabels")

############## Now everything for receiving nodes ------------

# First, calculating Ci receiving for all years together.
# The matrices are multiplied element-wise (*), not with %*%.
# NOTE(review): the denominator takes the square root of a product of 15
# column sums; for more than two snapshots this may not be the intended
# normalisation -- confirm against the reference formula.
binary_matrices <- mget(paste0("matrix", 2003:2017))
Ciall_rec <- colSums(Reduce(`*`, binary_matrices)) /
  sqrt(Reduce(`*`, lapply(binary_matrices, colSums)))

# Second, calculating Ci receiving for pairs of years and for first vs. last
# year: shared entries of a column divided by the geometric mean of the two
# column sums. Columns that are empty in either year give 0/0 = NaN.
ci_receiving <- function(m_a, m_b) {
  colSums(m_a * m_b) / sqrt(colSums(m_a) * colSums(m_b))
}

Ci0304R <- ci_receiving(matrix2003, matrix2004)
Ci0405R <- ci_receiving(matrix2004, matrix2005)
Ci0506R <- ci_receiving(matrix2005, matrix2006)
Ci0607R <- ci_receiving(matrix2006, matrix2007)
Ci0708R <- ci_receiving(matrix2007, matrix2008)
Ci0809R <- ci_receiving(matrix2008, matrix2009)
Ci0910R <- ci_receiving(matrix2009, matrix2010)
Ci1011R <- ci_receiving(matrix2010, matrix2011)
Ci1112R <- ci_receiving(matrix2011, matrix2012)
Ci1213R <- ci_receiving(matrix2012, matrix2013)
Ci1314R <- ci_receiving(matrix2013, matrix2014)
Ci1415R <- ci_receiving(matrix2014, matrix2015)
Ci1516R <- ci_receiving(matrix2015, matrix2016)
Ci1617R <- ci_receiving(matrix2016, matrix2017)
Ci0317R <- ci_receiving(matrix2003, matrix2017) # first vs. last year

# Putting everything in a data frame and saving the original version, which
# still contains the NaNs that are cleared for the Cm and C calculations
NodesCiR <- data.frame(
  Nodes$id, Nodes$label, Ciall_rec,
  Ci0304R, Ci0405R, Ci0506R, Ci0607R, Ci0708R, Ci0809R, Ci0910R,
  Ci1011R, Ci1112R, Ci1213R, Ci1314R, Ci1415R, Ci1516R, Ci1617R,
  Ci0317R
)

saveRDS(NodesCiR, "NewAnalysesTrase2.4/NodesCi/NodesCiR_original")

# Before calculating the Ci average, replace Inf, NaN and NA by 0, because
# they would propagate into every sum and mean.
# It is the same to clean before or after building the data frame.
is.na(NodesCiR) <- vapply(NodesCiR, is.infinite, logical(nrow(NodesCiR)))
NodesCiR[is.na(NodesCiR)] <- 0

# Now saving the clean NodesCiR
saveRDS(NodesCiR, "NewAnalysesTrase2.4/NodesCi/NodesCiR_clean")

# Calculating Cm(tm, tm+1) according to Buttner (2016). This is the average
# topological overlap of the graph (not of the nodes) for two consecutive
# snapshots. So it is Cm0506 and so on.
# Before calculating Cm of the graph for each pair of years, the number of
# active receiving nodes in that pair (maxA) is needed; the helper below
# computes it as the number of columns whose combined column sum is positive.

# Cm for one pair of snapshots on the receiving side:
# (1 / number of active columns) * sum of the cleaned per-node Ci values.
graph_overlap_cols <- function(m_a, m_b, ci_pair) {
  (1 / sum(colSums(m_a + m_b) > 0)) * sum(ci_pair)
}

# Sets Inf/-Inf entries of a data frame to NA, then replaces every NA
# (which also covers NaN) with 0, and returns the modified data frame.
zero_non_finite <- function(df) {
  is.na(df) <- vapply(df, is.infinite, logical(nrow(df)))
  df[is.na(df)] <- 0
  df
}

# Suffixes of the 14 consecutive year pairs (2003/04 ... 2016/17).
consecutive_pairs <- c(
  "0304", "0405", "0506", "0607", "0708", "0809", "0910",
  "1011", "1112", "1213", "1314", "1415", "1516", "1617"
)

Cm0304 <- graph_overlap_cols(matrix2003, matrix2004, NodesCiR$Ci0304R)
Cm0405 <- graph_overlap_cols(matrix2004, matrix2005, NodesCiR$Ci0405R)
Cm0506 <- graph_overlap_cols(matrix2005, matrix2006, NodesCiR$Ci0506R)
Cm0607 <- graph_overlap_cols(matrix2006, matrix2007, NodesCiR$Ci0607R)
Cm0708 <- graph_overlap_cols(matrix2007, matrix2008, NodesCiR$Ci0708R)
Cm0809 <- graph_overlap_cols(matrix2008, matrix2009, NodesCiR$Ci0809R)
Cm0910 <- graph_overlap_cols(matrix2009, matrix2010, NodesCiR$Ci0910R)
Cm1011 <- graph_overlap_cols(matrix2010, matrix2011, NodesCiR$Ci1011R)
Cm1112 <- graph_overlap_cols(matrix2011, matrix2012, NodesCiR$Ci1112R)
Cm1213 <- graph_overlap_cols(matrix2012, matrix2013, NodesCiR$Ci1213R)
Cm1314 <- graph_overlap_cols(matrix2013, matrix2014, NodesCiR$Ci1314R)
Cm1415 <- graph_overlap_cols(matrix2014, matrix2015, NodesCiR$Ci1415R)
Cm1516 <- graph_overlap_cols(matrix2015, matrix2016, NodesCiR$Ci1516R)
Cm1617 <- graph_overlap_cols(matrix2016, matrix2017, NodesCiR$Ci1617R)
Cm0317 <- graph_overlap_cols(matrix2003, matrix2017, NodesCiR$Ci0317R)

# Raw per-node Ci vectors plus the graph-level Cm scalars (recycled to every row).
NodesCiRCm <- data.frame(
  Nodes$id, Nodes$label, Ciall_rec,
  Ci0304R, Ci0405R, Ci0506R, Ci0607R, Ci0708R, Ci0809R, Ci0910R,
  Ci1011R, Ci1112R, Ci1213R, Ci1314R, Ci1415R, Ci1516R, Ci1617R, Ci0317R,
  Cm0304, Cm0405, Cm0506, Cm0607, Cm0708, Cm0809, Cm0910,
  Cm1011, Cm1112, Cm1213, Cm1314, Cm1415, Cm1516, Cm1617, Cm0317
)
saveRDS(NodesCiRCm, "NewAnalysesTrase2.4/NodesCi/NodesCiRCm_original")
NodesCiRCm <- zero_non_finite(NodesCiRCm)
saveRDS(NodesCiRCm, "NewAnalysesTrase2.4/NodesCi/NodesCiRCm_clean")

# Average topological overlap of each node (Ci_avrg) over the 14 consecutive
# snapshot pairs, taken from the cleaned columns of NodesCiR.
# Reduce() adds the columns left to right, like the written-out sum would.
Ci_avrgR <- 1 / 14 * Reduce(`+`, NodesCiR[paste0("Ci", consecutive_pairs, "R")])

NodesCiRCmAvrg <- data.frame(
  Nodes$id, Nodes$label, Ciall_rec,
  Ci0304R, Ci0405R, Ci0506R, Ci0607R, Ci0708R, Ci0809R, Ci0910R,
  Ci1011R, Ci1112R, Ci1213R, Ci1314R, Ci1415R, Ci1516R, Ci1617R, Ci0317R,
  Cm0304, Cm0405, Cm0506, Cm0607, Cm0708, Cm0809, Cm0910,
  Cm1011, Cm1112, Cm1213, Cm1314, Cm1415, Cm1516, Cm1617, Cm0317,
  Ci_avrgR
)
saveRDS(NodesCiRCmAvrg, "NewAnalysesTrase2.4/NodesCi/NodesCiRCmAvrg_original")
NodesCiRCmAvrg <- zero_non_finite(NodesCiRCmAvrg)
saveRDS(NodesCiRCmAvrg, "NewAnalysesTrase2.4/NodesCi/NodesCiRCmAvrg_clean")

# C for the whole graph: mean of the 14 consecutive-pair Cm values
# (Cm0317 is deliberately not part of this sum).
CR <- 1 / 14 * (Cm0304 + Cm0405 + Cm0506 + Cm0607 + Cm0708 + Cm0809 + Cm0910 +
  Cm1011 + Cm1112 + Cm1213 + Cm1314 + Cm1415 + Cm1516 + Cm1617)

NodesCiR_final <- data.frame(
  Nodes$id, Nodes$label, Ciall_rec,
  Ci0304R, Ci0405R, Ci0506R, Ci0607R, Ci0708R, Ci0809R, Ci0910R,
  Ci1011R, Ci1112R, Ci1213R, Ci1314R, Ci1415R, Ci1516R, Ci1617R, Ci0317R,
  Cm0304, Cm0405, Cm0506, Cm0607, Cm0708, Cm0809, Cm0910,
  Cm1011, Cm1112, Cm1213, Cm1314, Cm1415, Cm1516, Cm1617, Cm0317,
  Ci_avrgR, CR
)
saveRDS(NodesCiR_final, "NewAnalysesTrase2.4/NodesCi/NodesCiR_final_original")
NodesCiR_final <- zero_non_finite(NodesCiR_final)
saveRDS(NodesCiR_final, "NewAnalysesTrase2.4/NodesCi/NodesCiR_final_clean")

NodesCiR_final_original <- readRDS("NewAnalysesTrase2.4/NodesCi/NodesCiR_final_original")

# Adding the municipality library to identify municipalities.
LibraryMun <- readRDS("NewAnalysesTrase2.4/Library_mun")
NodesCiR_labels <- left_join(NodesCiR_final, LibraryMun, by = c("Nodes.label" = "GEOCODE"))
# Rows whose MUNICIPALITY / STATE is NA after the join get the node label instead.
NodesCiR_labels$MUNICIPALITY <- ifelse(
  is.na(NodesCiR_labels$MUNICIPALITY),
  NodesCiR_labels$Nodes.label,
  NodesCiR_labels$MUNICIPALITY
)
NodesCiR_labels$STATE <- ifelse(
  is.na(NodesCiR_labels$STATE),
  NodesCiR_labels$Nodes.label,
  NodesCiR_labels$STATE
)
saveRDS(NodesCiR_labels, "NewAnalysesTrase2.4/NodesCi/NodesCiR_final_cleanLabels")

############# Now calculating Cis on relationships Municipalities -> Countries (Sending and receiving)

# Reads the saved Municipality -> Country matrix of one year.
read_mun_country_matrix <- function(year) {
  readRDS(file.path(
    "NewAnalysesTrase2.4/MunCountry_EdAnalyses/AnnualMatrices",
    paste0("matrix", year, "_dim")
  ))
}

# These assignments replace the matrixYYYY objects used above.
matrix2003 <- read_mun_country_matrix(2003)
matrix2004 <- read_mun_country_matrix(2004)
matrix2005 <- read_mun_country_matrix(2005)
matrix2006 <- read_mun_country_matrix(2006)
matrix2007 <- read_mun_country_matrix(2007)
matrix2008 <- read_mun_country_matrix(2008)
matrix2009 <- read_mun_country_matrix(2009)
matrix2010 <- read_mun_country_matrix(2010)
matrix2011 <- read_mun_country_matrix(2011)
matrix2012 <- read_mun_country_matrix(2012)
matrix2013 <- read_mun_country_matrix(2013)
matrix2014 <- read_mun_country_matrix(2014)
matrix2015 <- read_mun_country_matrix(2015)
matrix2016 <- read_mun_country_matrix(2016)
matrix2017 <- read_mun_country_matrix(2017)

NodesAll <- readRDS("NewAnalysesTrase2.4/Nodes")
# Subsetting only Municipalities and countries on Municipalities
# Nodes <- subset(NodesAll, id>0 & id<=2529 | id >=4239 & id <=4365)
## This is the subsetting for LHs
# `&` binds tighter than `|`; the parentheses only make that explicit.
# NOTE(review): the id ranges are hard-coded -- confirm they match the node
# list saved in "NewAnalysesTrase2.4/Nodes".
Nodes <- subset(NodesAll, (id > 0 & id <= 508) | id >= 2216)

# Topological overlap of every sending node (matrix row) between two annual
# matrices; rows that sum to zero in either year yield NaN (0/0).
pair_overlap_rows <- function(m_a, m_b) {
  rowSums(m_a * m_b) / sqrt(rowSums(m_a) * rowSums(m_b))
}

# First, Ci sending for all years together (element-wise `*`, not `%*%`).
# NOTE(review): the denominator is the square root of a 15-factor product --
# confirm this is the intended normalisation.
annual_matrices <- list(
  matrix2003, matrix2004, matrix2005, matrix2006, matrix2007,
  matrix2008, matrix2009, matrix2010, matrix2011, matrix2012,
  matrix2013, matrix2014, matrix2015, matrix2016, matrix2017
)
Ciall_send <- rowSums(Reduce(`*`, annual_matrices)) /
  sqrt(Reduce(`*`, lapply(annual_matrices, rowSums)))

# Second, Ci sending for consecutive pairs of years, plus first vs last year.
Ci0304S <- pair_overlap_rows(matrix2003, matrix2004)
Ci0405S <- pair_overlap_rows(matrix2004, matrix2005)
Ci0506S <- pair_overlap_rows(matrix2005, matrix2006)
Ci0607S <- pair_overlap_rows(matrix2006, matrix2007)
Ci0708S <- pair_overlap_rows(matrix2007, matrix2008)
Ci0809S <- pair_overlap_rows(matrix2008, matrix2009)
Ci0910S <- pair_overlap_rows(matrix2009, matrix2010)
Ci1011S <- pair_overlap_rows(matrix2010, matrix2011)
Ci1112S <- pair_overlap_rows(matrix2011, matrix2012)
Ci1213S <- pair_overlap_rows(matrix2012, matrix2013)
Ci1314S <- pair_overlap_rows(matrix2013, matrix2014)
Ci1415S <- pair_overlap_rows(matrix2014, matrix2015)
Ci1516S <- pair_overlap_rows(matrix2015, matrix2016)
Ci1617S <- pair_overlap_rows(matrix2016, matrix2017)
Ci0317S <- pair_overlap_rows(matrix2003, matrix2017)

# Save the raw values first (Inf/NaN included); they are zeroed below before
# being used for the Cm and C calculations.
NodesCiS <- data.frame(
  Nodes$id, Nodes$label, Ciall_send,
  Ci0304S, Ci0405S, Ci0506S, Ci0607S, Ci0708S, Ci0809S, Ci0910S,
  Ci1011S, Ci1112S, Ci1213S, Ci1314S, Ci1415S, Ci1516S, Ci1617S, Ci0317S
)
saveRDS(NodesCiS, "NewAnalysesTrase2.4/NodesCi/CI_MunCountry/NodesCiS_original")

# Zero the Inf/NaN values before averaging.
NodesCiS <- zero_non_finite(NodesCiS)
saveRDS(NodesCiS, "NewAnalysesTrase2.4/NodesCi/CI_MunCountry/NodesCiS_clean")

# Calculating Cm(tm, tm+1) according to Buttner (2016). This is the average topological overlap of
# the graph (not of the nodes) for tm consecutive snapshots. So it is Cm0506 and so on.
# Before calculating Cm of the graph for each pair of years, the number of
# active sending nodes in that pair (maxA) is needed; the helper below
# computes it as the number of rows whose combined row sum is positive.

# Cm for one pair of snapshots on the sending side:
# (1 / number of active rows) * sum of the cleaned per-node Ci values.
graph_overlap_rows <- function(m_a, m_b, ci_pair) {
  (1 / sum(rowSums(m_a + m_b) > 0)) * sum(ci_pair)
}

# Sets Inf/-Inf entries of a data frame to NA, then replaces every NA
# (which also covers NaN) with 0, and returns the modified data frame.
zero_non_finite <- function(df) {
  is.na(df) <- vapply(df, is.infinite, logical(nrow(df)))
  df[is.na(df)] <- 0
  df
}

# Suffixes of the 14 consecutive year pairs (2003/04 ... 2016/17).
consecutive_pairs <- c(
  "0304", "0405", "0506", "0607", "0708", "0809", "0910",
  "1011", "1112", "1213", "1314", "1415", "1516", "1617"
)

Cm0304 <- graph_overlap_rows(matrix2003, matrix2004, NodesCiS$Ci0304S)
Cm0405 <- graph_overlap_rows(matrix2004, matrix2005, NodesCiS$Ci0405S)
Cm0506 <- graph_overlap_rows(matrix2005, matrix2006, NodesCiS$Ci0506S)
Cm0607 <- graph_overlap_rows(matrix2006, matrix2007, NodesCiS$Ci0607S)
Cm0708 <- graph_overlap_rows(matrix2007, matrix2008, NodesCiS$Ci0708S)
Cm0809 <- graph_overlap_rows(matrix2008, matrix2009, NodesCiS$Ci0809S)
Cm0910 <- graph_overlap_rows(matrix2009, matrix2010, NodesCiS$Ci0910S)
Cm1011 <- graph_overlap_rows(matrix2010, matrix2011, NodesCiS$Ci1011S)
Cm1112 <- graph_overlap_rows(matrix2011, matrix2012, NodesCiS$Ci1112S)
Cm1213 <- graph_overlap_rows(matrix2012, matrix2013, NodesCiS$Ci1213S)
Cm1314 <- graph_overlap_rows(matrix2013, matrix2014, NodesCiS$Ci1314S)
Cm1415 <- graph_overlap_rows(matrix2014, matrix2015, NodesCiS$Ci1415S)
Cm1516 <- graph_overlap_rows(matrix2015, matrix2016, NodesCiS$Ci1516S)
Cm1617 <- graph_overlap_rows(matrix2016, matrix2017, NodesCiS$Ci1617S)
Cm0317 <- graph_overlap_rows(matrix2003, matrix2017, NodesCiS$Ci0317S)

# Raw per-node Ci vectors plus the graph-level Cm scalars (recycled to every row).
NodesCiSCm <- data.frame(
  Nodes$id, Nodes$label, Ciall_send,
  Ci0304S, Ci0405S, Ci0506S, Ci0607S, Ci0708S, Ci0809S, Ci0910S,
  Ci1011S, Ci1112S, Ci1213S, Ci1314S, Ci1415S, Ci1516S, Ci1617S, Ci0317S,
  Cm0304, Cm0405, Cm0506, Cm0607, Cm0708, Cm0809, Cm0910,
  Cm1011, Cm1112, Cm1213, Cm1314, Cm1415, Cm1516, Cm1617, Cm0317
)
saveRDS(NodesCiSCm, "NewAnalysesTrase2.4/NodesCi/CI_MunCountry/NodesCiSCm_original")
NodesCiSCm <- zero_non_finite(NodesCiSCm)
saveRDS(NodesCiSCm, "NewAnalysesTrase2.4/NodesCi/CI_MunCountry/NodesCiSCm_clean")

# Average topological overlap of each node (Ci_avrg) over the 14 consecutive
# snapshot pairs, taken from the cleaned columns of NodesCiS.
# Reduce() adds the columns left to right, like the written-out sum would.
Ci_avrgS <- 1 / 14 * Reduce(`+`, NodesCiS[paste0("Ci", consecutive_pairs, "S")])

NodesCiSCmAvrg <- data.frame(
  Nodes$id, Nodes$label, Ciall_send,
  Ci0304S, Ci0405S, Ci0506S, Ci0607S, Ci0708S, Ci0809S, Ci0910S,
  Ci1011S, Ci1112S, Ci1213S, Ci1314S, Ci1415S, Ci1516S, Ci1617S, Ci0317S,
  Cm0304, Cm0405, Cm0506, Cm0607, Cm0708, Cm0809, Cm0910,
  Cm1011, Cm1112, Cm1213, Cm1314, Cm1415, Cm1516, Cm1617, Cm0317,
  Ci_avrgS
)
saveRDS(NodesCiSCmAvrg, "NewAnalysesTrase2.4/NodesCi/CI_MunCountry/NodesCiSCmAvrg_original")
NodesCiSCmAvrg <- zero_non_finite(NodesCiSCmAvrg)
saveRDS(NodesCiSCmAvrg, "NewAnalysesTrase2.4/NodesCi/CI_MunCountry/NodesCiSCmAvrg_clean")

# C for the whole graph: mean of the 14 consecutive-pair Cm values
# (Cm0317 is deliberately not part of this sum).
CS <- 1 / 14 * (Cm0304 + Cm0405 + Cm0506 + Cm0607 + Cm0708 + Cm0809 + Cm0910 +
  Cm1011 + Cm1112 + Cm1213 + Cm1314 + Cm1415 + Cm1516 + Cm1617)

NodesCiS_final <- data.frame(
  Nodes$id, Nodes$label, Ciall_send,
  Ci0304S, Ci0405S, Ci0506S, Ci0607S, Ci0708S, Ci0809S, Ci0910S,
  Ci1011S, Ci1112S, Ci1213S, Ci1314S, Ci1415S, Ci1516S, Ci1617S, Ci0317S,
  Cm0304, Cm0405, Cm0506, Cm0607, Cm0708, Cm0809, Cm0910,
  Cm1011, Cm1112, Cm1213, Cm1314, Cm1415, Cm1516, Cm1617, Cm0317,
  Ci_avrgS, CS
)
saveRDS(NodesCiS_final, "NewAnalysesTrase2.4/NodesCi/CI_MunCountry/NodesCiS_final_original")
NodesCiS_final <- zero_non_finite(NodesCiS_final)
saveRDS(NodesCiS_final, "NewAnalysesTrase2.4/NodesCi/CI_MunCountry/NodesCiS_final_clean")

# Adding the municipality library to identify municipalities.
LibraryMun <- readRDS("NewAnalysesTrase2.4/Library_mun")
NodesCiS_labels <- left_join(NodesCiS_final, LibraryMun, by = c("Nodes.label" = "GEOCODE"))
# Rows whose MUNICIPALITY / STATE is NA after the join get the node label instead.
NodesCiS_labels$MUNICIPALITY <- ifelse(
  is.na(NodesCiS_labels$MUNICIPALITY),
  NodesCiS_labels$Nodes.label,
  NodesCiS_labels$MUNICIPALITY
)
NodesCiS_labels$STATE <- ifelse(
  is.na(NodesCiS_labels$STATE),
  NodesCiS_labels$Nodes.label,
  NodesCiS_labels$STATE
)
saveRDS(NodesCiS_labels, "NewAnalysesTrase2.4/NodesCi/CI_MunCountry/NodesCiS_final_cleanLabels")

############## Now everything for receiving nodes at Mun -> Country relationships ------------

# Topological overlap of every receiving node (matrix column) between two
# annual matrices; columns that sum to zero in either year yield NaN (0/0).
pair_overlap_cols <- function(m_a, m_b) {
  colSums(m_a * m_b) / sqrt(colSums(m_a) * colSums(m_b))
}

# First, Ci receiving for all years together (element-wise `*`, not `%*%`).
# NOTE(review): the denominator is the square root of a 15-factor product --
# confirm this is the intended normalisation.
annual_matrices <- list(
  matrix2003, matrix2004, matrix2005, matrix2006, matrix2007,
  matrix2008, matrix2009, matrix2010, matrix2011, matrix2012,
  matrix2013, matrix2014, matrix2015, matrix2016, matrix2017
)
Ciall_rec <- colSums(Reduce(`*`, annual_matrices)) /
  sqrt(Reduce(`*`, lapply(annual_matrices, colSums)))

# Second, Ci receiving for consecutive pairs of years, plus first vs last year.
Ci0304R <- pair_overlap_cols(matrix2003, matrix2004)
Ci0405R <- pair_overlap_cols(matrix2004, matrix2005)
Ci0506R <- pair_overlap_cols(matrix2005, matrix2006)
Ci0607R <- pair_overlap_cols(matrix2006, matrix2007)
Ci0708R <- pair_overlap_cols(matrix2007, matrix2008)
Ci0809R <- pair_overlap_cols(matrix2008, matrix2009)
Ci0910R <- pair_overlap_cols(matrix2009, matrix2010)
Ci1011R <- pair_overlap_cols(matrix2010, matrix2011)
Ci1112R <- pair_overlap_cols(matrix2011, matrix2012)
Ci1213R <- pair_overlap_cols(matrix2012, matrix2013)
Ci1314R <- pair_overlap_cols(matrix2013, matrix2014)
Ci1415R <- pair_overlap_cols(matrix2014, matrix2015)
Ci1516R <- pair_overlap_cols(matrix2015, matrix2016)
Ci1617R <- pair_overlap_cols(matrix2016, matrix2017)
Ci0317R <- pair_overlap_cols(matrix2003, matrix2017)

# Save the raw values first (Inf/NaN included); they are zeroed below before
# being used for the Cm and C calculations.
NodesCiR <- data.frame(
  Nodes$id, Nodes$label, Ciall_rec,
  Ci0304R, Ci0405R, Ci0506R, Ci0607R, Ci0708R, Ci0809R, Ci0910R,
  Ci1011R, Ci1112R, Ci1213R, Ci1314R, Ci1415R, Ci1516R, Ci1617R, Ci0317R
)
saveRDS(NodesCiR, "NewAnalysesTrase2.4/NodesCi/CI_MunCountry/NodesCiR_original")

# Zero the Inf/NaN values before averaging.
NodesCiR <- zero_non_finite(NodesCiR)
saveRDS(NodesCiR, "NewAnalysesTrase2.4/NodesCi/CI_MunCountry/NodesCiR_clean")

# Calculating Cm(tm, tm+1) according to Buttner (2016). This is the average topological overlap of
# the graph (not of the nodes) for tm consecutive snapshots. So it is Cm0506 and so on.
# Before calculating Cm of the graph for each pair of years, the number of
# active receiving nodes in that pair (maxA) is needed; the helper below
# computes it as the number of columns whose combined column sum is positive.

# Cm for one pair of snapshots on the receiving side:
# (1 / number of active columns) * sum of the cleaned per-node Ci values.
graph_overlap_cols <- function(m_a, m_b, ci_pair) {
  (1 / sum(colSums(m_a + m_b) > 0)) * sum(ci_pair)
}

# Sets Inf/-Inf entries of a data frame to NA, then replaces every NA
# (which also covers NaN) with 0, and returns the modified data frame.
zero_non_finite <- function(df) {
  is.na(df) <- vapply(df, is.infinite, logical(nrow(df)))
  df[is.na(df)] <- 0
  df
}

# Suffixes of the 14 consecutive year pairs (2003/04 ... 2016/17).
consecutive_pairs <- c(
  "0304", "0405", "0506", "0607", "0708", "0809", "0910",
  "1011", "1112", "1213", "1314", "1415", "1516", "1617"
)

Cm0304 <- graph_overlap_cols(matrix2003, matrix2004, NodesCiR$Ci0304R)
Cm0405 <- graph_overlap_cols(matrix2004, matrix2005, NodesCiR$Ci0405R)
Cm0506 <- graph_overlap_cols(matrix2005, matrix2006, NodesCiR$Ci0506R)
Cm0607 <- graph_overlap_cols(matrix2006, matrix2007, NodesCiR$Ci0607R)
Cm0708 <- graph_overlap_cols(matrix2007, matrix2008, NodesCiR$Ci0708R)
Cm0809 <- graph_overlap_cols(matrix2008, matrix2009, NodesCiR$Ci0809R)
Cm0910 <- graph_overlap_cols(matrix2009, matrix2010, NodesCiR$Ci0910R)
Cm1011 <- graph_overlap_cols(matrix2010, matrix2011, NodesCiR$Ci1011R)
Cm1112 <- graph_overlap_cols(matrix2011, matrix2012, NodesCiR$Ci1112R)
Cm1213 <- graph_overlap_cols(matrix2012, matrix2013, NodesCiR$Ci1213R)
Cm1314 <- graph_overlap_cols(matrix2013, matrix2014, NodesCiR$Ci1314R)
Cm1415 <- graph_overlap_cols(matrix2014, matrix2015, NodesCiR$Ci1415R)
Cm1516 <- graph_overlap_cols(matrix2015, matrix2016, NodesCiR$Ci1516R)
Cm1617 <- graph_overlap_cols(matrix2016, matrix2017, NodesCiR$Ci1617R)
Cm0317 <- graph_overlap_cols(matrix2003, matrix2017, NodesCiR$Ci0317R)

# Raw per-node Ci vectors plus the graph-level Cm scalars (recycled to every row).
NodesCiRCm <- data.frame(
  Nodes$id, Nodes$label, Ciall_rec,
  Ci0304R, Ci0405R, Ci0506R, Ci0607R, Ci0708R, Ci0809R, Ci0910R,
  Ci1011R, Ci1112R, Ci1213R, Ci1314R, Ci1415R, Ci1516R, Ci1617R, Ci0317R,
  Cm0304, Cm0405, Cm0506, Cm0607, Cm0708, Cm0809, Cm0910,
  Cm1011, Cm1112, Cm1213, Cm1314, Cm1415, Cm1516, Cm1617, Cm0317
)
saveRDS(NodesCiRCm, "NewAnalysesTrase2.4/NodesCi/CI_MunCountry/NodesCiRCm_original")
NodesCiRCm <- zero_non_finite(NodesCiRCm)
saveRDS(NodesCiRCm, "NewAnalysesTrase2.4/NodesCi/CI_MunCountry/NodesCiRCm_clean")

# Average topological overlap of each node (Ci_avrg) over the 14 consecutive
# snapshot pairs, taken from the cleaned columns of NodesCiR.
# Reduce() adds the columns left to right, like the written-out sum would.
Ci_avrgR <- 1 / 14 * Reduce(`+`, NodesCiR[paste0("Ci", consecutive_pairs, "R")])

NodesCiRCmAvrg <- data.frame(
  Nodes$id, Nodes$label, Ciall_rec,
  Ci0304R, Ci0405R, Ci0506R, Ci0607R, Ci0708R, Ci0809R, Ci0910R,
  Ci1011R, Ci1112R, Ci1213R, Ci1314R, Ci1415R, Ci1516R, Ci1617R, Ci0317R,
  Cm0304, Cm0405, Cm0506, Cm0607, Cm0708, Cm0809, Cm0910,
  Cm1011, Cm1112, Cm1213, Cm1314, Cm1415, Cm1516, Cm1617, Cm0317,
  Ci_avrgR
)
saveRDS(NodesCiRCmAvrg, "NewAnalysesTrase2.4/NodesCi/CI_MunCountry/NodesCiRCmAvrg_original")
NodesCiRCmAvrg <- zero_non_finite(NodesCiRCmAvrg)
saveRDS(NodesCiRCmAvrg, "NewAnalysesTrase2.4/NodesCi/CI_MunCountry/NodesCiRCmAvrg_clean")

# C for the whole graph: mean of the 14 consecutive-pair Cm values
# (Cm0317 is deliberately not part of this sum).
CR <- 1 / 14 * (Cm0304 + Cm0405 + Cm0506 + Cm0607 + Cm0708 + Cm0809 + Cm0910 +
  Cm1011 + Cm1112 + Cm1213 + Cm1314 + Cm1415 + Cm1516 + Cm1617)

NodesCiR_final <- data.frame(
  Nodes$id, Nodes$label, Ciall_rec,
  Ci0304R, Ci0405R, Ci0506R, Ci0607R, Ci0708R, Ci0809R, Ci0910R,
  Ci1011R, Ci1112R, Ci1213R, Ci1314R, Ci1415R, Ci1516R, Ci1617R, Ci0317R,
  Cm0304, Cm0405, Cm0506, Cm0607, Cm0708, Cm0809, Cm0910,
  Cm1011, Cm1112, Cm1213, Cm1314, Cm1415, Cm1516, Cm1617, Cm0317,
  Ci_avrgR, CR
)
saveRDS(NodesCiR_final, "NewAnalysesTrase2.4/NodesCi/CI_MunCountry/NodesCiR_final_original")
NodesCiR_final <- zero_non_finite(NodesCiR_final)
saveRDS(NodesCiR_final, "NewAnalysesTrase2.4/NodesCi/CI_MunCountry/NodesCiR_final_clean")

NodesCiR_final_original <- readRDS("NewAnalysesTrase2.4/NodesCi/CI_MunCountry/NodesCiR_final_original")

# Adding the municipality library to identify municipalities
LibraryMun <- readRDS("NewAnalysesTrase2.4/Library_mun")
NodesCiR_labels <- left_join(NodesCiR_final, LibraryMun, by = c("Nodes.label" = "GEOCODE"))
# Rows whose MUNICIPALITY / STATE is NA after the join get the node label instead.
NodesCiR_labels$MUNICIPALITY <- ifelse(
  is.na(NodesCiR_labels$MUNICIPALITY),
  NodesCiR_labels$Nodes.label,
  NodesCiR_labels$MUNICIPALITY
)
NodesCiR_labels$STATE <- ifelse(
  is.na(NodesCiR_labels$STATE),
  NodesCiR_labels$Nodes.label,
  NodesCiR_labels$STATE
)
saveRDS(NodesCiR_labels, "NewAnalysesTrase2.4/NodesCi/CI_MunCountry/NodesCiR_final_cleanLabels")
NodesCiR_labels <- readRDS("NewAnalysesTrase2.4/NodesCi/CI_MunCountry/NodesCiR_final_cleanLabels")

# Cis calculation is finished.

# New session here to add annual volumes supplied (municipalities), traded
# (traders) and imported (countries).
Nodes <- readRDS("NewAnalysesTrase2.4/Nodes")
d <- readRDS("NewAnalysesTrase2.4/Trase24_clean")
EdAnalyses <- readRDS("NewAnalysesTrase2.4/EdgesAnalyses/EdgesAnalyses_labelsMun_SubsetFreqLarger0")
MCEdAnalyses <- readRDS("NewAnalysesTrase2.4/MunCountry_EdAnalyses/MCEdAnalyses_Labels")
NodesCiS <- readRDS("NewAnalysesTrase2.4/NodesCi/NodesCiS_final_cleanLabels")
NodesCiR <- readRDS("NewAnalysesTrase2.4/NodesCi/NodesCiR_final_cleanLabels")

# Total soy volume of one year per value of a single grouping column.
#   year_data  : rows of `d` for one year
#   group_col  : name (string) of the column to group by
#   value_name : name given to the summed-volume column (e.g. "v2003")
# Returns a two-column table ("Nodes.label", value_name) sorted by
# decreasing volume.
sum_soy_by <- function(year_data, group_col, value_name) {
  totals <- year_data %>%
    group_by(.data[[group_col]]) %>%
    summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>%
    ungroup() %>%
    arrange(desc(SUM_SOY_TONS))
  colnames(totals) <- c("Nodes.label", value_name)
  totals
}

# One subset of the trade data per year (reused for both groupings below).
d2003 <- subset(d, d$YEAR == 2003)
d2004 <- subset(d, d$YEAR == 2004)
d2005 <- subset(d, d$YEAR == 2005)
d2006 <- subset(d, d$YEAR == 2006)
d2007 <- subset(d, d$YEAR == 2007)
d2008 <- subset(d, d$YEAR == 2008)
d2009 <- subset(d, d$YEAR == 2009)
d2010 <- subset(d, d$YEAR == 2010)
d2011 <- subset(d, d$YEAR == 2011)
d2012 <- subset(d, d$YEAR == 2012)
d2013 <- subset(d, d$YEAR == 2013)
d2014 <- subset(d, d$YEAR == 2014)
d2015 <- subset(d, d$YEAR == 2015)
d2016 <- subset(d, d$YEAR == 2016)
d2017 <- subset(d, d$YEAR == 2017)

# Annual volumes per GEOCODE.
adf2003MV <- sum_soy_by(d2003, "GEOCODE", "v2003")
adf2004MV <- sum_soy_by(d2004, "GEOCODE", "v2004")
adf2005MV <- sum_soy_by(d2005, "GEOCODE", "v2005")
adf2006MV <- sum_soy_by(d2006, "GEOCODE", "v2006")
adf2007MV <- sum_soy_by(d2007, "GEOCODE", "v2007")
adf2008MV <- sum_soy_by(d2008, "GEOCODE", "v2008")
adf2009MV <- sum_soy_by(d2009, "GEOCODE", "v2009")
adf2010MV <- sum_soy_by(d2010, "GEOCODE", "v2010")
adf2011MV <- sum_soy_by(d2011, "GEOCODE", "v2011")
adf2012MV <- sum_soy_by(d2012, "GEOCODE", "v2012")
adf2013MV <- sum_soy_by(d2013, "GEOCODE", "v2013")
adf2014MV <- sum_soy_by(d2014, "GEOCODE", "v2014")
adf2015MV <- sum_soy_by(d2015, "GEOCODE", "v2015")
adf2016MV <- sum_soy_by(d2016, "GEOCODE", "v2016")
adf2017MV <- sum_soy_by(d2017, "GEOCODE", "v2017")

# Now for traders: annual volumes per EXPORTER ---------------
adf2003EV <- sum_soy_by(d2003, "EXPORTER", "v2003")
adf2004EV <- sum_soy_by(d2004, "EXPORTER", "v2004")
adf2005EV <- sum_soy_by(d2005, "EXPORTER", "v2005")
adf2006EV <- sum_soy_by(d2006, "EXPORTER", "v2006")
adf2007EV <- sum_soy_by(d2007, "EXPORTER", "v2007")
adf2008EV <- sum_soy_by(d2008, "EXPORTER", "v2008")
adf2009EV <- sum_soy_by(d2009, "EXPORTER", "v2009")
adf2010EV <- sum_soy_by(d2010, "EXPORTER", "v2010")
adf2011EV <- sum_soy_by(d2011, "EXPORTER", "v2011")
adf2012EV <- sum_soy_by(d2012, "EXPORTER", "v2012")
adf2013EV <- sum_soy_by(d2013, "EXPORTER", "v2013")
adf2014EV <- sum_soy_by(d2014, "EXPORTER", "v2014")
adf2015EV <- sum_soy_by(d2015, "EXPORTER", "v2015")
adf2016EV <- sum_soy_by(d2016, "EXPORTER", "v2016")
# The 2017 trader pipeline continues past this point in the file, so its
# opening is kept exactly as it was.
adf2017EV <- d2017 %>% # ...
the %>% symbol shows that you are going to do something to the data group_by(EXPORTER) %>% # For each combination of MUNICIPALITY, EXPORTER, EXPORTER summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom colnames(adf2017EV) <- c("Nodes.label", "v2017") #Now for Countries d2003 <- subset(d, d$YEAR == 2003) adf2003CV <- d2003 %>% # ... the %>% symbol shows that you are going to do something to the data group_by(COUNTRY) %>% # For each combination of MUNICIPALITY, COUNTRY, EXPORTER summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom colnames(adf2003CV) <- c("Nodes.label", "v2003") #------------ d2004 <- subset(d, d$YEAR == 2004) adf2004CV <- d2004 %>% # ... the %>% symbol shows that you are going to do something to the data group_by(COUNTRY) %>% # For each combination of MUNICIPALITY, COUNTRY, EXPORTER summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom colnames(adf2004CV) <- c("Nodes.label", "v2004") #-------------- d2005 <- subset(d, d$YEAR == 2005) adf2005CV <- d2005 %>% # ... the %>% symbol shows that you are going to do something to the data group_by(COUNTRY) %>% # For each combination of MUNICIPALITY, COUNTRY, EXPORTER summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom colnames(adf2005CV) <- c("Nodes.label", "v2005") #-------------------- d2006 <- subset(d, d$YEAR == 2006) adf2006CV <- d2006 %>% # ... 
the %>% symbol shows that you are going to do something to the data group_by(COUNTRY) %>% # For each combination of MUNICIPALITY, COUNTRY, EXPORTER summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom colnames(adf2006CV) <- c("Nodes.label", "v2006") #------------------- d2007 <- subset(d, d$YEAR == 2007) adf2007CV <- d2007 %>% # ... the %>% symbol shows that you are going to do something to the data group_by(COUNTRY) %>% # For each combination of MUNICIPALITY, COUNTRY, EXPORTER summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom colnames(adf2007CV) <- c("Nodes.label", "v2007") #---------------- d2008 <- subset(d, d$YEAR == 2008) adf2008CV <- d2008 %>% # ... the %>% symbol shows that you are going to do something to the data group_by(COUNTRY) %>% # For each combination of MUNICIPALITY, COUNTRY, EXPORTER summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom colnames(adf2008CV) <- c("Nodes.label", "v2008") #--------------- d2009 <- subset(d, d$YEAR == 2009) adf2009CV <- d2009 %>% # ... the %>% symbol shows that you are going to do something to the data group_by(COUNTRY) %>% # For each combination of MUNICIPALITY, COUNTRY, EXPORTER summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom colnames(adf2009CV) <- c("Nodes.label", "v2009") #------------------- d2010 <- subset(d, d$YEAR == 2010) adf2010CV <- d2010 %>% # ... 
the %>% symbol shows that you are going to do something to the data group_by(COUNTRY) %>% # For each combination of MUNICIPALITY, COUNTRY, EXPORTER summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom colnames(adf2010CV) <- c("Nodes.label", "v2010") #------------- d2011 <- subset(d, d$YEAR == 2011) adf2011CV <- d2011 %>% # ... the %>% symbol shows that you are going to do something to the data group_by(COUNTRY) %>% # For each combination of MUNICIPALITY, COUNTRY, EXPORTER summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom colnames(adf2011CV) <- c("Nodes.label", "v2011") #------------ d2012 <- subset(d, d$YEAR == 2012) adf2012CV <- d2012 %>% # ... the %>% symbol shows that you are going to do something to the data group_by(COUNTRY) %>% # For each combination of MUNICIPALITY, COUNTRY, EXPORTER summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom colnames(adf2012CV) <- c("Nodes.label", "v2012") #---------------- d2013 <- subset(d, d$YEAR == 2013) adf2013CV <- d2013 %>% # ... the %>% symbol shows that you are going to do something to the data group_by(COUNTRY) %>% # For each combination of MUNICIPALITY, COUNTRY, EXPORTER summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom colnames(adf2013CV) <- c("Nodes.label", "v2013") #--------------- d2014 <- subset(d, d$YEAR == 2014) adf2014CV <- d2014 %>% # ... 
the %>% symbol shows that you are going to do something to the data group_by(COUNTRY) %>% # For each combination of MUNICIPALITY, COUNTRY, EXPORTER summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom colnames(adf2014CV) <- c("Nodes.label", "v2014") #----------------- d2015 <- subset(d, d$YEAR == 2015) adf2015CV <- d2015 %>% # ... the %>% symbol shows that you are going to do something to the data group_by(COUNTRY) %>% # For each combination of MUNICIPALITY, COUNTRY, EXPORTER summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom colnames(adf2015CV) <- c("Nodes.label", "v2015") #------------------ d2016 <- subset(d, d$YEAR == 2016) adf2016CV <- d2016 %>% # ... the %>% symbol shows that you are going to do something to the data group_by(COUNTRY) %>% # For each combination of MUNICIPALITY, COUNTRY, EXPORTER summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom colnames(adf2016CV) <- c("Nodes.label", "v2016") #---------------------- d2017 <- subset(d, d$YEAR == 2017) adf2017CV <- d2017 %>% # ... 
the %>% symbol shows that you are going to do something to the data group_by(COUNTRY) %>% # For each combination of MUNICIPALITY, COUNTRY, EXPORTER summarize(SUM_SOY_TONS = sum(SUM_SOY_TONS)) %>% # Calculate the sum ungroup() %>% # ungroup() makes R forget that you were grouping arrange(desc(SUM_SOY_TONS)) # Order from top to bottom colnames(adf2017CV) <- c("Nodes.label", "v2017") # Now rbinding them in annual volumes for all nodes NodesVol2003 <- rbind(adf2003MV, adf2003EV, adf2003CV) NodesVol2004 <- rbind(adf2004MV, adf2004EV, adf2004CV) NodesVol2005 <- rbind(adf2005MV, adf2005EV, adf2005CV) NodesVol2006 <- rbind(adf2006MV, adf2006EV, adf2006CV) NodesVol2007 <- rbind(adf2007MV, adf2007EV, adf2007CV) NodesVol2008 <- rbind(adf2008MV, adf2008EV, adf2008CV) NodesVol2009 <- rbind(adf2009MV, adf2009EV, adf2009CV) NodesVol2010 <- rbind(adf2010MV, adf2010EV, adf2010CV) NodesVol2011 <- rbind(adf2011MV, adf2011EV, adf2011CV) NodesVol2012 <- rbind(adf2012MV, adf2012EV, adf2012CV) NodesVol2013 <- rbind(adf2013MV, adf2013EV, adf2013CV) NodesVol2014 <- rbind(adf2014MV, adf2014EV, adf2014CV) NodesVol2015 <- rbind(adf2015MV, adf2015EV, adf2015CV) NodesVol2016 <- rbind(adf2016MV, adf2016EV, adf2016CV) NodesVol2017 <- rbind(adf2017MV, adf2017EV, adf2017CV) NodesCiSVol <- left_join(NodesCiS, NodesVol2003, by = c("Nodes.label" = "Nodes.label")) NodesCiSVol <- left_join(NodesCiSVol, NodesVol2004, by = c("Nodes.label" = "Nodes.label")) NodesCiSVol <- left_join(NodesCiSVol, NodesVol2005, by = c("Nodes.label" = "Nodes.label")) NodesCiSVol <- left_join(NodesCiSVol, NodesVol2006, by = c("Nodes.label" = "Nodes.label")) NodesCiSVol <- left_join(NodesCiSVol, NodesVol2007, by = c("Nodes.label" = "Nodes.label")) NodesCiSVol <- left_join(NodesCiSVol, NodesVol2008, by = c("Nodes.label" = "Nodes.label")) NodesCiSVol <- left_join(NodesCiSVol, NodesVol2009, by = c("Nodes.label" = "Nodes.label")) NodesCiSVol <- left_join(NodesCiSVol, NodesVol2010, by = c("Nodes.label" = "Nodes.label")) NodesCiSVol 
<- left_join(NodesCiSVol, NodesVol2011, by = c("Nodes.label" = "Nodes.label")) NodesCiSVol <- left_join(NodesCiSVol, NodesVol2012, by = c("Nodes.label" = "Nodes.label")) NodesCiSVol <- left_join(NodesCiSVol, NodesVol2013, by = c("Nodes.label" = "Nodes.label")) NodesCiSVol <- left_join(NodesCiSVol, NodesVol2014, by = c("Nodes.label" = "Nodes.label")) NodesCiSVol <- left_join(NodesCiSVol, NodesVol2015, by = c("Nodes.label" = "Nodes.label")) NodesCiSVol <- left_join(NodesCiSVol, NodesVol2016, by = c("Nodes.label" = "Nodes.label")) NodesCiSVol <- left_join(NodesCiSVol, NodesVol2017, by = c("Nodes.label" = "Nodes.label")) NodesCiSVol$vtotal <- apply(NodesCiSVol[,c(38:52)], 1, FUN=sum) is.na(NodesCiSVol) <- sapply(NodesCiSVol, is.infinite) NodesCiSVol[is.na(NodesCiSVol)] <- 0 NodesCiSVol$trend <- NodesCiSVol$v2017- NodesCiSVol$v2003 saveRDS(NodesCiSVol, "NewAnalysesTrase2.4/NodesCi/NodesCiSVol") #---- Same for NodesCiR NodesCiRVol <- left_join(NodesCiR, NodesVol2003, by = c("Nodes.label" = "Nodes.label")) NodesCiRVol <- left_join(NodesCiRVol, NodesVol2004, by = c("Nodes.label" = "Nodes.label")) NodesCiRVol <- left_join(NodesCiRVol, NodesVol2005, by = c("Nodes.label" = "Nodes.label")) NodesCiRVol <- left_join(NodesCiRVol, NodesVol2006, by = c("Nodes.label" = "Nodes.label")) NodesCiRVol <- left_join(NodesCiRVol, NodesVol2007, by = c("Nodes.label" = "Nodes.label")) NodesCiRVol <- left_join(NodesCiRVol, NodesVol2008, by = c("Nodes.label" = "Nodes.label")) NodesCiRVol <- left_join(NodesCiRVol, NodesVol2009, by = c("Nodes.label" = "Nodes.label")) NodesCiRVol <- left_join(NodesCiRVol, NodesVol2010, by = c("Nodes.label" = "Nodes.label")) NodesCiRVol <- left_join(NodesCiRVol, NodesVol2011, by = c("Nodes.label" = "Nodes.label")) NodesCiRVol <- left_join(NodesCiRVol, NodesVol2012, by = c("Nodes.label" = "Nodes.label")) NodesCiRVol <- left_join(NodesCiRVol, NodesVol2013, by = c("Nodes.label" = "Nodes.label")) NodesCiRVol <- left_join(NodesCiRVol, NodesVol2014, by = c("Nodes.label" = 
"Nodes.label")) NodesCiRVol <- left_join(NodesCiRVol, NodesVol2015, by = c("Nodes.label" = "Nodes.label")) NodesCiRVol <- left_join(NodesCiRVol, NodesVol2016, by = c("Nodes.label" = "Nodes.label")) NodesCiRVol <- left_join(NodesCiRVol, NodesVol2017, by = c("Nodes.label" = "Nodes.label")) NodesCiRVol$vtotal <- apply(NodesCiRVol[,c(38:52)], 1, FUN=sum) is.na(NodesCiRVol) <- sapply(NodesCiRVol, is.infinite) NodesCiRVol[is.na(NodesCiRVol)] <- 0 NodesCiRVol$trend <- NodesCiRVol$v2017- NodesCiRVol$v2003 saveRDS(NodesCiRVol, "NewAnalysesTrase2.4/NodesCi/NodesCiRVol") NodesCiRVol <- left_join(NodesCiRVol, count, by = c("Nodes.id" = "id")) NodesCiRVol$frequency[is.na(NodesCiRVol$frequency)] <- 0 saveRDS(NodesCiRVol, "NewAnalysesTrase2.4/NodesCi/NodesCiRVol") #I need to calculate the frequency of annual volumes being larger than 27, because nodes with volume #less than 27 tons at any given year cannot be called resilient. #Let's go in the braço style: NodesVol2003$year <- 2003 colnames(NodesVol2003) <- c("Nodes.label", "vol", "year") NodesVol2004$year <- 2004 colnames(NodesVol2004) <- c("Nodes.label", "vol", "year") NodesVol2005$year <- 2005 colnames(NodesVol2005) <- c("Nodes.label", "vol", "year") NodesVol2006$year <- 2006 colnames(NodesVol2006) <- c("Nodes.label", "vol", "year") NodesVol2007$year <- 2007 colnames(NodesVol2007) <- c("Nodes.label", "vol", "year") NodesVol2008$year <- 2008 colnames(NodesVol2008) <- c("Nodes.label", "vol", "year") NodesVol2009$year <- 2009 colnames(NodesVol2009) <- c("Nodes.label", "vol", "year") NodesVol2010$year <- 2010 colnames(NodesVol2010) <- c("Nodes.label", "vol", "year") NodesVol2011$year <- 2011 colnames(NodesVol2011) <- c("Nodes.label", "vol", "year") NodesVol2012$year <- 2012 colnames(NodesVol2012) <- c("Nodes.label", "vol", "year") NodesVol2013$year <- 2013 colnames(NodesVol2013) <- c("Nodes.label", "vol", "year") NodesVol2014$year <- 2014 colnames(NodesVol2014) <- c("Nodes.label", "vol", "year") NodesVol2015$year <- 2015 
colnames(NodesVol2015) <- c("Nodes.label", "vol", "year") NodesVol2016$year <- 2016 colnames(NodesVol2016) <- c("Nodes.label", "vol", "year") NodesVol2017$year <- 2017 colnames(NodesVol2017) <- c("Nodes.label", "vol", "year") NodesVol <- rbind(NodesVol2003, NodesVol2004, NodesVol2005, NodesVol2006, NodesVol2007, NodesVol2008, NodesVol2009, NodesVol2010, NodesVol2011, NodesVol2012, NodesVol2013, NodesVol2014, NodesVol2015, NodesVol2016, NodesVol2017) NodesVolID <- inner_join(NodesVol, Nodes, by = c("Nodes.label" = "label")) NodesVolID2 <- subset(NodesVolID, vol >= 27) count <- count(NodesVolID2, vars = NodesVolID2$id) colnames(count) <- c("id", "frequency") NodesCiSVol <- left_join(NodesCiSVol, count, by = c("Nodes.id" = "id")) NodesCiSVol$frequency[is.na(NodesCiSVol$frequency)] <- 0 saveRDS(NodesCiSVol, "NewAnalysesTrase2.4/NodesCi/NodesCiSVol") #Testes sum(NodesCiSVol$v2003) totalVol_trend <- sum(NodesCiSVol$v2017) - sum(NodesCiSVol$v2003) ################ Adding annual volumes volumes to NodesCi tables for Mun -> Country #I just need to take the annual volumes from Mun and Countries from NodesCiVol and join with NodesCiMunCountry NodesCiSVol <- readRDS("NewAnalysesTrase2.4/NodesCi/NodesCiSVol") NodesCiRVol <- readRDS("NewAnalysesTrase2.4/NodesCi/NodesCiRVol") NodesCiR_labels <- readRDS("NewAnalysesTrase2.4/NodesCi/CI_MunCountry/NodesCiR_final_cleanLabels") NodesCiS_labels <- readRDS("NewAnalysesTrase2.4/NodesCi/CI_MunCountry/NodesCiS_final_cleanLabels") #Subsetting only municipalities and countries and the columns with annual volumes #NodesCiSVolSub <- subset(NodesCiSVol, Nodes.id>0 & Nodes.id<=2529 | Nodes.id >=4239 & Nodes.id <=4365) #NodesCiSVolSub <- NodesCiSVolSub[c(1,38:55)] #For LHs the subset is different, here below NodesCiSVolSub <- subset(NodesCiSVol, Nodes.id>0 & Nodes.id<=508 | Nodes.id >=2216) NodesCiSVolSub <- NodesCiSVolSub[c(1,38:55)] ##NodesCiRVolSub <- subset(NodesCiRVol, Nodes.id>0 & Nodes.id<=2529 | Nodes.id >=4239 & Nodes.id <=4365) 
##NodesCiRVolSub <- NodesCiRVolSub[c(1,38:55)] #For LHs NodesCiRVolSub <- subset(NodesCiRVol, Nodes.id>0 & Nodes.id<=508 | Nodes.id >=2216) NodesCiRVolSub <- NodesCiRVolSub[c(1,38:55)] #Now joining them NodesCiS_labels <- left_join(NodesCiS_labels, NodesCiSVolSub, by = c("Nodes.id" = "Nodes.id")) saveRDS(NodesCiS_labels, "NewAnalysesTrase2.4/NodesCi/CI_MunCountry/NodesCiSVol") NodesCiR_labels <- left_join(NodesCiR_labels, NodesCiRVolSub, by = c("Nodes.id" = "Nodes.id")) saveRDS(NodesCiR_labels, "NewAnalysesTrase2.4/NodesCi/CI_MunCountry/NodesCiRVol") write_delim(NodesCiS_labels, "NewAnalysesTrase2.4/NodesCi/CI_MunCountry/NodesCiSVol_MunCountry.csv", delim = ";") write_delim(NodesCiR_labels, "NewAnalysesTrase2.4/NodesCi/CI_MunCountry/NodesCiRVol_MunCountry.csv", delim = ";") ##Everything that matters for the panel data analysis is ready, so now let's just retrieve NodesCiSMunCountry <- readRDS("NewAnalysesTrase2.4/NodesCi/CI_MunCountry/NodesCiSVol") NodesCiRMunCountry <- readRDS("NewAnalysesTrase2.4/NodesCi/CI_MunCountry/NodesCiRVol") v24NodesCiSVol <- readRDS("NewAnalysesTrase2.4/NodesCi/NodesCiSVol") NodesCiRVol <- readRDS("NewAnalysesTrase2.4/NodesCi/NodesCiRVol") NodesWPiSMunCountry <- readRDS("NewAnalysesTrase2.4/NodesWPi/WPi_MunCountry/NodesWPiS_labels") NodesWPiRMunCountry <- readRDS("NewAnalysesTrase2.4/NodesWPi/WPi_MunCountry/NodesWPiR_labels") NodesWPiS <- readRDS("NewAnalysesTrase2.4/NodesWPi/NodesWPiS_labels") NodesWPiR <- readRDS("NewAnalysesTrase2.4/NodesWPi/NodesWPiR_labels") ADF <- readRDS("NewAnalysesTrase2.4/Trase24_clean") ##Basically, what I need to do is to retrieve only GEOCODEs and CIsending of LH_municipalities and the volumes #Also differentiate Brazil and other countries as destination ##Now I just have to continue preparing the dataset for the panel analysis