Intro

A script to calculate the mean genome formula (GF) distance for empirical data.

Load the data and necessary libraries

Load the experimental data from Sicard et al. (2013) on FBNSV in V. faba. Many other datasets from this paper could be used for a similar analysis, but we chose this one because it reports the GF at different leaf levels.

# Load libraries
library(magrittr)
library(utils)
library(plyr)
library(dplyr)

# Read the Sicard et al. Fig. 3A dataset from the local data directory.
setwd("C:/R_Data")
df_data <- as.data.frame(read.csv("Dataset_3_FBNSV_Sicard_Fig_3A.csv"))

Analysis

Calculate the GF distance using some simple R code, after creating a data frame containing only the GF data you want (the code below selects leaf level 1). Run the code for samples 1 to 3 to obtain all the results needed.

# Keep only the rows recorded for leaf level 1, then retain the eight
# genome formula (f_*) columns that enter the distance calculation.
gf.data <- select(
  filter(df_data, Leaf == 1),
  f_C, f_M, f_N, f_R, f_S, f_U1, f_U2, f_U4
)

# Compute the Euclidean distance between the genome formulas of every pair
# of rows in gf.data. pw.comp[i, j] holds the distance between rows i and j;
# the diagonal holds each row's distance to itself.
n.row <- nrow(gf.data)
pw.comp <- array(data = NA_real_, dim = c(n.row, n.row))

# Convert once to a matrix so the loop does cheap row lookups instead of
# subsetting a data frame n.row^2 times (assumes all selected columns are
# numeric, as the subtraction below requires).
gf.mat <- as.matrix(gf.data)

# seq_len() gives an empty sequence when gf.data has no rows, whereas
# 1:n.row would iterate over c(1, 0) and fail on out-of-range indices.
for (i in seq_len(n.row)) {
  for (j in seq_len(n.row)) {
    pw.comp[i, j] <- sqrt(sum((gf.mat[i, ] - gf.mat[j, ])^2))
  }
}

# For each row, average its distances to all *other* rows: the negative
# index -i drops the row's own self-distance from pw.comp[i, ].
# vapply() over seq_len() returns an empty numeric vector when there are no
# rows, where a loop over 1:n.row would visit indices 1 and 0 and fail.
# With a single row there are no neighbours, so the result is NaN.
dist.results <- vapply(
  seq_len(n.row),
  function(i) mean(pw.comp[i, -i]),
  numeric(1)
)

# Summarise the per-row mean distances: their overall mean and standard
# deviation are the two goals of the analysis. Also record how many rows
# of gf.data the summary is based on.
num.reps.d <- nrow(gf.data)
mean.gf.dist <- mean(dist.results)
sd.gf.dist <- sd(dist.results)

# Print the per-row mean distances, followed by their mean and standard
# deviation. Lines starting with `##` are the console output recorded from
# a previous run of this script.
print(dist.results)
## [1] 0.5428762 0.2740040 0.3550402 0.2837913 0.3362704 0.2677749 0.3687947
## [8] 0.4692369 0.2709646
print(mean.gf.dist)
## [1] 0.3520837
print(sd.gf.dist)
## [1] 0.09689769

Run a Kendall rank correlation test on the results obtained for all leaf levels (Kendall's tau is used in lieu of Spearman's rho because the data contain ties). The outputs of the previous analyses (the `dist.results` values) for all leaf levels were aggregated into a single CSV file for this analysis.

# Read the aggregated GF distance results from the local data directory.
setwd("C:/R_Data")
df_overview <- as.data.frame(read.csv("Dataset_4_GF_dist_results_FBNSV.csv"))

# Kendall's tau between the Leaf and GF_dist columns (point estimate only).
cor(df_overview$Leaf, df_overview$GF_dist, method = c("kendall"))
## [1] -0.3679683
# Same correlation as a two-sided significance test; the recorded output
# below also reports the z statistic and p-value. Lines starting with `##`
# are console output recorded from a previous run.
cor.test(df_overview$Leaf, df_overview$GF_dist, method=c("kendall"))
## 
##  Kendall's rank correlation tau
## 
## data:  df_overview$Leaf and df_overview$GF_dist
## z = -4.4018, p-value = 1.074e-05
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
##        tau 
## -0.3679683