This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(statsr)
library(tidyverse)
library(readxl)
library(visNetwork)
library(igraph)
library(tidygraph)
library(ggraph)
library(formattable)
# Let's load up the data from excel file
institutions_data <- read_excel("Data.xlsx", sheet = "Institutions")
authorship_list <- read_excel("Data.xlsx", sheet = "Authorships")
article_list <- read_excel("Data.xlsx", sheet = "Articles")
nrow(institutions_data)
## [1] 100
# Summaries by type
institutions_data %>% group_by(Type) %>% summarise(N = n()) %>% mutate(perc = percent(N / sum(N))) %>% arrange(desc(perc))
## # A tibble: 5 × 3
## Type N perc
## <chr> <int> <formttbl>
## 1 Academic 51 51.00%
## 2 Healthcare 17 17.00%
## 3 Private Practice 14 14.00%
## 4 Research 10 10.00%
## 5 Commercial 8 8.00%
# Create the institution list
inst_list <- institutions_data %>% select(Organisation, Country, Type, AuthorCount, ArtCount, ArtPerc, SumWeightedCoeff, Earliest, Latest) %>%
mutate(ArtPerc = formattable::percent(ArtPerc))
# Display Top 10 Institutions by Article Count
top_inst_by_artcount <- inst_list %>%
top_n(10, ArtCount)
# Display Top 10 Institutions by Sum of Weight Coefficient of the authors
top_inst_by_TWC <- inst_list %>%
top_n(10, SumWeightedCoeff)
# outer join the two list and arrange by TWC
top_inst_all <- merge(top_inst_by_artcount, top_inst_by_TWC,
by=c("Organisation", "Country","Type", "AuthorCount", "SumWeightedCoeff","ArtCount", "ArtPerc", "Earliest", "Latest" ), all = TRUE)
top_inst_all %>% arrange(desc(SumWeightedCoeff))
## Organisation Country Type
## 1 Charles Drew University of Medicine and Science USA Academic
## 2 Erom Co. Ltd South Korea Commercial
## 3 Chiba University Japan Academic
## 4 University of Pécs Hungary Academic
## 5 University of Mansoura Egypt Academic
## 6 Daiwa Pharmaceutical Co. Ltd. Japan Commercial
## 7 University of Miami Miller School of Medicine USA Academic
## 8 University of California at Irvine USA Academic
## 9 Slovak Academy of Sciences Slovakia Academic
## 10 Ajou University South Korea Academic
## 11 STR Biotech Co. Ltd. South Korea Commercial
## 12 National Center for Radiation Research and Technology Egypt Research
## 13 Korea University South Korea Academic
## AuthorCount SumWeightedCoeff ArtCount ArtPerc Earliest Latest
## 1 6 35.033862 30 30.61% 1998 2021
## 2 13 15.485714 8 8.16% 2004 2022
## 3 12 14.494709 7 7.14% 2000 2017
## 4 12 14.366667 7 7.14% 2013 2018
## 5 5 13.122222 9 9.18% 2008 2020
## 6 10 10.636508 11 11.22% 2000 2017
## 7 21 10.589107 4 4.08% 2012 2020
## 8 3 9.000000 9 9.18% 2003 2021
## 9 5 5.218519 2 2.04% 2009 2013
## 10 2 5.000000 3 3.06% 2013 2018
## 11 2 3.700529 5 5.10% 2013 2022
## 12 2 2.544444 4 4.08% 2008 2016
## 13 2 2.131217 4 4.08% 2004 2011
top_inst_all %>% arrange(desc(ArtCount))
## Organisation Country Type
## 1 Charles Drew University of Medicine and Science USA Academic
## 2 Daiwa Pharmaceutical Co. Ltd. Japan Commercial
## 3 University of California at Irvine USA Academic
## 4 University of Mansoura Egypt Academic
## 5 Erom Co. Ltd South Korea Commercial
## 6 Chiba University Japan Academic
## 7 University of Pécs Hungary Academic
## 8 STR Biotech Co. Ltd. South Korea Commercial
## 9 Korea University South Korea Academic
## 10 National Center for Radiation Research and Technology Egypt Research
## 11 University of Miami Miller School of Medicine USA Academic
## 12 Ajou University South Korea Academic
## 13 Slovak Academy of Sciences Slovakia Academic
## AuthorCount SumWeightedCoeff ArtCount ArtPerc Earliest Latest
## 1 6 35.033862 30 30.61% 1998 2021
## 2 10 10.636508 11 11.22% 2000 2017
## 3 3 9.000000 9 9.18% 2003 2021
## 4 5 13.122222 9 9.18% 2008 2020
## 5 13 15.485714 8 8.16% 2004 2022
## 6 12 14.494709 7 7.14% 2000 2017
## 7 12 14.366667 7 7.14% 2013 2018
## 8 2 3.700529 5 5.10% 2013 2022
## 9 2 2.131217 4 4.08% 2004 2011
## 10 2 2.544444 4 4.08% 2008 2016
## 11 21 10.589107 4 4.08% 2012 2020
## 12 2 5.000000 3 3.06% 2013 2018
## 13 5 5.218519 2 2.04% 2009 2013
# inner join
top_inst <- merge(top_inst_by_artcount, top_inst_by_TWC,
by=c("Organisation", "Country","Type", "AuthorCount", "SumWeightedCoeff","ArtCount", "ArtPerc", "Earliest", "Latest" ), all = FALSE)
top_inst %>% arrange(desc(SumWeightedCoeff))
## Organisation Country Type
## 1 Charles Drew University of Medicine and Science USA Academic
## 2 Erom Co. Ltd South Korea Commercial
## 3 Chiba University Japan Academic
## 4 University of Pécs Hungary Academic
## 5 University of Mansoura Egypt Academic
## 6 Daiwa Pharmaceutical Co. Ltd. Japan Commercial
## 7 University of Miami Miller School of Medicine USA Academic
## 8 University of California at Irvine USA Academic
## AuthorCount SumWeightedCoeff ArtCount ArtPerc Earliest Latest
## 1 6 35.03386 30 30.61% 1998 2021
## 2 13 15.48571 8 8.16% 2004 2022
## 3 12 14.49471 7 7.14% 2000 2017
## 4 12 14.36667 7 7.14% 2013 2018
## 5 5 13.12222 9 9.18% 2008 2020
## 6 10 10.63651 11 11.22% 2000 2017
## 7 21 10.58911 4 4.08% 2012 2020
## 8 3 9.00000 9 9.18% 2003 2021
top_inst %>% arrange(desc(ArtCount))
## Organisation Country Type
## 1 Charles Drew University of Medicine and Science USA Academic
## 2 Daiwa Pharmaceutical Co. Ltd. Japan Commercial
## 3 University of California at Irvine USA Academic
## 4 University of Mansoura Egypt Academic
## 5 Erom Co. Ltd South Korea Commercial
## 6 Chiba University Japan Academic
## 7 University of Pécs Hungary Academic
## 8 University of Miami Miller School of Medicine USA Academic
## AuthorCount SumWeightedCoeff ArtCount ArtPerc Earliest Latest
## 1 6 35.03386 30 30.61% 1998 2021
## 2 10 10.63651 11 11.22% 2000 2017
## 3 3 9.00000 9 9.18% 2003 2021
## 4 5 13.12222 9 9.18% 2008 2020
## 5 13 15.48571 8 8.16% 2004 2022
## 6 12 14.49471 7 7.14% 2000 2017
## 7 12 14.36667 7 7.14% 2013 2018
## 8 21 10.58911 4 4.08% 2012 2020
# Find the research focus of the institutions
# get distinct institutions
dis_inst_list <- authorship_list %>% select(ArticleID, Affiliation) %>% distinct_all()
# Join the institutions with distinct authorship list
dis_inst_list <- dis_inst_list %>%
left_join(inst_list, by=c("Affiliation" = "Organisation" ))
# Join the article list to get the study types
research_focus <- dis_inst_list %>%
left_join(article_list, by=c("ArticleID" = "ArticleID")) %>%
select(Organisation = Affiliation, ArtCount, HumanStudy, AnimalStudy, InVitroStudy, ChemicalAnalysis, ClinicalDesign) %>%
mutate(HumanStudy = factor(HumanStudy),AnimalStudy =factor(AnimalStudy), InVitroStudy = factor(InVitroStudy), ChemicalAnalysis = factor(ChemicalAnalysis)) %>%
mutate(ClinicalDesign = factor(ClinicalDesign, levels = c("Randomised controlled trial" , "Non-randomised controlled trial", "Before and after study", "Descriptive cross-sectional studies", "Case series", "Case report"))) %>%
mutate(InterventionStudy = case_when(as.numeric(ClinicalDesign) < 4 ~ 1,
as.numeric(ClinicalDesign) > 0 ~ 0,
TRUE ~ 0)) %>%
mutate(OberservationStudy = case_when(as.numeric(ClinicalDesign) < 4 ~ 0,
as.numeric(ClinicalDesign) > 0 ~ 1,
TRUE ~ 0)) %>%
select(-c("ClinicalDesign"))
research_focus
## # A tibble: 211 × 8
## Organisation ArtCo…¹ Human…² Anima…³ InVit…⁴ Chemi…⁵ Inter…⁶ Obers…⁷
## <chr> <dbl> <fct> <fct> <fct> <fct> <dbl> <dbl>
## 1 Pharmacognosia 1 Yes No No No 1 0
## 2 University of Miami … 4 Yes No No No 1 0
## 3 Fisher Institute for… 1 Yes No No No 1 0
## 4 Korea University 4 No Yes Yes Yes 0 0
## 5 University of Mansou… 9 No Yes No No 0 0
## 6 National Center for … 4 No Yes No No 0 0
## 7 Charles Drew Univers… 30 No Yes No No 0 0
## 8 University of Mansou… 9 No Yes No No 0 0
## 9 National Center for … 4 No Yes No No 0 0
## 10 Charles Drew Univers… 30 No Yes No No 0 0
## # … with 201 more rows, and abbreviated variable names ¹ArtCount, ²HumanStudy,
## # ³AnimalStudy, ⁴InVitroStudy, ⁵ChemicalAnalysis, ⁶InterventionStudy,
## # ⁷OberservationStudy
# Calculate the % of each study type over article count
research_focus <- research_focus %>%
group_by(Organisation) %>%
summarise(H_p= percent(sum(as.numeric(HumanStudy)-1)/ ArtCount),
A_p = percent(sum(as.numeric(AnimalStudy)-1)/ ArtCount),
I_p = percent(sum(as.numeric(InVitroStudy)-1)/ ArtCount),
C_p = percent(sum(as.numeric(ChemicalAnalysis)-1)/ ArtCount),
HI_p = percent(sum(InterventionStudy)/ ArtCount),
HO_p = percent(sum(OberservationStudy)/ ArtCount )) %>% distinct_all()
## `summarise()` has grouped output by 'Organisation'. You can override using the
## `.groups` argument.
research_focus
## # A tibble: 101 × 7
## # Groups: Organisation [101]
## Organisation H_p A_p I_p C_p HI_p HO_p
## <chr> <formt> <formt> <formt> <form> <formt> <formt>
## 1 Affiliated Hospital of Nanjin… 0.00% 100.00% 0.00% 0.00% 0.00% 0.00%
## 2 Aichi-Gakuin University 0.00% 0.00% 100.00% 0.00% 0.00% 0.00%
## 3 Aichi Gakusen University 0.00% 100.00% 0.00% 0.00% 0.00% 0.00%
## 4 Ajou University 0.00% 100.00% 100.00% 66.67% 0.00% 0.00%
## 5 Al-Qalam University 0.00% 100.00% 0.00% 0.00% 0.00% 0.00%
## 6 Andong Science College 0.00% 100.00% 0.00% 0.00% 0.00% 0.00%
## 7 Anhui University of Tradition… 0.00% 100.00% 100.00% 0.00% 0.00% 0.00%
## 8 Atreyu Uozaki Long Term Care … 100.00% 0.00% 0.00% 0.00% 100.00% 0.00%
## 9 Barclay’s, Inc. 100.00% 0.00% 0.00% 0.00% 100.00% 0.00%
## 10 Baylor College of Medicine 100.00% 0.00% 0.00% 0.00% 0.00% 100.00%
## # … with 91 more rows
# Display top institutions with corresponding research focus
top_inst_list <- top_inst %>%
left_join(research_focus, by=c("Organisation" = "Organisation")) %>% arrange(desc(ArtCount))
top_inst_list %>% select(Organisation, Earliest, Latest, HI_p, HO_p, A_p, I_p, C_p )
## Organisation Earliest Latest HI_p
## 1 Charles Drew University of Medicine and Science 1998 2021 26.67%
## 2 Daiwa Pharmaceutical Co. Ltd. 2000 2017 18.18%
## 3 University of California at Irvine 2003 2021 11.11%
## 4 University of Mansoura 2008 2020 0.00%
## 5 Erom Co. Ltd 2004 2022 12.50%
## 6 Chiba University 2000 2017 0.00%
## 7 University of Pécs 2013 2018 14.29%
## 8 University of Miami Miller School of Medicine 2012 2020 100.00%
## HO_p A_p I_p C_p
## 1 0.00% 30.00% 53.33% 0.00%
## 2 0.00% 45.45% 27.27% 18.18%
## 3 0.00% 0.00% 100.00% 0.00%
## 4 0.00% 88.89% 11.11% 0.00%
## 5 0.00% 62.50% 50.00% 0.00%
## 6 0.00% 71.43% 14.29% 14.29%
## 7 85.71% 0.00% 0.00% 0.00%
## 8 0.00% 0.00% 0.00% 0.00%
# Create the institution list with article ID
institution_list <- authorship_list %>% select(ArticleID, Affiliation, Country)
institution_list
## # A tibble: 471 × 3
## ArticleID Affiliation Country
## <chr> <chr> <chr>
## 1 Ali et al. (2012) Pharmacognosia USA
## 2 Ali et al. (2012) University of Miami Miller School of Medic… USA
## 3 Ali et al. (2012) University of Miami Miller School of Medic… USA
## 4 Ali et al. (2012) University of Miami Miller School of Medic… USA
## 5 Ali et al. (2012) University of Miami Miller School of Medic… USA
## 6 Ali et al. (2012) University of Miami Miller School of Medic… USA
## 7 Ali et al. (2012) Fisher Institute for Medical Research USA
## 8 Ali et al. (2012) University of Miami Miller School of Medic… USA
## 9 An (2011) Korea University South …
## 10 Badr El-Din et al. (2008) University of Mansoura Egypt
## # … with 461 more rows
#Create network nodes based on the unique institutional affiliation found in the list
nodes <- institution_list %>% select(Affiliation, Country) %>% distinct_all()
nodes <- rowid_to_column(nodes, "id")
#Node label will be the institution and group them by Country
nodes <- nodes %>% rename( label = Affiliation, group = Country)
nodes
## # A tibble: 101 × 3
## id label group
## <int> <chr> <chr>
## 1 1 Pharmacognosia USA
## 2 2 University of Miami Miller School of Medicine USA
## 3 3 Fisher Institute for Medical Research USA
## 4 4 Korea University South Korea
## 5 5 University of Mansoura Egypt
## 6 6 National Center for Radiation Research and Technology Egypt
## 7 7 Charles Drew University of Medicine and Science USA
## 8 8 Al-Qalam University Iraq
## 9 9 University of California at Los Angeles USA
## 10 10 Daegu Haany University South Korea
## # … with 91 more rows
# Create edges as a data frame
edges <- data.frame(fromA= character(0), toA= character(0), category = character(0) )
# Edges are defined through iterating all the articles based on co-occurrence within the same article
for (a in unique(institution_list$ArticleID)) {
i_list <- institution_list %>% filter(ArticleID == a)
u_list <- i_list$Affiliation
len <- length(u_list) -1
if (len > 0) {
for (i in c(1:len)) {
k <- i+ 1
for (j in c(k:length(u_list))) {
if (u_list[i][1] <= u_list[j][1]) {
new <- c(u_list[i][1], u_list[j][1], a)
} else {
new <- c(u_list[j][1], u_list[i][1], a)
}
edges[nrow(edges) + 1,] <- new
}
}
}
}
edges <- edges %>% left_join(nodes, by = c("fromA"= "label")) %>% rename(from=id)
edges <- edges %>% left_join(nodes, by = c("toA"= "label")) %>% rename(to=id)
edges <- edges %>% select(from, to, category)
#edges
# Display the raw network diagram
visNetwork(nodes, edges)
#Use routes_tidy package to write the graph into GML file for file exchange
routes_tidy <- tbl_graph(nodes = nodes,
edges = edges,
directed = FALSE)
write_graph(routes_tidy, "int_graph.gml", format="gml" )
routes_tidy
## # A tbl_graph: 101 nodes and 1525 edges
## #
## # An undirected multigraph with 29 components
## #
## # Node Data: 101 × 3 (active)
## id label group
## <int> <chr> <chr>
## 1 1 Pharmacognosia USA
## 2 2 University of Miami Miller School of Medicine USA
## 3 3 Fisher Institute for Medical Research USA
## 4 4 Korea University South Korea
## 5 5 University of Mansoura Egypt
## 6 6 National Center for Radiation Research and Technology Egypt
## # … with 95 more rows
## #
## # Edge Data: 1,525 × 3
## from to category
## <int> <int> <chr>
## 1 1 2 Ali et al. (2012)
## 2 1 2 Ali et al. (2012)
## 3 1 2 Ali et al. (2012)
## # … with 1,522 more rows
#collapse the edges by creating a value to each edge based on count of co-occurance
edges1 <- edges %>% filter(from != to) %>% group_by(from, to, category) %>% summarise(value = n())
## `summarise()` has grouped output by 'from', 'to'. You can override using the
## `.groups` argument.
# use igraph
g <- graph_from_data_frame(edges1, directed = FALSE, vertices = nodes)
# Define node size based on the degree
nodes <- nodes %>% mutate(value = (degree(g)^2)) %>% mutate(font.size = 14+degree(g))
# Plot the network
visNetwork(nodes, edges1, main = "Collaorative Networks of Institutions in RBAC Research", height = "800", width = "100%") %>%
visNodes(scaling = list(min = 5, max = 50), borderWidth=0) %>%
visEdges(scaling = list(min = 1, max = 5), color=edges1$category)
#Use routes_tidy package to write the graph into GML file for file exchange
routes_tidy1 <- tbl_graph(nodes = nodes,
edges = edges1,
directed = FALSE)
write_graph(routes_tidy1, "int_graph1.gml", format="gml" )
routes_tidy1
## # A tbl_graph: 101 nodes and 183 edges
## #
## # An undirected multigraph with 29 components
## #
## # Node Data: 101 × 5 (active)
## id label group value font.si…
## <int> <chr> <chr> <dbl> <dbl>
## 1 1 Pharmacognosia USA 4 16
## 2 2 University of Miami Miller School of Medicine USA 49 21
## 3 3 Fisher Institute for Medical Research USA 4 16
## 4 4 Korea University South Kor… 49 21
## 5 5 University of Mansoura Egypt 225 29
## 6 6 National Center for Radiation Research and Te… Egypt 81 23
## # … with 95 more rows
## #
## # Edge Data: 183 × 4
## from to category value
## <int> <int> <chr> <int>
## 1 1 2 Ali et al. (2012) 6
## 2 2 56 Lewis et al. (2020a) 17
## 3 1 3 Ali et al. (2012) 1
## # … with 180 more rows