This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(statsr)
library(tidyverse)
library(readxl)
library(visNetwork)
library(igraph)
library(tidygraph)
library(ggraph)
library(formattable)
# Let's load up the Authors data from excel file
authors_data <- read_excel("Data.xlsx", sheet = "Authors")
authorship_list <- read_excel("Data.xlsx", sheet = "Authorships")
article_list <- read_excel("Data.xlsx", sheet = "Articles")
# Create the author list
author_list <- authors_data %>% select(AuthorID, FullName, Affiliation, Country, ArtCount, ArtPerc, SumWeightedCoeff, Earliest, Latest) %>%
mutate(ArtPerc = formattable::percent(ArtPerc))
The publishing period of the top 10 authors by article count.
# Display Top 10 Authors by Article Count
top_author_list_artcount <- author_list %>%
arrange(desc(ArtCount)) %>%
select(FullName, Affiliation, Country, ArtCount, ArtPerc, SumWeightedCoeff, Earliest, Latest) %>%
top_n(10, ArtCount)
top_author_list_artcount
## # A tibble: 16 × 8
## FullName Affil…¹ Country ArtCo…² ArtPerc SumWe…³ Earli…⁴ Latest
## <chr> <chr> <chr> <dbl> <formt> <dbl> <dbl> <dbl>
## 1 Ghoneum, Mamdooh Charle… USA 30 30.61% 30 1998 2021
## 2 Badr El-Din, Nariman … Univer… Egypt 9 9.18% 7.64 2008 2020
## 3 Maeda, Hiroaki Daiwa … Japan 7 7.14% 5.36 2000 2004
## 4 Egashira, Yukari Chiba … Japan 6 6.12% 4.5 2001 2017
## 5 Gollapudi, Sastry Univer… USA 6 6.12% 6 2003 2011
## 6 Hajtó, Tibor Univer… Hungary 6 6.12% 6 2013 2018
## 7 Hong, Seong Gil Erom C… South … 6 6.12% 3.75 2005 2022
## 8 Hwang, Sung Joo Erom C… South … 5 5.10% 1.51 2004 2022
## 9 Lee, Seong Ae STR Bi… South … 5 5.10% 1.66 2013 2022
## 10 Ali, Doaa A. Univer… Egypt 4 4.08% 2.14 2014 2020
## 11 Kim, Hwa Young Erom C… South … 4 4.08% 2.46 2005 2011
## 12 Kim, Jin Min Erom C… South … 4 4.08% 0.786 2007 2022
## 13 Lewis, John E. Univer… USA 4 4.08% 4 2012 2020
## 14 Park, Mi Hyoun Erom C… South … 4 4.08% 2.31 2004 2007
## 15 Shaheen, Magda Charle… USA 4 4.08% 1.42 2010 2020
## 16 Woolger, Judi M. Univer… USA 4 4.08% 0.270 2012 2020
## # … with abbreviated variable names ¹Affiliation, ²ArtCount, ³SumWeightedCoeff,
## # ⁴Earliest
# Display Top 10 Authors by Sum of Weight Coefficient Based on Author Rank
top_author_list_TWC <- author_list %>%
arrange(desc(SumWeightedCoeff)) %>%
select(FullName, Affiliation, Country,SumWeightedCoeff, ArtCount, ArtPerc, Earliest, Latest) %>%
top_n(10, SumWeightedCoeff)
top_author_list_TWC
## # A tibble: 13 × 8
## FullName Affil…¹ Country SumWe…² ArtCo…³ ArtPerc Earli…⁴ Latest
## <chr> <chr> <chr> <dbl> <dbl> <formt> <dbl> <dbl>
## 1 Ghoneum, Mamdooh Charle… USA 30 30 30.61% 1998 2021
## 2 Badr El-Din, Nariman … Univer… Egypt 7.64 9 9.18% 2008 2020
## 3 Gollapudi, Sastry Univer… USA 6 6 6.12% 2003 2011
## 4 Hajtó, Tibor Univer… Hungary 6 6 6.12% 2013 2018
## 5 Maeda, Hiroaki Daiwa … Japan 5.36 7 7.14% 2000 2004
## 6 Egashira, Yukari Chiba … Japan 4.5 6 6.12% 2001 2017
## 7 Lewis, John E. Univer… USA 4 4 4.08% 2012 2020
## 8 Hong, Seong Gil Erom C… South … 3.75 6 6.12% 2005 2022
## 9 Elsaid, Ahmed F. Zagazi… Egypt 3 3 3.06% 2018 2021
## 10 Friedman, Mendel U.S. D… USA 3 3 3.06% 2013 2018
## 11 Kim, Sung Phil Ajou U… South … 3 3 3.06% 2013 2018
## 12 Konefal, Janet Univer… USA 3 3 3.06% 2020 2020
## 13 Nam, Seok Hyun Ajou U… South … 3 3 3.06% 2013 2018
## # … with abbreviated variable names ¹Affiliation, ²SumWeightedCoeff, ³ArtCount,
## # ⁴Earliest
# Find the research focus of the authors
# get distinct
dis_authorship_list <- authorship_list %>% select(ArticleID, FullName) %>% distinct_all()
# Join the authors with distinct authorship list
dis_authorship_list <- dis_authorship_list %>%
left_join(author_list, by=c("FullName" = "FullName"))
# Join the article list to get the study types
research_focus <- dis_authorship_list %>%
left_join(article_list, by=c("ArticleID" = "ArticleID")) %>%
select(FullName, ArtCount, HumanStudy, AnimalStudy, InVitroStudy, ChemicalAnalysis, ClinicalDesign) %>%
mutate(HumanStudy = factor(HumanStudy),AnimalStudy =factor(AnimalStudy), InVitroStudy = factor(InVitroStudy), ChemicalAnalysis = factor(ChemicalAnalysis)) %>%
mutate(ClinicalDesign = factor(ClinicalDesign, levels = c("Randomised controlled trial" , "Non-randomised controlled trial", "Before and after study", "Descriptive cross-sectional studies", "Case series", "Case report"))) %>%
mutate(InterventionStudy = case_when(as.numeric(ClinicalDesign) < 4 ~ 1,
as.numeric(ClinicalDesign) > 0 ~ 0,
TRUE ~ 0)) %>%
mutate(OberservationStudy = case_when(as.numeric(ClinicalDesign) < 4 ~ 0,
as.numeric(ClinicalDesign) > 0 ~ 1,
TRUE ~ 0)) %>%
select(-c("ClinicalDesign"))
#research_focus
# Calculate the % of each study type over article count
research_focus <- research_focus %>%
group_by(FullName) %>%
summarise(H_p= percent(sum(as.numeric(HumanStudy)-1)/ ArtCount),
A_p = percent(sum(as.numeric(AnimalStudy)-1)/ ArtCount),
I_p = percent(sum(as.numeric(InVitroStudy)-1)/ ArtCount),
C_p = percent(sum(as.numeric(ChemicalAnalysis)-1)/ ArtCount),
HI_p = percent(sum(InterventionStudy)/ ArtCount),
HO_p = percent(sum(OberservationStudy)/ ArtCount )) %>% distinct_all()
## `summarise()` has grouped output by 'FullName'. You can override using the
## `.groups` argument.
#research_focus
# Display top authors with corresponding research focus
top_author_list_artcount_m <- top_author_list_artcount %>%
left_join(research_focus, by=c("FullName" = "FullName"))
top_author_list_artcount_m
## # A tibble: 16 × 14
## FullName Affil…¹ Country ArtCo…² ArtPerc SumWe…³ Earli…⁴ Latest H_p
## <chr> <chr> <chr> <dbl> <formt> <dbl> <dbl> <dbl> <formt>
## 1 Ghoneum, Mamd… Charle… USA 30 30.61% 30 1998 2021 26.67%
## 2 Badr El-Din, … Univer… Egypt 9 9.18% 7.64 2008 2020 0.00%
## 3 Maeda, Hiroaki Daiwa … Japan 7 7.14% 5.36 2000 2004 14.29%
## 4 Egashira, Yuk… Chiba … Japan 6 6.12% 4.5 2001 2017 0.00%
## 5 Gollapudi, Sa… Univer… USA 6 6.12% 6 2003 2011 0.00%
## 6 Hajtó, Tibor Univer… Hungary 6 6.12% 6 2013 2018 100.00%
## 7 Hong, Seong G… Erom C… South … 6 6.12% 3.75 2005 2022 16.67%
## 8 Hwang, Sung J… Erom C… South … 5 5.10% 1.51 2004 2022 0.00%
## 9 Lee, Seong Ae STR Bi… South … 5 5.10% 1.66 2013 2022 0.00%
## 10 Ali, Doaa A. Univer… Egypt 4 4.08% 2.14 2014 2020 0.00%
## 11 Kim, Hwa Young Erom C… South … 4 4.08% 2.46 2005 2011 0.00%
## 12 Kim, Jin Min Erom C… South … 4 4.08% 0.786 2007 2022 25.00%
## 13 Lewis, John E. Univer… USA 4 4.08% 4 2012 2020 100.00%
## 14 Park, Mi Hyoun Erom C… South … 4 4.08% 2.31 2004 2007 0.00%
## 15 Shaheen, Magda Charle… USA 4 4.08% 1.42 2010 2020 100.00%
## 16 Woolger, Judi… Univer… USA 4 4.08% 0.270 2012 2020 100.00%
## # … with 5 more variables: A_p <formttbl>, I_p <formttbl>, C_p <formttbl>,
## # HI_p <formttbl>, HO_p <formttbl>, and abbreviated variable names
## # ¹Affiliation, ²ArtCount, ³SumWeightedCoeff, ⁴Earliest
top_author_list_TWC_m <- top_author_list_TWC %>%
left_join(research_focus, by=c("FullName" = "FullName"))
#top_author_list_TWC_m
top_author_list <- merge(top_author_list_artcount_m, top_author_list_TWC_m, by = c("FullName", "Affiliation", "Country", "ArtCount", "ArtPerc", "SumWeightedCoeff", "Earliest","Latest",
"H_p", "A_p", "I_p", "C_p", "HI_p", "HO_p" ))
top_author_list %>% arrange(desc(SumWeightedCoeff))
## FullName Affiliation
## 1 Ghoneum, Mamdooh Charles Drew University of Medicine and Science
## 2 Badr El-Din, Nariman K. University of Mansoura
## 3 Gollapudi, Sastry University of California at Irvine
## 4 Hajtó, Tibor University of Pécs
## 5 Maeda, Hiroaki Daiwa Pharmaceutical Co. Ltd.
## 6 Egashira, Yukari Chiba University
## 7 Lewis, John E. University of Miami Miller School of Medicine
## 8 Hong, Seong Gil Erom Co. Ltd
## Country ArtCount ArtPerc SumWeightedCoeff Earliest Latest H_p A_p
## 1 USA 30 30.61% 30.000000 1998 2021 26.67% 30.00%
## 2 Egypt 9 9.18% 7.644444 2008 2020 0.00% 88.89%
## 3 USA 6 6.12% 6.000000 2003 2011 0.00% 0.00%
## 4 Hungary 6 6.12% 6.000000 2013 2018 100.00% 0.00%
## 5 Japan 7 7.14% 5.357143 2000 2004 14.29% 57.14%
## 6 Japan 6 6.12% 4.500000 2001 2017 0.00% 83.33%
## 7 USA 4 4.08% 4.000000 2012 2020 100.00% 0.00%
## 8 South Korea 6 6.12% 3.750794 2005 2022 16.67% 66.67%
## I_p C_p HI_p HO_p
## 1 53.33% 0.00% 26.67% 0.00%
## 2 11.11% 0.00% 0.00% 0.00%
## 3 100.00% 0.00% 0.00% 0.00%
## 4 0.00% 0.00% 0.00% 100.00%
## 5 14.29% 28.57% 14.29% 0.00%
## 6 16.67% 0.00% 0.00% 0.00%
## 7 0.00% 0.00% 100.00% 0.00%
## 8 50.00% 0.00% 16.67% 0.00%
#Create network nodes based on the author full name found in the list
nodes <- author_list %>% select(FullName, Country, ArtCount) %>% distinct_all()
nodes <- rowid_to_column(nodes, "id")
#Node label will be the author and group them by Country
nodes <- nodes %>% rename( label = FullName, group = Country, weight = ArtCount )
#nodes
#authorship_list
# Create edges as a data frame
edges <- data.frame(fromA= character(0), toA= character(0), category = character(0) )
# Edges are defined through iterating all the articles based on co-authorship within the same article
for (a in unique(authorship_list$ArticleID)) {
i_list <- authorship_list %>% filter(ArticleID == a)
u_list <- i_list$FullName
len <- length(u_list) -1
if (len > 0) {
for (i in c(1:len)) {
k <- i+ 1
for (j in c(k:length(u_list))) {
if (u_list[i][1] <= u_list[j][1]) {
new <- c(u_list[i][1], u_list[j][1], a)
} else {
new <- c(u_list[j][1], u_list[i][1], a)
}
edges[nrow(edges) + 1,] <- new
}
}
}
}
edges <- edges %>% left_join(nodes, by = c("fromA"= "label")) %>% rename(from=id)
edges <- edges %>% left_join(nodes, by = c("toA"= "label")) %>% rename(to=id)
edges <- edges %>% select(from, to, category)
#edges
# Display the raw network diagram
visNetwork(nodes, edges) %>%
visIgraphLayout()
#Use routes_tidy package to write the graph into GML file for file exchange
routes_tidy <- tbl_graph(nodes = nodes,
edges = edges,
directed = FALSE)
write_graph(routes_tidy, "author_graph.gml", format="gml" )
routes_tidy
## # A tbl_graph: 289 nodes and 1525 edges
## #
## # An undirected multigraph with 32 components
## #
## # Node Data: 289 × 4 (active)
## id label group weight
## <int> <chr> <chr> <dbl>
## 1 1 Abbas, Muhammad H. USA 2
## 2 2 Abdel Fattah, Salma M. Egypt 2
## 3 3 Abedi, Sarah USA 1
## 4 4 Abou Mossallam, Ahlam A. Egypt 1
## 5 5 Agrawal, Anshu USA 1
## 6 6 Agrawal, Sudhanshu USA 3
## # … with 283 more rows
## #
## # Edge Data: 1,525 × 3
## from to category
## <int> <int> <chr>
## 1 10 170 Ali et al. (2012)
## 2 10 146 Ali et al. (2012)
## 3 10 15 Ali et al. (2012)
## # … with 1,522 more rows
#collapse the edges by creating a value to each edge based on count of co-occurance
edges1 <- edges %>% filter(from != to) %>% group_by(from, to, category) %>% summarise(value = n())
## `summarise()` has grouped output by 'from', 'to'. You can override using the
## `.groups` argument.
# use igraph
#g <- graph_from_data_frame(edges1, directed = FALSE, vertices = nodes)
# Define node size based on the value
nodes <- nodes %>% mutate(value = weight) %>% mutate(font.size = ifelse(weight> 12, 36+12^2,36+weight^2) )
#nodes <- nodes %>% mutate(font.size = 24+value)
# Define node size based on the degree
# nodes <- nodes %>% mutate(value = (degree(g)^2)) %>% mutate(font.size = 14+degree(g))
nodes %>% arrange(desc(font.size))
## # A tibble: 289 × 6
## id label group weight value font.size
## <int> <chr> <chr> <dbl> <dbl> <dbl>
## 1 68 Ghoneum, Mamdooh USA 30 30 180
## 2 17 Badr El-Din, Nariman K. Egypt 9 9 117
## 3 153 Maeda, Hiroaki Japan 7 7 85
## 4 51 Egashira, Yukari Japan 6 6 72
## 5 71 Gollapudi, Sastry USA 6 6 72
## 6 73 Hajtó, Tibor Hungary 6 6 72
## 7 90 Hong, Seong Gil South Korea 6 6 72
## 8 94 Hwang, Sung Joo South Korea 5 5 61
## 9 144 Lee, Seong Ae South Korea 5 5 61
## 10 9 Ali, Doaa A. Egypt 4 4 52
## # … with 279 more rows
# Plot the network
visNetwork(nodes, edges1, main = "Collaorative Networks of Authors in RBAC Research", height = "1000", width = "100%" ) %>%
visNodes(scaling = list(min = 5, max = 100), borderWidth=0) %>%
visEdges(scaling = list(min = 1, max = 5), color=edges1$category) %>%
visPhysics(stabilization= FALSE, solver = "forceAtlas2Based")
# visIgraphLayout()
#Use routes_tidy package to write the graph into GML file for file exchange
routes_tidy1 <- tbl_graph(nodes = nodes,
edges = edges1,
directed = FALSE)
write_graph(routes_tidy1, "author_graph1.gml", format="gml" )
routes_tidy1
## # A tbl_graph: 289 nodes and 1479 edges
## #
## # An undirected multigraph with 32 components
## #
## # Node Data: 289 × 6 (active)
## id label group weight value font.size
## <int> <chr> <chr> <dbl> <dbl> <dbl>
## 1 1 Abbas, Muhammad H. USA 2 2 40
## 2 2 Abdel Fattah, Salma M. Egypt 2 2 40
## 3 3 Abedi, Sarah USA 1 1 37
## 4 4 Abou Mossallam, Ahlam A. Egypt 1 1 37
## 5 5 Agrawal, Anshu USA 1 1 37
## 6 6 Agrawal, Sudhanshu USA 3 3 45
## # … with 283 more rows
## #
## # Edge Data: 1,479 × 4
## from to category value
## <int> <int> <chr> <int>
## 1 1 16 Lewis et al. (2020a) 1
## 2 1 16 Lewis et al. (2020b) 1
## 3 1 39 Lewis et al. (2020a) 1
## # … with 1,476 more rows