R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

Part II. Bibliometric analysis

Load packages

library(statsr)
library(tidyverse)
library(readxl)
library(visNetwork)
library(igraph)
library(tidygraph)
library(ggraph)
library(formattable)

The data

# Let's load up the Authors data from excel file
authors_data <- read_excel("Data.xlsx", sheet = "Authors")
authorship_list <- read_excel("Data.xlsx", sheet = "Authorships")
article_list <- read_excel("Data.xlsx", sheet = "Articles")

Authors

# Create the author list 
author_list <- authors_data %>% select(AuthorID, FullName, Affiliation, Country, ArtCount, ArtPerc, SumWeightedCoeff, Earliest, Latest) %>% 
  mutate(ArtPerc = formattable::percent(ArtPerc)) 

The publishing period of the top 10 authors by article count.

# Display Top 10 Authors by Article Count
top_author_list_artcount <- author_list %>% 
  arrange(desc(ArtCount)) %>% 
  select(FullName, Affiliation, Country, ArtCount, ArtPerc, SumWeightedCoeff, Earliest, Latest) %>%
  top_n(10, ArtCount)
top_author_list_artcount
## # A tibble: 16 × 8
##    FullName               Affil…¹ Country ArtCo…² ArtPerc SumWe…³ Earli…⁴ Latest
##    <chr>                  <chr>   <chr>     <dbl> <formt>   <dbl>   <dbl>  <dbl>
##  1 Ghoneum, Mamdooh       Charle… USA          30 30.61%   30        1998   2021
##  2 Badr El-Din, Nariman … Univer… Egypt         9 9.18%     7.64     2008   2020
##  3 Maeda, Hiroaki         Daiwa … Japan         7 7.14%     5.36     2000   2004
##  4 Egashira, Yukari       Chiba … Japan         6 6.12%     4.5      2001   2017
##  5 Gollapudi, Sastry      Univer… USA           6 6.12%     6        2003   2011
##  6 Hajtó, Tibor           Univer… Hungary       6 6.12%     6        2013   2018
##  7 Hong, Seong Gil        Erom C… South …       6 6.12%     3.75     2005   2022
##  8 Hwang, Sung Joo        Erom C… South …       5 5.10%     1.51     2004   2022
##  9 Lee, Seong Ae          STR Bi… South …       5 5.10%     1.66     2013   2022
## 10 Ali, Doaa A.           Univer… Egypt         4 4.08%     2.14     2014   2020
## 11 Kim, Hwa Young         Erom C… South …       4 4.08%     2.46     2005   2011
## 12 Kim, Jin Min           Erom C… South …       4 4.08%     0.786    2007   2022
## 13 Lewis, John E.         Univer… USA           4 4.08%     4        2012   2020
## 14 Park, Mi Hyoun         Erom C… South …       4 4.08%     2.31     2004   2007
## 15 Shaheen, Magda         Charle… USA           4 4.08%     1.42     2010   2020
## 16 Woolger, Judi M.       Univer… USA           4 4.08%     0.270    2012   2020
## # … with abbreviated variable names ¹​Affiliation, ²​ArtCount, ³​SumWeightedCoeff,
## #   ⁴​Earliest
# Display Top 10 Authors by Sum of Weight Coefficient Based on Author Rank
top_author_list_TWC <- author_list %>% 
  arrange(desc(SumWeightedCoeff)) %>% 
  select(FullName, Affiliation, Country,SumWeightedCoeff, ArtCount, ArtPerc, Earliest, Latest) %>%
  top_n(10, SumWeightedCoeff)
top_author_list_TWC
## # A tibble: 13 × 8
##    FullName               Affil…¹ Country SumWe…² ArtCo…³ ArtPerc Earli…⁴ Latest
##    <chr>                  <chr>   <chr>     <dbl>   <dbl> <formt>   <dbl>  <dbl>
##  1 Ghoneum, Mamdooh       Charle… USA       30         30 30.61%     1998   2021
##  2 Badr El-Din, Nariman … Univer… Egypt      7.64       9 9.18%      2008   2020
##  3 Gollapudi, Sastry      Univer… USA        6          6 6.12%      2003   2011
##  4 Hajtó, Tibor           Univer… Hungary    6          6 6.12%      2013   2018
##  5 Maeda, Hiroaki         Daiwa … Japan      5.36       7 7.14%      2000   2004
##  6 Egashira, Yukari       Chiba … Japan      4.5        6 6.12%      2001   2017
##  7 Lewis, John E.         Univer… USA        4          4 4.08%      2012   2020
##  8 Hong, Seong Gil        Erom C… South …    3.75       6 6.12%      2005   2022
##  9 Elsaid, Ahmed F.       Zagazi… Egypt      3          3 3.06%      2018   2021
## 10 Friedman, Mendel       U.S. D… USA        3          3 3.06%      2013   2018
## 11 Kim, Sung Phil         Ajou U… South …    3          3 3.06%      2013   2018
## 12 Konefal, Janet         Univer… USA        3          3 3.06%      2020   2020
## 13 Nam, Seok Hyun         Ajou U… South …    3          3 3.06%      2013   2018
## # … with abbreviated variable names ¹​Affiliation, ²​SumWeightedCoeff, ³​ArtCount,
## #   ⁴​Earliest
# Find the research focus of the authors

# get distinct 
dis_authorship_list <- authorship_list %>% select(ArticleID, FullName) %>% distinct_all()

# Join the authors with distinct authorship list
dis_authorship_list <- dis_authorship_list %>% 
  left_join(author_list, by=c("FullName" = "FullName")) 

# Join the article list to get the study types

research_focus <- dis_authorship_list %>% 
  left_join(article_list, by=c("ArticleID" = "ArticleID")) %>% 
  select(FullName, ArtCount, HumanStudy, AnimalStudy, InVitroStudy, ChemicalAnalysis, ClinicalDesign) %>%
  mutate(HumanStudy = factor(HumanStudy),AnimalStudy =factor(AnimalStudy), InVitroStudy = factor(InVitroStudy), ChemicalAnalysis = factor(ChemicalAnalysis)) %>%
  mutate(ClinicalDesign = factor(ClinicalDesign, levels = c("Randomised controlled trial" , "Non-randomised controlled trial", "Before and after study", "Descriptive cross-sectional studies", "Case series", "Case report"))) %>%
  mutate(InterventionStudy = case_when(as.numeric(ClinicalDesign) < 4 ~ 1,
                                       as.numeric(ClinicalDesign) > 0 ~ 0,
                                       TRUE ~ 0))  %>%
  mutate(OberservationStudy = case_when(as.numeric(ClinicalDesign) < 4 ~ 0,
                                       as.numeric(ClinicalDesign) > 0 ~ 1,
                                       TRUE ~ 0)) %>%
  select(-c("ClinicalDesign"))
#research_focus


# Calculate the % of each study type over article count

research_focus <- research_focus %>% 
  group_by(FullName) %>% 
  summarise(H_p= percent(sum(as.numeric(HumanStudy)-1)/ ArtCount), 
            A_p = percent(sum(as.numeric(AnimalStudy)-1)/ ArtCount), 
            I_p = percent(sum(as.numeric(InVitroStudy)-1)/ ArtCount), 
            C_p = percent(sum(as.numeric(ChemicalAnalysis)-1)/ ArtCount), 
            HI_p = percent(sum(InterventionStudy)/ ArtCount), 
            HO_p = percent(sum(OberservationStudy)/ ArtCount )) %>% distinct_all()
## `summarise()` has grouped output by 'FullName'. You can override using the
## `.groups` argument.
#research_focus
# Display top authors with corresponding research focus

top_author_list_artcount_m <- top_author_list_artcount %>% 
  left_join(research_focus, by=c("FullName" = "FullName"))
top_author_list_artcount_m
## # A tibble: 16 × 14
##    FullName       Affil…¹ Country ArtCo…² ArtPerc SumWe…³ Earli…⁴ Latest H_p    
##    <chr>          <chr>   <chr>     <dbl> <formt>   <dbl>   <dbl>  <dbl> <formt>
##  1 Ghoneum, Mamd… Charle… USA          30 30.61%   30        1998   2021 26.67% 
##  2 Badr El-Din, … Univer… Egypt         9 9.18%     7.64     2008   2020 0.00%  
##  3 Maeda, Hiroaki Daiwa … Japan         7 7.14%     5.36     2000   2004 14.29% 
##  4 Egashira, Yuk… Chiba … Japan         6 6.12%     4.5      2001   2017 0.00%  
##  5 Gollapudi, Sa… Univer… USA           6 6.12%     6        2003   2011 0.00%  
##  6 Hajtó, Tibor   Univer… Hungary       6 6.12%     6        2013   2018 100.00%
##  7 Hong, Seong G… Erom C… South …       6 6.12%     3.75     2005   2022 16.67% 
##  8 Hwang, Sung J… Erom C… South …       5 5.10%     1.51     2004   2022 0.00%  
##  9 Lee, Seong Ae  STR Bi… South …       5 5.10%     1.66     2013   2022 0.00%  
## 10 Ali, Doaa A.   Univer… Egypt         4 4.08%     2.14     2014   2020 0.00%  
## 11 Kim, Hwa Young Erom C… South …       4 4.08%     2.46     2005   2011 0.00%  
## 12 Kim, Jin Min   Erom C… South …       4 4.08%     0.786    2007   2022 25.00% 
## 13 Lewis, John E. Univer… USA           4 4.08%     4        2012   2020 100.00%
## 14 Park, Mi Hyoun Erom C… South …       4 4.08%     2.31     2004   2007 0.00%  
## 15 Shaheen, Magda Charle… USA           4 4.08%     1.42     2010   2020 100.00%
## 16 Woolger, Judi… Univer… USA           4 4.08%     0.270    2012   2020 100.00%
## # … with 5 more variables: A_p <formttbl>, I_p <formttbl>, C_p <formttbl>,
## #   HI_p <formttbl>, HO_p <formttbl>, and abbreviated variable names
## #   ¹​Affiliation, ²​ArtCount, ³​SumWeightedCoeff, ⁴​Earliest
top_author_list_TWC_m <- top_author_list_TWC %>% 
  left_join(research_focus, by=c("FullName" = "FullName"))
#top_author_list_TWC_m

top_author_list <- merge(top_author_list_artcount_m, top_author_list_TWC_m, by = c("FullName", "Affiliation", "Country", "ArtCount", "ArtPerc", "SumWeightedCoeff", "Earliest","Latest", 
                                                                               "H_p", "A_p", "I_p", "C_p", "HI_p", "HO_p" ))
top_author_list %>% arrange(desc(SumWeightedCoeff))
##                  FullName                                     Affiliation
## 1        Ghoneum, Mamdooh Charles Drew University of Medicine and Science
## 2 Badr El-Din, Nariman K.                          University of Mansoura
## 3       Gollapudi, Sastry              University of California at Irvine
## 4            Hajtó, Tibor                              University of Pécs
## 5          Maeda, Hiroaki                   Daiwa Pharmaceutical Co. Ltd.
## 6        Egashira, Yukari                                Chiba University
## 7          Lewis, John E.   University of Miami Miller School of Medicine
## 8         Hong, Seong Gil                                    Erom Co. Ltd
##       Country ArtCount ArtPerc SumWeightedCoeff Earliest Latest     H_p    A_p
## 1         USA       30  30.61%        30.000000     1998   2021  26.67% 30.00%
## 2       Egypt        9   9.18%         7.644444     2008   2020   0.00% 88.89%
## 3         USA        6   6.12%         6.000000     2003   2011   0.00%  0.00%
## 4     Hungary        6   6.12%         6.000000     2013   2018 100.00%  0.00%
## 5       Japan        7   7.14%         5.357143     2000   2004  14.29% 57.14%
## 6       Japan        6   6.12%         4.500000     2001   2017   0.00% 83.33%
## 7         USA        4   4.08%         4.000000     2012   2020 100.00%  0.00%
## 8 South Korea        6   6.12%         3.750794     2005   2022  16.67% 66.67%
##       I_p    C_p    HI_p    HO_p
## 1  53.33%  0.00%  26.67%   0.00%
## 2  11.11%  0.00%   0.00%   0.00%
## 3 100.00%  0.00%   0.00%   0.00%
## 4   0.00%  0.00%   0.00% 100.00%
## 5  14.29% 28.57%  14.29%   0.00%
## 6  16.67%  0.00%   0.00%   0.00%
## 7   0.00%  0.00% 100.00%   0.00%
## 8  50.00%  0.00%  16.67%   0.00%
#Create network nodes based on the author full name found in the list
nodes <- author_list %>% select(FullName, Country, ArtCount) %>% distinct_all()
nodes <- rowid_to_column(nodes, "id")

#Node label will be the author and group them by Country 
nodes <- nodes %>% rename( label = FullName, group = Country, weight = ArtCount )
#nodes
#authorship_list
# Create edges as a data frame 
edges <- data.frame(fromA= character(0), toA= character(0), category = character(0) )

# Edges are defined through iterating all the articles based on co-authorship within the same article
for (a in unique(authorship_list$ArticleID)) {
  i_list <- authorship_list %>% filter(ArticleID == a)
  u_list <- i_list$FullName
  len <- length(u_list) -1 
  if (len > 0) {
    for (i in c(1:len)) {
        k <- i+ 1
        for (j in c(k:length(u_list))) {
          if (u_list[i][1] <= u_list[j][1]) {
              new <- c(u_list[i][1], u_list[j][1], a)
          } else {
              new <- c(u_list[j][1], u_list[i][1], a)
          }
          edges[nrow(edges) + 1,] <- new
        }
    }
  }
}
edges <- edges %>% left_join(nodes, by = c("fromA"= "label")) %>% rename(from=id)
edges <- edges %>% left_join(nodes, by = c("toA"= "label")) %>% rename(to=id)
edges <- edges %>% select(from, to, category)
#edges

Author Network

# Display the raw network diagram
visNetwork(nodes, edges) %>%
  visIgraphLayout()
#Use routes_tidy package to write the graph into GML file for file exchange 
routes_tidy <- tbl_graph(nodes = nodes,
                         edges = edges,
                         directed = FALSE)
write_graph(routes_tidy, "author_graph.gml", format="gml" )
routes_tidy
## # A tbl_graph: 289 nodes and 1525 edges
## #
## # An undirected multigraph with 32 components
## #
## # Node Data: 289 × 4 (active)
##      id label                    group weight
##   <int> <chr>                    <chr>  <dbl>
## 1     1 Abbas, Muhammad H.       USA        2
## 2     2 Abdel Fattah, Salma M.   Egypt      2
## 3     3 Abedi, Sarah             USA        1
## 4     4 Abou Mossallam, Ahlam A. Egypt      1
## 5     5 Agrawal, Anshu           USA        1
## 6     6 Agrawal, Sudhanshu       USA        3
## # … with 283 more rows
## #
## # Edge Data: 1,525 × 3
##    from    to category         
##   <int> <int> <chr>            
## 1    10   170 Ali et al. (2012)
## 2    10   146 Ali et al. (2012)
## 3    10    15 Ali et al. (2012)
## # … with 1,522 more rows
#collapse the edges by creating a value to each edge based on count of co-occurance
edges1 <- edges %>% filter(from != to) %>% group_by(from, to, category) %>% summarise(value = n())
## `summarise()` has grouped output by 'from', 'to'. You can override using the
## `.groups` argument.
# use igraph 
#g <- graph_from_data_frame(edges1, directed = FALSE, vertices = nodes)

# Define node size based on the value 
nodes <- nodes %>% mutate(value = weight) %>% mutate(font.size = ifelse(weight> 12, 36+12^2,36+weight^2) )
#nodes <- nodes %>% mutate(font.size = 24+value)
# Define node size based on the degree 
# nodes <- nodes %>% mutate(value = (degree(g)^2)) %>% mutate(font.size = 14+degree(g))
nodes %>% arrange(desc(font.size))
## # A tibble: 289 × 6
##       id label                   group       weight value font.size
##    <int> <chr>                   <chr>        <dbl> <dbl>     <dbl>
##  1    68 Ghoneum, Mamdooh        USA             30    30       180
##  2    17 Badr El-Din, Nariman K. Egypt            9     9       117
##  3   153 Maeda, Hiroaki          Japan            7     7        85
##  4    51 Egashira, Yukari        Japan            6     6        72
##  5    71 Gollapudi, Sastry       USA              6     6        72
##  6    73 Hajtó, Tibor            Hungary          6     6        72
##  7    90 Hong, Seong Gil         South Korea      6     6        72
##  8    94 Hwang, Sung Joo         South Korea      5     5        61
##  9   144 Lee, Seong Ae           South Korea      5     5        61
## 10     9 Ali, Doaa A.            Egypt            4     4        52
## # … with 279 more rows
# Plot the network 
visNetwork(nodes, edges1,  main = "Collaorative Networks of Authors in RBAC Research",  height = "1000", width = "100%" ) %>% 
  visNodes(scaling = list(min = 5, max = 100), borderWidth=0) %>% 
  visEdges(scaling = list(min = 1, max = 5), color=edges1$category) %>% 
  visPhysics(stabilization= FALSE, solver = "forceAtlas2Based")
#  visIgraphLayout()
#Use routes_tidy package to write the graph into GML file for file exchange 
routes_tidy1 <- tbl_graph(nodes = nodes,
                         edges = edges1,
                         directed = FALSE)
write_graph(routes_tidy1, "author_graph1.gml", format="gml" )
routes_tidy1
## # A tbl_graph: 289 nodes and 1479 edges
## #
## # An undirected multigraph with 32 components
## #
## # Node Data: 289 × 6 (active)
##      id label                    group weight value font.size
##   <int> <chr>                    <chr>  <dbl> <dbl>     <dbl>
## 1     1 Abbas, Muhammad H.       USA        2     2        40
## 2     2 Abdel Fattah, Salma M.   Egypt      2     2        40
## 3     3 Abedi, Sarah             USA        1     1        37
## 4     4 Abou Mossallam, Ahlam A. Egypt      1     1        37
## 5     5 Agrawal, Anshu           USA        1     1        37
## 6     6 Agrawal, Sudhanshu       USA        3     3        45
## # … with 283 more rows
## #
## # Edge Data: 1,479 × 4
##    from    to category             value
##   <int> <int> <chr>                <int>
## 1     1    16 Lewis et al. (2020a)     1
## 2     1    16 Lewis et al. (2020b)     1
## 3     1    39 Lewis et al. (2020a)     1
## # … with 1,476 more rows