R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

Part III. Bibliometric analysis - Institutions

Load packages

library(statsr)
library(tidyverse)
library(readxl)
library(visNetwork)
library(igraph)
library(tidygraph)
library(ggraph)
library(formattable)

The data

# Let's load up the data from excel file
institutions_data <- read_excel("Data.xlsx", sheet = "Institutions")
authorship_list <- read_excel("Data.xlsx", sheet = "Authorships")
article_list <- read_excel("Data.xlsx", sheet = "Articles")

nrow(institutions_data)
## [1] 100
# Summaries by type
institutions_data %>% group_by(Type) %>% summarise(N = n()) %>% mutate(perc = percent(N / sum(N))) %>% arrange(desc(perc))
## # A tibble: 5 × 3
##   Type                 N perc      
##   <chr>            <int> <formttbl>
## 1 Academic            51 51.00%    
## 2 Healthcare          17 17.00%    
## 3 Private Practice    14 14.00%    
## 4 Research            10 10.00%    
## 5 Commercial           8 8.00%
# Create the institution list
inst_list <- institutions_data %>% select(Organisation, Country, Type, AuthorCount, ArtCount, ArtPerc, SumWeightedCoeff, Earliest, Latest) %>% 
  mutate(ArtPerc = formattable::percent(ArtPerc)) 

# Display Top 10 Institutions by Article Count
top_inst_by_artcount <- inst_list %>% 
  top_n(10, ArtCount)

# Display Top 10 Institutions by Sum of Weight Coefficient of the authors
top_inst_by_TWC <- inst_list %>% 
  top_n(10, SumWeightedCoeff)

# outer join the two list and arrange by TWC
top_inst_all <- merge(top_inst_by_artcount, top_inst_by_TWC, 
                       by=c("Organisation", "Country","Type", "AuthorCount", "SumWeightedCoeff","ArtCount", "ArtPerc",  "Earliest", "Latest" ), all = TRUE) 



top_inst_all %>% arrange(desc(SumWeightedCoeff))
##                                             Organisation     Country       Type
## 1        Charles Drew University of Medicine and Science         USA   Academic
## 2                                           Erom Co. Ltd South Korea Commercial
## 3                                       Chiba University       Japan   Academic
## 4                                     University of Pécs     Hungary   Academic
## 5                                 University of Mansoura       Egypt   Academic
## 6                          Daiwa Pharmaceutical Co. Ltd.       Japan Commercial
## 7          University of Miami Miller School of Medicine         USA   Academic
## 8                     University of California at Irvine         USA   Academic
## 9                             Slovak Academy of Sciences    Slovakia   Academic
## 10                                       Ajou University South Korea   Academic
## 11                                  STR Biotech Co. Ltd. South Korea Commercial
## 12 National Center for Radiation Research and Technology       Egypt   Research
## 13                                      Korea University South Korea   Academic
##    AuthorCount SumWeightedCoeff ArtCount ArtPerc Earliest Latest
## 1            6        35.033862       30  30.61%     1998   2021
## 2           13        15.485714        8   8.16%     2004   2022
## 3           12        14.494709        7   7.14%     2000   2017
## 4           12        14.366667        7   7.14%     2013   2018
## 5            5        13.122222        9   9.18%     2008   2020
## 6           10        10.636508       11  11.22%     2000   2017
## 7           21        10.589107        4   4.08%     2012   2020
## 8            3         9.000000        9   9.18%     2003   2021
## 9            5         5.218519        2   2.04%     2009   2013
## 10           2         5.000000        3   3.06%     2013   2018
## 11           2         3.700529        5   5.10%     2013   2022
## 12           2         2.544444        4   4.08%     2008   2016
## 13           2         2.131217        4   4.08%     2004   2011
top_inst_all %>% arrange(desc(ArtCount))
##                                             Organisation     Country       Type
## 1        Charles Drew University of Medicine and Science         USA   Academic
## 2                          Daiwa Pharmaceutical Co. Ltd.       Japan Commercial
## 3                     University of California at Irvine         USA   Academic
## 4                                 University of Mansoura       Egypt   Academic
## 5                                           Erom Co. Ltd South Korea Commercial
## 6                                       Chiba University       Japan   Academic
## 7                                     University of Pécs     Hungary   Academic
## 8                                   STR Biotech Co. Ltd. South Korea Commercial
## 9                                       Korea University South Korea   Academic
## 10 National Center for Radiation Research and Technology       Egypt   Research
## 11         University of Miami Miller School of Medicine         USA   Academic
## 12                                       Ajou University South Korea   Academic
## 13                            Slovak Academy of Sciences    Slovakia   Academic
##    AuthorCount SumWeightedCoeff ArtCount ArtPerc Earliest Latest
## 1            6        35.033862       30  30.61%     1998   2021
## 2           10        10.636508       11  11.22%     2000   2017
## 3            3         9.000000        9   9.18%     2003   2021
## 4            5        13.122222        9   9.18%     2008   2020
## 5           13        15.485714        8   8.16%     2004   2022
## 6           12        14.494709        7   7.14%     2000   2017
## 7           12        14.366667        7   7.14%     2013   2018
## 8            2         3.700529        5   5.10%     2013   2022
## 9            2         2.131217        4   4.08%     2004   2011
## 10           2         2.544444        4   4.08%     2008   2016
## 11          21        10.589107        4   4.08%     2012   2020
## 12           2         5.000000        3   3.06%     2013   2018
## 13           5         5.218519        2   2.04%     2009   2013
# inner join
top_inst  <- merge(top_inst_by_artcount, top_inst_by_TWC, 
                       by=c("Organisation", "Country","Type", "AuthorCount", "SumWeightedCoeff","ArtCount", "ArtPerc",  "Earliest", "Latest"  ), all = FALSE) 

top_inst %>% arrange(desc(SumWeightedCoeff))
##                                      Organisation     Country       Type
## 1 Charles Drew University of Medicine and Science         USA   Academic
## 2                                    Erom Co. Ltd South Korea Commercial
## 3                                Chiba University       Japan   Academic
## 4                              University of Pécs     Hungary   Academic
## 5                          University of Mansoura       Egypt   Academic
## 6                   Daiwa Pharmaceutical Co. Ltd.       Japan Commercial
## 7   University of Miami Miller School of Medicine         USA   Academic
## 8              University of California at Irvine         USA   Academic
##   AuthorCount SumWeightedCoeff ArtCount ArtPerc Earliest Latest
## 1           6         35.03386       30  30.61%     1998   2021
## 2          13         15.48571        8   8.16%     2004   2022
## 3          12         14.49471        7   7.14%     2000   2017
## 4          12         14.36667        7   7.14%     2013   2018
## 5           5         13.12222        9   9.18%     2008   2020
## 6          10         10.63651       11  11.22%     2000   2017
## 7          21         10.58911        4   4.08%     2012   2020
## 8           3          9.00000        9   9.18%     2003   2021
top_inst %>% arrange(desc(ArtCount))
##                                      Organisation     Country       Type
## 1 Charles Drew University of Medicine and Science         USA   Academic
## 2                   Daiwa Pharmaceutical Co. Ltd.       Japan Commercial
## 3              University of California at Irvine         USA   Academic
## 4                          University of Mansoura       Egypt   Academic
## 5                                    Erom Co. Ltd South Korea Commercial
## 6                                Chiba University       Japan   Academic
## 7                              University of Pécs     Hungary   Academic
## 8   University of Miami Miller School of Medicine         USA   Academic
##   AuthorCount SumWeightedCoeff ArtCount ArtPerc Earliest Latest
## 1           6         35.03386       30  30.61%     1998   2021
## 2          10         10.63651       11  11.22%     2000   2017
## 3           3          9.00000        9   9.18%     2003   2021
## 4           5         13.12222        9   9.18%     2008   2020
## 5          13         15.48571        8   8.16%     2004   2022
## 6          12         14.49471        7   7.14%     2000   2017
## 7          12         14.36667        7   7.14%     2013   2018
## 8          21         10.58911        4   4.08%     2012   2020
# Find the research focus of the institutions 

# get distinct institutions
dis_inst_list <- authorship_list %>% select(ArticleID, Affiliation) %>% distinct_all()

# Join the institutions with distinct authorship list
dis_inst_list <- dis_inst_list %>% 
  left_join(inst_list, by=c("Affiliation" = "Organisation" )) 

# Join the article list to get the study types

research_focus <- dis_inst_list %>% 
  left_join(article_list, by=c("ArticleID" = "ArticleID")) %>% 
  select(Organisation = Affiliation, ArtCount, HumanStudy, AnimalStudy, InVitroStudy, ChemicalAnalysis, ClinicalDesign) %>%
  mutate(HumanStudy = factor(HumanStudy),AnimalStudy =factor(AnimalStudy), InVitroStudy = factor(InVitroStudy), ChemicalAnalysis = factor(ChemicalAnalysis)) %>%
  mutate(ClinicalDesign = factor(ClinicalDesign, levels = c("Randomised controlled trial" , "Non-randomised controlled trial", "Before and after study", "Descriptive cross-sectional studies", "Case series", "Case report"))) %>%
  mutate(InterventionStudy = case_when(as.numeric(ClinicalDesign) < 4 ~ 1,
                                       as.numeric(ClinicalDesign) > 0 ~ 0,
                                       TRUE ~ 0))  %>%
  mutate(OberservationStudy = case_when(as.numeric(ClinicalDesign) < 4 ~ 0,
                                       as.numeric(ClinicalDesign) > 0 ~ 1,
                                       TRUE ~ 0)) %>%
  select(-c("ClinicalDesign"))
research_focus
## # A tibble: 211 × 8
##    Organisation          ArtCo…¹ Human…² Anima…³ InVit…⁴ Chemi…⁵ Inter…⁶ Obers…⁷
##    <chr>                   <dbl> <fct>   <fct>   <fct>   <fct>     <dbl>   <dbl>
##  1 Pharmacognosia              1 Yes     No      No      No            1       0
##  2 University of Miami …       4 Yes     No      No      No            1       0
##  3 Fisher Institute for…       1 Yes     No      No      No            1       0
##  4 Korea University            4 No      Yes     Yes     Yes           0       0
##  5 University of Mansou…       9 No      Yes     No      No            0       0
##  6 National Center for …       4 No      Yes     No      No            0       0
##  7 Charles Drew Univers…      30 No      Yes     No      No            0       0
##  8 University of Mansou…       9 No      Yes     No      No            0       0
##  9 National Center for …       4 No      Yes     No      No            0       0
## 10 Charles Drew Univers…      30 No      Yes     No      No            0       0
## # … with 201 more rows, and abbreviated variable names ¹​ArtCount, ²​HumanStudy,
## #   ³​AnimalStudy, ⁴​InVitroStudy, ⁵​ChemicalAnalysis, ⁶​InterventionStudy,
## #   ⁷​OberservationStudy
# Calculate the % of each study type over article count

research_focus <- research_focus %>% 
  group_by(Organisation) %>% 
  summarise(H_p= percent(sum(as.numeric(HumanStudy)-1)/ ArtCount), 
            A_p = percent(sum(as.numeric(AnimalStudy)-1)/ ArtCount), 
            I_p = percent(sum(as.numeric(InVitroStudy)-1)/ ArtCount), 
            C_p = percent(sum(as.numeric(ChemicalAnalysis)-1)/ ArtCount), 
            HI_p = percent(sum(InterventionStudy)/ ArtCount), 
            HO_p = percent(sum(OberservationStudy)/ ArtCount )) %>% distinct_all()
## `summarise()` has grouped output by 'Organisation'. You can override using the
## `.groups` argument.
research_focus
## # A tibble: 101 × 7
## # Groups:   Organisation [101]
##    Organisation                   H_p     A_p     I_p     C_p    HI_p    HO_p   
##    <chr>                          <formt> <formt> <formt> <form> <formt> <formt>
##  1 Affiliated Hospital of Nanjin… 0.00%   100.00% 0.00%   0.00%  0.00%   0.00%  
##  2 Aichi-Gakuin University        0.00%   0.00%   100.00% 0.00%  0.00%   0.00%  
##  3 Aichi Gakusen University       0.00%   100.00% 0.00%   0.00%  0.00%   0.00%  
##  4 Ajou University                0.00%   100.00% 100.00% 66.67% 0.00%   0.00%  
##  5 Al-Qalam University            0.00%   100.00% 0.00%   0.00%  0.00%   0.00%  
##  6 Andong Science College         0.00%   100.00% 0.00%   0.00%  0.00%   0.00%  
##  7 Anhui University of Tradition… 0.00%   100.00% 100.00% 0.00%  0.00%   0.00%  
##  8 Atreyu Uozaki Long Term Care … 100.00% 0.00%   0.00%   0.00%  100.00% 0.00%  
##  9 Barclay’s, Inc.                100.00% 0.00%   0.00%   0.00%  100.00% 0.00%  
## 10 Baylor College of Medicine     100.00% 0.00%   0.00%   0.00%  0.00%   100.00%
## # … with 91 more rows
# Display top institutions with corresponding research focus

top_inst_list <- top_inst %>%
  left_join(research_focus, by=c("Organisation" = "Organisation")) %>% arrange(desc(ArtCount))

top_inst_list %>% select(Organisation, Earliest, Latest, HI_p, HO_p, A_p, I_p, C_p  )
##                                      Organisation Earliest Latest    HI_p
## 1 Charles Drew University of Medicine and Science     1998   2021  26.67%
## 2                   Daiwa Pharmaceutical Co. Ltd.     2000   2017  18.18%
## 3              University of California at Irvine     2003   2021  11.11%
## 4                          University of Mansoura     2008   2020   0.00%
## 5                                    Erom Co. Ltd     2004   2022  12.50%
## 6                                Chiba University     2000   2017   0.00%
## 7                              University of Pécs     2013   2018  14.29%
## 8   University of Miami Miller School of Medicine     2012   2020 100.00%
##     HO_p    A_p     I_p    C_p
## 1  0.00% 30.00%  53.33%  0.00%
## 2  0.00% 45.45%  27.27% 18.18%
## 3  0.00%  0.00% 100.00%  0.00%
## 4  0.00% 88.89%  11.11%  0.00%
## 5  0.00% 62.50%  50.00%  0.00%
## 6  0.00% 71.43%  14.29% 14.29%
## 7 85.71%  0.00%   0.00%  0.00%
## 8  0.00%  0.00%   0.00%  0.00%
# Create the institution list with article ID 
institution_list <- authorship_list %>% select(ArticleID, Affiliation, Country)
institution_list
## # A tibble: 471 × 3
##    ArticleID                 Affiliation                                 Country
##    <chr>                     <chr>                                       <chr>  
##  1 Ali et al. (2012)         Pharmacognosia                              USA    
##  2 Ali et al. (2012)         University of Miami Miller School of Medic… USA    
##  3 Ali et al. (2012)         University of Miami Miller School of Medic… USA    
##  4 Ali et al. (2012)         University of Miami Miller School of Medic… USA    
##  5 Ali et al. (2012)         University of Miami Miller School of Medic… USA    
##  6 Ali et al. (2012)         University of Miami Miller School of Medic… USA    
##  7 Ali et al. (2012)         Fisher Institute for Medical Research       USA    
##  8 Ali et al. (2012)         University of Miami Miller School of Medic… USA    
##  9 An (2011)                 Korea University                            South …
## 10 Badr El-Din et al. (2008) University of Mansoura                      Egypt  
## # … with 461 more rows
#Create network nodes based on the unique institutional affiliation found in the list
nodes <- institution_list %>% select(Affiliation, Country) %>% distinct_all()
nodes <- rowid_to_column(nodes, "id")

#Node label will be the institution and group them by Country 
nodes <- nodes %>% rename( label = Affiliation, group = Country)
nodes
## # A tibble: 101 × 3
##       id label                                                 group      
##    <int> <chr>                                                 <chr>      
##  1     1 Pharmacognosia                                        USA        
##  2     2 University of Miami Miller School of Medicine         USA        
##  3     3 Fisher Institute for Medical Research                 USA        
##  4     4 Korea University                                      South Korea
##  5     5 University of Mansoura                                Egypt      
##  6     6 National Center for Radiation Research and Technology Egypt      
##  7     7 Charles Drew University of Medicine and Science       USA        
##  8     8 Al-Qalam University                                   Iraq       
##  9     9 University of California at Los Angeles               USA        
## 10    10 Daegu Haany University                                South Korea
## # … with 91 more rows
# Create edges as a data frame 
edges <- data.frame(fromA= character(0), toA= character(0), category = character(0) )

# Edges are defined through iterating all the articles based on co-occurrence within the same article
for (a in unique(institution_list$ArticleID)) {
  i_list <- institution_list %>% filter(ArticleID == a)
  u_list <- i_list$Affiliation
  len <- length(u_list) -1 
  if (len > 0) {
    for (i in c(1:len)) {
        k <- i+ 1
        for (j in c(k:length(u_list))) {
          if (u_list[i][1] <= u_list[j][1]) {
              new <- c(u_list[i][1], u_list[j][1], a)
          } else {
              new <- c(u_list[j][1], u_list[i][1], a)
          }
          edges[nrow(edges) + 1,] <- new
        }
    }
  }
}
edges <- edges %>% left_join(nodes, by = c("fromA"= "label")) %>% rename(from=id)
edges <- edges %>% left_join(nodes, by = c("toA"= "label")) %>% rename(to=id)
edges <- edges %>% select(from, to, category)
#edges
# Display the raw network diagram
visNetwork(nodes, edges) 
#Use routes_tidy package to write the graph into GML file for file exchange 
routes_tidy <- tbl_graph(nodes = nodes,
                         edges = edges,
                         directed = FALSE)
write_graph(routes_tidy, "int_graph.gml", format="gml" )
routes_tidy
## # A tbl_graph: 101 nodes and 1525 edges
## #
## # An undirected multigraph with 29 components
## #
## # Node Data: 101 × 3 (active)
##      id label                                                 group      
##   <int> <chr>                                                 <chr>      
## 1     1 Pharmacognosia                                        USA        
## 2     2 University of Miami Miller School of Medicine         USA        
## 3     3 Fisher Institute for Medical Research                 USA        
## 4     4 Korea University                                      South Korea
## 5     5 University of Mansoura                                Egypt      
## 6     6 National Center for Radiation Research and Technology Egypt      
## # … with 95 more rows
## #
## # Edge Data: 1,525 × 3
##    from    to category         
##   <int> <int> <chr>            
## 1     1     2 Ali et al. (2012)
## 2     1     2 Ali et al. (2012)
## 3     1     2 Ali et al. (2012)
## # … with 1,522 more rows
#collapse the edges by creating a value to each edge based on count of co-occurance
edges1 <- edges %>% filter(from != to) %>% group_by(from, to, category) %>% summarise(value = n())
## `summarise()` has grouped output by 'from', 'to'. You can override using the
## `.groups` argument.
# use igraph 
g <- graph_from_data_frame(edges1, directed = FALSE, vertices = nodes)

# Define node size based on the degree 
nodes <- nodes %>% mutate(value = (degree(g)^2)) %>% mutate(font.size = 14+degree(g))

# Plot the network 
visNetwork(nodes, edges1, main = "Collaorative Networks of Institutions in RBAC Research",  height = "800", width = "100%") %>% 
  visNodes(scaling = list(min = 5, max = 50), borderWidth=0) %>% 
  visEdges(scaling = list(min = 1, max = 5), color=edges1$category)
#Use routes_tidy package to write the graph into GML file for file exchange 
routes_tidy1 <- tbl_graph(nodes = nodes,
                         edges = edges1,
                         directed = FALSE)
write_graph(routes_tidy1, "int_graph1.gml", format="gml" )
routes_tidy1
## # A tbl_graph: 101 nodes and 183 edges
## #
## # An undirected multigraph with 29 components
## #
## # Node Data: 101 × 5 (active)
##      id label                                          group      value font.si…
##   <int> <chr>                                          <chr>      <dbl>    <dbl>
## 1     1 Pharmacognosia                                 USA            4       16
## 2     2 University of Miami Miller School of Medicine  USA           49       21
## 3     3 Fisher Institute for Medical Research          USA            4       16
## 4     4 Korea University                               South Kor…    49       21
## 5     5 University of Mansoura                         Egypt        225       29
## 6     6 National Center for Radiation Research and Te… Egypt         81       23
## # … with 95 more rows
## #
## # Edge Data: 183 × 4
##    from    to category             value
##   <int> <int> <chr>                <int>
## 1     1     2 Ali et al. (2012)        6
## 2     2    56 Lewis et al. (2020a)    17
## 3     1     3 Ali et al. (2012)        1
## # … with 180 more rows