R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

Part IV. Bibliometric analysis - Citations

Load packages

library(statsr)
library(tidyverse)
library(readxl)
library(visNetwork)
library(igraph)
library(tidygraph)
library(ggraph)
library(numbers)
# Import the "References" and "Articles" worksheets from the Excel workbook.
workbook_path <- "Data.xlsx"
ref_list <- read_excel(path = workbook_path, sheet = "References")
article_list <- read_excel(path = workbook_path, sheet = "Articles")
# Tally the rows of ref_list per RefArtID (presumably the cited article --
# confirm against the workbook); rows without a RefArtID are ignored.
Citation_count <- ref_list %>%
  drop_na(RefArtID) %>%
  count(RefArtID, name = "Citations")

# Attach the article metadata. right_join() keeps every row of article_list,
# so an article that never appears as RefArtID ends up with NA in Citations.
Citation_count <- right_join(
  Citation_count,
  article_list,
  by = c("RefArtID" = "ArticleID")
) %>%
  select(RefArtID, Year, GoogleCitations, Citations, StudyDesign)

# Descriptive statistics of the Google citation counts (missing counts are
# excluded): number of articles with at least one citation, mean, median,
# standard deviation, quartiles and interquartile range.
Citation_count %>%
  drop_na(GoogleCitations) %>%
  summarise(
    length(which(GoogleCitations > 0)),
    mean(GoogleCitations),
    median(GoogleCitations),
    sd(GoogleCitations),
    quantile(GoogleCitations, 0.25),
    quantile(GoogleCitations, 0.75),
    IQR(GoogleCitations)
  )
## # A tibble: 1 × 7
##   length(which(GoogleCitations…¹ mean(…² media…³ sd(Go…⁴ quant…⁵ quant…⁶ IQR(G…⁷
##                            <int>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1                             89    27.9    14.5    34.6       5    42.5    37.5
## # … with abbreviated variable names ¹​`length(which(GoogleCitations > 0))`,
## #   ²​`mean(GoogleCitations)`, ³​`median(GoogleCitations)`,
## #   ⁴​`sd(GoogleCitations)`, ⁵​`quantile(GoogleCitations, 0.25)`,
## #   ⁶​`quantile(GoogleCitations, 0.75)`, ⁷​`IQR(GoogleCitations)`
# Ten most-cited articles under each measure: Google Scholar counts and the
# citation counts derived from ref_list. slice_max() replaces the superseded
# top_n(); with_ties = TRUE keeps every article tied at the cut-off, so a
# ranking can hold more than 10 rows. Articles with a missing count are only
# considered after all non-missing ones, so they cannot displace a ranked one.
Top_cite_by_Google <- Citation_count %>%
  slice_max(GoogleCitations, n = 10, with_ties = TRUE) %>%
  arrange(desc(GoogleCitations))
Top_cite_by_Others <- Citation_count %>%
  slice_max(Citations, n = 10, with_ties = TRUE) %>%
  arrange(desc(Citations))
Top_cite_by_Google
## # A tibble: 10 × 5
##    RefArtID                      Year GoogleCitations Citations StudyDesign     
##    <chr>                        <dbl>           <dbl>     <int> <chr>           
##  1 Ghoneum (1998b)               1998             180        59 Before and afte…
##  2 Ghoneum (1998a)               1998             146        44 Cell            
##  3 Ghoneum & Jewett (2000)       2000             132        35 Cell            
##  4 Ghoneum & Matsuura (2004)     2004             124        25 Cell            
##  5 Ghoneum & Abedi (2004)        2004             100        29 Animal+Cell     
##  6 Noaman et al. (2008)          2008              98        18 Animal          
##  7 Ghoneum & Gollapudi (2003)    2003              93        27 Cell            
##  8 Kim H.Y. et al. (2007)        2007              85         5 Animal          
##  9 Pérez-Martínez et al. (2015)  2015              73        12 Animal+Cell     
## 10 Badr El-Din et al. (2008)     2008              69        18 Animal
# Print the ranking by Citations (the counts derived from ref_list); ties at
# the cut-off are kept, so more than 10 rows can appear.
Top_cite_by_Others
## # A tibble: 11 × 5
##    RefArtID                    Year GoogleCitations Citations StudyDesign       
##    <chr>                      <dbl>           <dbl>     <int> <chr>             
##  1 Ghoneum (1998b)             1998             180        59 Before and after …
##  2 Ghoneum (1998a)             1998             146        44 Cell              
##  3 Ghoneum & Jewett (2000)     2000             132        35 Cell              
##  4 Ghoneum & Abedi (2004)      2004             100        29 Animal+Cell       
##  5 Ghoneum & Gollapudi (2003)  2003              93        27 Cell              
##  6 Ghoneum & Brown (1999)      1999              61        25 Before and after …
##  7 Ghoneum & Matsuura (2004)   2004             124        25 Cell              
##  8 Jacoby et al. (2001)        2001              41        19 Animal            
##  9 Badr El-Din et al. (2008)   2008              69        18 Animal            
## 10 Ghoneum & Agrawal (2011)    2011              60        18 Cell              
## 11 Noaman et al. (2008)        2008              98        18 Animal
# Union of the two rankings: a full outer merge keyed on every column keeps
# each article that appears in either list, without duplicating the articles
# that appear in both.
Top_cite <- merge(
  x = Top_cite_by_Google,
  y = Top_cite_by_Others,
  by = c("RefArtID", "Year", "GoogleCitations", "Citations", "StudyDesign"),
  all = TRUE
)
arrange(Top_cite, desc(GoogleCitations))
##                        RefArtID Year GoogleCitations Citations
## 1               Ghoneum (1998b) 1998             180        59
## 2               Ghoneum (1998a) 1998             146        44
## 3       Ghoneum & Jewett (2000) 2000             132        35
## 4     Ghoneum & Matsuura (2004) 2004             124        25
## 5        Ghoneum & Abedi (2004) 2004             100        29
## 6          Noaman et al. (2008) 2008              98        18
## 7    Ghoneum & Gollapudi (2003) 2003              93        27
## 8        Kim H.Y. et al. (2007) 2007              85         5
## 9  Pérez-Martínez et al. (2015) 2015              73        12
## 10    Badr El-Din et al. (2008) 2008              69        18
## 11       Ghoneum & Brown (1999) 1999              61        25
## 12     Ghoneum & Agrawal (2011) 2011              60        18
## 13         Jacoby et al. (2001) 2001              41        19
##               StudyDesign
## 1  Before and after study
## 2                    Cell
## 3                    Cell
## 4                    Cell
## 5             Animal+Cell
## 6                  Animal
## 7                    Cell
## 8                  Animal
## 9             Animal+Cell
## 10                 Animal
## 11 Before and after study
## 12                   Cell
## 13                 Animal
# Histogram of Google citation counts per article, in bins of width 5.
Citation_count %>%
  ggplot(mapping = aes(x = GoogleCitations)) +
  geom_histogram(binwidth = 5)

# References count ----

# Number of matched references (rows with a RefArtID) per source article,
# largest first.
Reference_count <- ref_list %>%
  filter(!is.na(RefArtID)) %>%
  count(SourceArtID, name = "References") %>%
  arrange(desc(References))

# Source articles that only have rows without a RefArtID get an explicit zero.
# De-duplicate them and drop any article already counted above, so that every
# article contributes exactly one row to the combined table.
Nil_count <- ref_list %>%
  filter(is.na(RefArtID)) %>%
  distinct(SourceArtID) %>%
  anti_join(Reference_count, by = "SourceArtID") %>%
  mutate(References = 0)

# Citation_count was built from article_list and already carries Year,
# GoogleCitations and StudyDesign, so a single join supplies every remaining
# column. Articles absent from Citation_count, or with no citation recorded
# there, get a citation count of 0.
Reference_count <- bind_rows(Reference_count, Nil_count) %>%
  left_join(Citation_count, by = c("SourceArtID" = "RefArtID")) %>%
  mutate(Citations = replace_na(Citations, 0L)) %>%
  select(SourceArtID, Year, References, GoogleCitations, Citations, StudyDesign)

nrow(Reference_count)
## [1] 98
# Articles with the most references. slice_max() replaces the superseded
# top_n(); ties at the 20th place are kept, so more than 20 rows can appear.
Reference_count %>%
  slice_max(References, n = 20, with_ties = TRUE)
## # A tibble: 22 × 6
##    SourceArtID                 Year References GoogleCitations Citations Study…¹
##    <chr>                      <dbl>      <dbl>           <dbl>     <int> <chr>  
##  1 Badr El-Din et al. (2016a)  2016         22              28         5 Animal 
##  2 Badr El-Din et al. (2020)   2020         20              12         1 Animal 
##  3 Ooi et al. (2020)           2020         20               5         0 Random…
##  4 Badr El-Din et al. (2016b)  2016         19              18         4 Animal 
##  5 Zhu et al. (2017)           2017         18               5         0 Cell   
##  6 Badr El-Din et al. (2019)   2019         16               7         1 Animal 
##  7 Ghoneum et al. (2014)       2014         16              55         8 Cell   
##  8 Elsaid et al. (2021)        2021         15               2         0 Random…
##  9 Hajtó (2017)                2017         15               2         0 Case r…
## 10 Bang et al. (2010)          2010         14              57        14 Random…
## # … with 12 more rows, and abbreviated variable name ¹​StudyDesign
# Keep only the articles that take part in the citation network: those with
# at least one counted reference or at least one counted citation.
Valid_Arts <- filter(Reference_count, References > 0 | Citations > 0)
nrow(Valid_Arts)
## [1] 93
# Print the retained articles (References > 0 or Citations > 0).
Valid_Arts
## # A tibble: 93 × 6
##    SourceArtID                 Year References GoogleCitations Citations Study…¹
##    <chr>                      <dbl>      <dbl>           <dbl>     <int> <chr>  
##  1 Badr El-Din et al. (2016a)  2016         22              28         5 Animal 
##  2 Badr El-Din et al. (2020)   2020         20              12         1 Animal 
##  3 Ooi et al. (2020)           2020         20               5         0 Random…
##  4 Badr El-Din et al. (2016b)  2016         19              18         4 Animal 
##  5 Zhu et al. (2017)           2017         18               5         0 Cell   
##  6 Badr El-Din et al. (2019)   2019         16               7         1 Animal 
##  7 Ghoneum et al. (2014)       2014         16              55         8 Cell   
##  8 Elsaid et al. (2021)        2021         15               2         0 Random…
##  9 Hajtó (2017)                2017         15               2         0 Case r…
## 10 Bang et al. (2010)          2010         14              57        14 Random…
## # … with 83 more rows, and abbreviated variable name ¹​StudyDesign
# Create nodes ----

# One candidate node per distinct source article recorded in ref_list.
nodes <- ref_list %>%
  distinct(SourceID, SourceArtID, SourceYear)

# Restrict to the valid articles and rename the columns to the node fields
# used by visNetwork (id, label, level, value, group). value1 carries the
# citation count derived from ref_list alongside the Google count in value.
nodes <- nodes %>%
  right_join(Valid_Arts, by = "SourceArtID") %>%
  select(
    id = SourceID,
    label = SourceArtID,
    level = SourceYear,
    value = GoogleCitations,
    value1 = Citations,
    group = StudyDesign
  )

# Collapse the study design into three broad categories. Any design that is
# not one of the clinical designs listed here (including a missing design)
# is classed as preclinical.
interventional_designs <- c(
  "Randomised controlled trial",
  "Non-randomised controlled trial",
  "Before and after study"
)
observational_designs <- c(
  "Descriptive cross-sectional studies",
  "Case series",
  "Case report"
)
nodes <- nodes %>%
  mutate(group = case_when(
    group %in% interventional_designs ~ "Interventional",
    group %in% observational_designs ~ "Observational",
    TRUE ~ "Preclinical"
  ))
nrow(nodes)
## [1] 93
# Print the node table (columns id, label, level, value, value1, group).
nodes
## # A tibble: 93 × 6
##       id label                      level value value1 group         
##    <dbl> <chr>                      <dbl> <dbl>  <int> <chr>         
##  1     1 Ali et al. (2012)           2012    21      5 Interventional
##  2     2 An (2011)                   2011     5      0 Preclincal    
##  3     3 Badr El-Din et al. (2008)   2008    69     18 Preclincal    
##  4     4 Badr El-Din et al. (2016a)  2016    28      5 Preclincal    
##  5     5 Badr El-Din et al. (2016b)  2016    18      4 Preclincal    
##  6     6 Badr El-Din et al. (2016c)  2016     1      1 Preclincal    
##  7     7 Badr El-Din et al. (2019)   2019     7      1 Preclincal    
##  8     8 Badr El-Din et al. (2020)   2020    12      1 Preclincal    
##  9     9 Bae et al. (2004)           2004    20      0 Preclincal    
## 10    10 Bang et al. (2010)          2010    57     14 Interventional
## # … with 83 more rows
# Create edges ----

# One edge per ref_list row that has a DestID, pointing from SourceID to
# DestID (presumably citing article -> cited article; confirm against the
# workbook).
edges <- ref_list %>%
  drop_na(DestID) %>%
  select(from = SourceID, to = DestID)
nrow(edges)
## [1] 598
# edges
# Label font size grows with value1 (the citation count derived from
# ref_list): 20 plus 2 per citation. An earlier variant, kept for reference,
# binned `value` into groups and sized labels by `value`:
# nodes <- nodes %>% mutate(group = div(value, 5)) %>% mutate(font.size = 14 + value)
nodes <- nodes %>%
  mutate(font.size = 20 + value1 * 2)

# Nodes with the largest labels. slice_max() replaces the superseded top_n();
# ties at the 10th place are kept, so more than 10 rows can appear.
nodes %>%
  slice_max(font.size, n = 10, with_ties = TRUE) %>%
  arrange(desc(font.size))
## # A tibble: 11 × 7
##       id label                      level value value1 group          font.size
##    <dbl> <chr>                      <dbl> <dbl>  <int> <chr>              <dbl>
##  1    37 Ghoneum (1998b)             1998   180     59 Interventional       138
##  2    38 Ghoneum (1998a)             1998   146     44 Preclincal           108
##  3    35 Ghoneum & Jewett (2000)     2000   132     35 Preclincal            90
##  4    26 Ghoneum & Abedi (2004)      2004   100     29 Preclincal            78
##  5    31 Ghoneum & Gollapudi (2003)  2003    93     27 Preclincal            74
##  6    29 Ghoneum & Brown (1999)      1999    61     25 Interventional        70
##  7    36 Ghoneum & Matsuura (2004)   2004   124     25 Preclincal            70
##  8    56 Jacoby et al. (2001)        2001    41     19 Preclincal            58
##  9     3 Badr El-Din et al. (2008)   2008    69     18 Preclincal            56
## 10    27 Ghoneum & Agrawal (2011)    2011    60     18 Preclincal            56
## 11    80 Noaman et al. (2008)        2008    98     18 Preclincal            56
# Interactive citation network built from the node and edge tables.
# NOTE(review): both visIgraphLayout() and visHierarchicalLayout() are applied,
# in that order -- confirm which layout is meant to take effect.
network_widget <- visNetwork(
  nodes,
  edges,
  main = "Citation Networks for RBAC Research",
  height = 1000,
  width = "100%"
)
network_widget <- network_widget %>%
  visIgraphLayout() %>%
  visEdges(
    arrows = "to",
    smooth = TRUE,
    dashes = FALSE,
    color = list(opacity = 0.3)
  ) %>%
  visHierarchicalLayout(
    direction = "LR",
    levelSeparation = 200,
    nodeSpacing = 300
  ) %>%
  visNodes(
    scaling = list(min = 25, max = 500),
    borderWidth = 0,
    shape = "ellipse"
  )
network_widget