This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(statsr)
library(tidyverse)
library(readxl)
library(visNetwork)
library(igraph)
library(tidygraph)
library(ggraph)
library(numbers)
# Read the two worksheets of the Excel workbook: the reference pairs and the
# per-article metadata.
ref_list <- read_excel("Data.xlsx", sheet = "References")
article_list <- read_excel("Data.xlsx", sheet = "Articles")

# Count how often each article appears as a cited reference (rows without a
# RefArtID are skipped), then attach the article metadata. The right join
# keeps every row of article_list, so articles that are never cited end up
# with NA in Citations.
Citation_count <- ref_list %>%
  drop_na(RefArtID) %>%
  count(RefArtID, name = "Citations") %>%
  right_join(article_list, by = c(RefArtID = "ArticleID")) %>%
  select(RefArtID, Year, GoogleCitations, Citations, StudyDesign)

# Descriptive statistics of the Google citation counts, computed over the
# articles that have a GoogleCitations value.
Citation_count %>%
  drop_na(GoogleCitations) %>%
  summarise(
    length(which(GoogleCitations > 0)),
    mean(GoogleCitations),
    median(GoogleCitations),
    sd(GoogleCitations),
    quantile(GoogleCitations, 0.25),
    quantile(GoogleCitations, 0.75),
    IQR(GoogleCitations)
  )
## # A tibble: 1 × 7
## length(which(GoogleCitations…¹ mean(…² media…³ sd(Go…⁴ quant…⁵ quant…⁶ IQR(G…⁷
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 89 27.9 14.5 34.6 5 42.5 37.5
## # … with abbreviated variable names ¹`length(which(GoogleCitations > 0))`,
## # ²`mean(GoogleCitations)`, ³`median(GoogleCitations)`,
## # ⁴`sd(GoogleCitations)`, ⁵`quantile(GoogleCitations, 0.25)`,
## # ⁶`quantile(GoogleCitations, 0.75)`, ⁷`IQR(GoogleCitations)`
# Ten most-cited articles by each measure. slice_max() replaces the
# superseded top_n(); like top_n() it keeps ties by default, so a table can
# hold more than ten rows when several articles share the 10th-place count.
Top_cite_by_Google <- Citation_count %>%
  slice_max(GoogleCitations, n = 10) %>%
  arrange(desc(GoogleCitations))
Top_cite_by_Others <- Citation_count %>%
  slice_max(Citations, n = 10) %>%
  arrange(desc(Citations))
Top_cite_by_Google
## # A tibble: 10 × 5
## RefArtID Year GoogleCitations Citations StudyDesign
## <chr> <dbl> <dbl> <int> <chr>
## 1 Ghoneum (1998b) 1998 180 59 Before and afte…
## 2 Ghoneum (1998a) 1998 146 44 Cell
## 3 Ghoneum & Jewett (2000) 2000 132 35 Cell
## 4 Ghoneum & Matsuura (2004) 2004 124 25 Cell
## 5 Ghoneum & Abedi (2004) 2004 100 29 Animal+Cell
## 6 Noaman et al. (2008) 2008 98 18 Animal
## 7 Ghoneum & Gollapudi (2003) 2003 93 27 Cell
## 8 Kim H.Y. et al. (2007) 2007 85 5 Animal
## 9 Pérez-Martínez et al. (2015) 2015 73 12 Animal+Cell
## 10 Badr El-Din et al. (2008) 2008 69 18 Animal
Top_cite_by_Others
## # A tibble: 11 × 5
## RefArtID Year GoogleCitations Citations StudyDesign
## <chr> <dbl> <dbl> <int> <chr>
## 1 Ghoneum (1998b) 1998 180 59 Before and after …
## 2 Ghoneum (1998a) 1998 146 44 Cell
## 3 Ghoneum & Jewett (2000) 2000 132 35 Cell
## 4 Ghoneum & Abedi (2004) 2004 100 29 Animal+Cell
## 5 Ghoneum & Gollapudi (2003) 2003 93 27 Cell
## 6 Ghoneum & Brown (1999) 1999 61 25 Before and after …
## 7 Ghoneum & Matsuura (2004) 2004 124 25 Cell
## 8 Jacoby et al. (2001) 2001 41 19 Animal
## 9 Badr El-Din et al. (2008) 2008 69 18 Animal
## 10 Ghoneum & Agrawal (2011) 2011 60 18 Cell
## 11 Noaman et al. (2008) 2008 98 18 Animal
# Union of the two top-cited tables: matching on every column and keeping
# unmatched rows from both sides lists each article once. merge() returns a
# base data.frame.
Top_cite <- merge(
  x = Top_cite_by_Google,
  y = Top_cite_by_Others,
  by = c(
    "RefArtID",
    "Year",
    "GoogleCitations",
    "Citations",
    "StudyDesign"
  ),
  all.x = TRUE,
  all.y = TRUE
)
arrange(Top_cite, desc(GoogleCitations))
## RefArtID Year GoogleCitations Citations
## 1 Ghoneum (1998b) 1998 180 59
## 2 Ghoneum (1998a) 1998 146 44
## 3 Ghoneum & Jewett (2000) 2000 132 35
## 4 Ghoneum & Matsuura (2004) 2004 124 25
## 5 Ghoneum & Abedi (2004) 2004 100 29
## 6 Noaman et al. (2008) 2008 98 18
## 7 Ghoneum & Gollapudi (2003) 2003 93 27
## 8 Kim H.Y. et al. (2007) 2007 85 5
## 9 Pérez-Martínez et al. (2015) 2015 73 12
## 10 Badr El-Din et al. (2008) 2008 69 18
## 11 Ghoneum & Brown (1999) 1999 61 25
## 12 Ghoneum & Agrawal (2011) 2011 60 18
## 13 Jacoby et al. (2001) 2001 41 19
## StudyDesign
## 1 Before and after study
## 2 Cell
## 3 Cell
## 4 Cell
## 5 Animal+Cell
## 6 Animal
## 7 Cell
## 8 Animal
## 9 Animal+Cell
## 10 Animal
## 11 Before and after study
## 12 Cell
## 13 Animal
# Distribution of Google citation counts, in bins five citations wide.
Citation_count %>%
  ggplot(mapping = aes(x = GoogleCitations)) +
  geom_histogram(binwidth = 5)
# References count
# Number of reference rows with a known RefArtID per source article,
# largest first.
Reference_count <- ref_list %>%
  filter(!is.na(RefArtID)) %>%
  count(SourceArtID, name = "References", sort = TRUE)

# Rows whose RefArtID is missing are given an explicit reference count of 0.
Nil_count <- ref_list %>%
  filter(is.na(RefArtID)) %>%
  mutate(References = 0) %>%
  select(SourceArtID, References)

# Stack both sets (bind_rows() is the tibble-aware replacement for rbind())
# and add the per-article columns. Citation_count was built with a right join
# onto article_list, so it already holds Year, GoogleCitations and StudyDesign
# for every article; a second join to article_list is not needed.
Reference_count <- bind_rows(Reference_count, Nil_count) %>%
  left_join(Citation_count, by = c("SourceArtID" = "RefArtID")) %>%
  mutate(Citations = replace_na(Citations, 0)) %>%
  select(
    SourceArtID, Year, References, GoogleCitations, Citations, StudyDesign
  )
nrow(Reference_count)
## [1] 98
# Twenty articles with the most references; ties at the cut-off are kept, so
# more than twenty rows may be shown.
Reference_count %>% slice_max(References, n = 20)
## # A tibble: 22 × 6
## SourceArtID Year References GoogleCitations Citations Study…¹
## <chr> <dbl> <dbl> <dbl> <int> <chr>
## 1 Badr El-Din et al. (2016a) 2016 22 28 5 Animal
## 2 Badr El-Din et al. (2020) 2020 20 12 1 Animal
## 3 Ooi et al. (2020) 2020 20 5 0 Random…
## 4 Badr El-Din et al. (2016b) 2016 19 18 4 Animal
## 5 Zhu et al. (2017) 2017 18 5 0 Cell
## 6 Badr El-Din et al. (2019) 2019 16 7 1 Animal
## 7 Ghoneum et al. (2014) 2014 16 55 8 Cell
## 8 Elsaid et al. (2021) 2021 15 2 0 Random…
## 9 Hajtó (2017) 2017 15 2 0 Case r…
## 10 Bang et al. (2010) 2010 14 57 14 Random…
## # … with 12 more rows, and abbreviated variable name ¹StudyDesign
# Filter out invalid nodes
# An article is kept when at least one of its two counts (References,
# Citations) is positive; articles with both counts at zero are dropped.
Valid_Arts <- Reference_count %>%
  filter(if_any(c(References, Citations), ~ .x > 0))
nrow(Valid_Arts)
## [1] 93
Valid_Arts
## # A tibble: 93 × 6
## SourceArtID Year References GoogleCitations Citations Study…¹
## <chr> <dbl> <dbl> <dbl> <int> <chr>
## 1 Badr El-Din et al. (2016a) 2016 22 28 5 Animal
## 2 Badr El-Din et al. (2020) 2020 20 12 1 Animal
## 3 Ooi et al. (2020) 2020 20 5 0 Random…
## 4 Badr El-Din et al. (2016b) 2016 19 18 4 Animal
## 5 Zhu et al. (2017) 2017 18 5 0 Cell
## 6 Badr El-Din et al. (2019) 2019 16 7 1 Animal
## 7 Ghoneum et al. (2014) 2014 16 55 8 Cell
## 8 Elsaid et al. (2021) 2021 15 2 0 Random…
## 9 Hajtó (2017) 2017 15 2 0 Case r…
## 10 Bang et al. (2010) 2010 14 57 14 Random…
## # … with 83 more rows, and abbreviated variable name ¹StudyDesign
# Create nodes
# One row per distinct source article: numeric ID, label and year.
nodes <- ref_list %>%
  select(SourceID, SourceArtID, SourceYear) %>%
  distinct()

# Keep only the valid articles and rename the columns to the names used by
# the visNetwork call later in this file.
nodes <- nodes %>%
  right_join(Valid_Arts, by = "SourceArtID") %>%
  select(
    id = SourceID,
    label = SourceArtID,
    level = SourceYear,
    value = GoogleCitations,
    value1 = Citations,
    group = StudyDesign
  )

# Collapse the study designs into three groups. Any design not listed below
# (including a missing one) falls through to "Preclinical".
interventional_designs <- c(
  "Randomised controlled trial",
  "Non-randomised controlled trial",
  "Before and after study"
)
observational_designs <- c(
  "Descriptive cross-sectional studies",
  "Case series",
  "Case report"
)
nodes <- nodes %>%
  mutate(group = case_when(
    group %in% interventional_designs ~ "Interventional",
    group %in% observational_designs ~ "Observational",
    TRUE ~ "Preclinical"
  ))
nrow(nodes)
## [1] 93
nodes
## # A tibble: 93 × 6
## id label level value value1 group
## <dbl> <chr> <dbl> <dbl> <int> <chr>
## 1 1 Ali et al. (2012) 2012 21 5 Interventional
## 2 2 An (2011) 2011 5 0 Preclinical
## 3 3 Badr El-Din et al. (2008) 2008 69 18 Preclinical
## 4 4 Badr El-Din et al. (2016a) 2016 28 5 Preclinical
## 5 5 Badr El-Din et al. (2016b) 2016 18 4 Preclinical
## 6 6 Badr El-Din et al. (2016c) 2016 1 1 Preclinical
## 7 7 Badr El-Din et al. (2019) 2019 7 1 Preclinical
## 8 8 Badr El-Din et al. (2020) 2020 12 1 Preclinical
## 9 9 Bae et al. (2004) 2004 20 0 Preclinical
## 10 10 Bang et al. (2010) 2010 57 14 Interventional
## # … with 83 more rows
# Create edges
# One edge per reference row that has a destination ID, pointing from the
# citing article to the cited article.
edges <- ref_list %>%
  filter(!is.na(DestID)) %>%
  select(from = SourceID, to = DestID)
nrow(edges)
## [1] 598
#edges
# Define font.size from value1 (the citation count taken from the reference
# list): 20 plus two units per citation.
# Earlier variant, kept for reference:
#nodes <- nodes %>% mutate(group = div(value,5)) %>% mutate(font.size = 14+value)
nodes <- nodes %>% mutate(font.size = 20 + value1 * 2)
# Ten largest labels; ties at the cut-off are kept.
nodes %>%
  slice_max(font.size, n = 10) %>%
  arrange(desc(font.size))
## # A tibble: 11 × 7
## id label level value value1 group font.size
## <dbl> <chr> <dbl> <dbl> <int> <chr> <dbl>
## 1 37 Ghoneum (1998b) 1998 180 59 Interventional 138
## 2 38 Ghoneum (1998a) 1998 146 44 Preclinical 108
## 3 35 Ghoneum & Jewett (2000) 2000 132 35 Preclinical 90
## 4 26 Ghoneum & Abedi (2004) 2004 100 29 Preclinical 78
## 5 31 Ghoneum & Gollapudi (2003) 2003 93 27 Preclinical 74
## 6 29 Ghoneum & Brown (1999) 1999 61 25 Interventional 70
## 7 36 Ghoneum & Matsuura (2004) 2004 124 25 Preclinical 70
## 8 56 Jacoby et al. (2001) 2001 41 19 Preclinical 58
## 9 3 Badr El-Din et al. (2008) 2008 69 18 Preclinical 56
## 10 27 Ghoneum & Agrawal (2011) 2011 60 18 Preclinical 56
## 11 80 Noaman et al. (2008) 2008 98 18 Preclinical 56
# Render the citation network widget from the nodes and edges tables. The
# layout and styling steps are applied in this exact order.
visNetwork(
  nodes = nodes,
  edges = edges,
  main = "Citation Networks for RBAC Research",
  height = 1000,
  width = "100%"
) %>%
  visIgraphLayout() %>%
  # Directed, smooth, solid edges drawn at 30% opacity.
  visEdges(
    arrows = "to",
    smooth = TRUE,
    dashes = FALSE,
    color = list(opacity = 0.3)
  ) %>%
  # Left-to-right hierarchical arrangement.
  visHierarchicalLayout(
    direction = "LR",
    levelSeparation = 200,
    nodeSpacing = 300
  ) %>%
  # Borderless ellipse nodes with the given scaling bounds.
  visNodes(
    scaling = list(min = 25, max = 500),
    borderWidth = 0,
    shape = "ellipse"
  )