
## Ceratodon purpureus Genome Manuscript R plots 
## Carey et al., 2020 in prep

## Analyses run/script by Sarah B. Carey

## R version 3.5.3

################## Fig. 1D, density plot ##################

library(karyoploteR)
# version 1.8.8

# Turn a table with Chromosome / Start / Stop columns into a GRanges object.
windows_to_granges <- function(window_table) {
  toGRanges(data.frame(chr = window_table$Chromosome,
                       start = window_table$Start,
                       end = window_table$Stop))
}

# Density tables for Fig. 1D. Each table is kept next to its GRanges because
# the plotting calls further down read the Density column from the table.
fake.gtf <- read.csv("MS_plotData/gene_density_filtered.csv", header = TRUE)
gtf.GR <- windows_to_granges(fake.gtf)

TE.gtf <- read.csv("MS_plotData/TE_all_density_filtered.csv", header = TRUE)
TE.gtf.GR <- windows_to_granges(TE.gtf)

TE.copia.gtf <- read.csv("MS_plotData/copia_density_filtered.csv", header = TRUE)
TE.copia.gtf.GR <- windows_to_granges(TE.copia.gtf)

TE.hat.gtf <- read.csv("MS_plotData/hat_density_filtered.csv", header = TRUE)
TE.hat.gtf.GR <- windows_to_granges(TE.hat.gtf)

TE.lanisha.gtf <- read.csv("MS_plotData/lanisha_density_filtered.csv", header = TRUE)
TE.lanisha.gtf.GR <- windows_to_granges(TE.lanisha.gtf)

TE.RLC5.gtf <- read.csv("MS_plotData/RLC5_density_filtered.csv", header = TRUE)
TE.RLC5.gtf.GR <- windows_to_granges(TE.RLC5.gtf)

# Chromosome extents used as the custom genome for plotKaryotype().
r40 <- read.table("MS_plotData/R40_genome_lengths_sex.txt", header = TRUE)
custom.genome <- toGRanges(data.frame(chr = r40$Chromosome,
                                      start = r40$ChromStart,
                                      end = r40$ChromEnd))

png("Figures/densities_ceratodon_16March2021.png",
    width = 16, height = 5, units = "in", res = 1000)

# Plot parameters: defaults for plot.type 4 with two margins overridden.
pp <- getDefaultPlotParams(plot.type = 4)
pp$ideogramlateralmargin <- 0.005
pp$leftmargin <- 0.05

kp <- plotKaryotype(genome = custom.genome, pin = 8, plot.type = 4,
                    labels.plotter = NULL, plot.params = pp)
kp <- kpDataBackground(kp, data.panel = 1, color = "gray95")
kpAddChromosomeNames(kp, font = 2, xoffset = -4)

# Rotated track labels; each value is the r0 handed to kpAddLabels().
track_label_r0 <- c(Genes = 0.9, TEs = 0.45, Copia = 0.1,
                    Lanisha = -0.25, hAT = -0.75)
for (track_name in names(track_label_r0)) {
  kpAddLabels(kp, track_name, r0 = track_label_r0[[track_name]], srt = 90,
              label.margin = 0.035, font = 2)
}

kpAddBaseNumbers(kp,
                 tick.dist = 10000000, tick.len = 5, tick.col = "black",
                 cex = 0.75,
                 minor.tick.dist = 5000000, minor.tick.len = 5,
                 minor.tick.col = "black",
                 add.units = FALSE)

# Draw the same three-tick axis twice for one track: first with kpAxis()'s
# default side, then with side = 3. The tick labels are supplied explicitly.
# NOTE(review): ymax is read from latest.plot$computed.values$max.density;
# confirm that kpLines() populates it, otherwise NULL is passed to kpAxis().
add_track_axes <- function(karyoplot, r0, r1, tick_labels) {
  axis_ymax <- karyoplot$latest.plot$computed.values$max.density
  kpAxis(karyoplot, ymax = axis_ymax, r0 = r0, r1 = r1, cex = 1,
         numticks = 3, labels = tick_labels)
  kpAxis(karyoplot, ymax = axis_ymax, r0 = r0, r1 = r1, cex = 1,
         numticks = 3, labels = tick_labels, side = 3)
  invisible(karyoplot)
}

# gene density
kp <- kpLines(kp, data = gtf.GR, data.panel = 1, col = "gray0",
              y = fake.gtf$Density, r0 = 0.8, r1 = 0.96, ymax = 0.75, lwd = 2)
add_track_axes(kp, r0 = 0.8, r1 = 0.96, tick_labels = c("0", "", "0.75"))

# all TE density (no ymax given to kpLines for this track)
kp <- kpLines(kp, data = TE.gtf.GR, data.panel = 1, col = "gray40",
              y = TE.gtf$Density, r0 = 0.6, r1 = 0.76, lwd = 2)
add_track_axes(kp, r0 = 0.6, r1 = 0.76, tick_labels = c("0", "", "1"))

# copia density, with RLC5 overlaid on the same track
kp <- kpLines(kp, data = TE.copia.gtf.GR, data.panel = 1, col = "#009E73",
              y = TE.copia.gtf$Density, r0 = 0.4, r1 = 0.56, ymax = 0.5, lwd = 2)
kp <- kpLines(kp, data = TE.RLC5.gtf.GR, data.panel = 1, col = "#CC79A7",
              y = TE.RLC5.gtf$Density, r0 = 0.4, r1 = 0.56, ymax = 0.5, lwd = 2)
add_track_axes(kp, r0 = 0.4, r1 = 0.56, tick_labels = c("0", "", "0.5"))

# lanisha density
kp <- kpLines(kp, data = TE.lanisha.gtf.GR, data.panel = 1, col = "#0072B2",
              y = TE.lanisha.gtf$Density, r0 = 0.2, r1 = 0.36, ymax = 0.5, lwd = 2)
add_track_axes(kp, r0 = 0.2, r1 = 0.36, tick_labels = c("0", "", "0.5"))

# hAT density
kp <- kpLines(kp, data = TE.hat.gtf.GR, data.panel = 1, col = "#D55E00",
              y = TE.hat.gtf$Density, r0 = 0, r1 = 0.16, ymax = 0.75, lwd = 2)
add_track_axes(kp, r0 = 0, r1 = 0.16, tick_labels = c("0", "", "0.75"))

dev.off()



################## Fig. 2, codon metrics ##################

library("ggplot2")
# version 3.3.1
library("ggsignif")
# version 0.6.0

# Per-gene codon usage metrics; shared by the ENC, fop and GC3s panels.
codonbias_cerat <- read.csv("MS_plotData/codon_analyses_nonZero.csv",
                            header = TRUE)

############################ ENC

# Open the output device for the ENC panel (8 x 8 in, 1000 dpi).
png("Figures/enc_Dec062020.png",
    width = 8, height = 8, units = "in", res = 1000)

# Summary function for stat_summary(fun.data = ...).
#   x           numeric values of one group
#   upper_limit y coordinate at which the label is placed
# Returns a one-row data frame: y = upper_limit and label = the group mean,
# rounded to two decimals and formatted as text without scientific notation.
stat_box_data <- function(x, upper_limit = -3) {
  group_mean <- round(mean(x), 2)
  mean_text <- format(group_mean, big.mark = ",", decimal.mark = ".",
                      scientific = FALSE)
  data.frame(y = upper_limit, label = mean_text)
}

# ENC by chromosome class: jittered points under outlined boxplots, pairwise
# Wilcoxon brackets from geom_signif(), and a text layer produced by
# stat_box_data() through stat_summary().
b <- ggplot(codonbias_cerat, aes(x = chromosome, y = enc, fill = chromosome))

enc_plot <- b +
  xlab("") +
  ylab("ENC") +
  geom_point(position = position_jitterdodge(jitter.width = 0.75, jitter.height = 0),
             aes(group = chromosome, color = factor(chromosome)),
             alpha = 0.2, shape = 19, size = 2) +
  scale_colour_manual(values = c("#CC79A7", "#D55E00", "#0072B2")) +
  geom_boxplot(color = c("#CC79A7", "#D55E00", "#0072B2"), fill = "white",
               alpha = 0, lwd = 2) +
  geom_signif(comparisons = list(c("Autosomal", "U-linked")), map_signif_level = TRUE,
              test = "wilcox.test", y_position = 74, size = 1.5, textsize = 10,
              color = "gray40") +
  geom_signif(comparisons = list(c("Autosomal", "V-linked")), map_signif_level = TRUE,
              test = "wilcox.test", y_position = 68, size = 1.5, textsize = 10,
              color = "gray40") +
  geom_signif(comparisons = list(c("U-linked", "V-linked")), map_signif_level = TRUE,
              test = "wilcox.test", y_position = 63, size = 1.5, textsize = 10,
              color = "gray40") +
  stat_summary(fun.data = stat_box_data, geom = "text", hjust = 0.5,
               vjust = 0.000001, cex = 10, color = "gray40") +
  ylim(c(-3, 75)) +
  # "none" replaces guides(fill = FALSE), which newer ggplot2 deprecates.
  guides(fill = "none") +
  # Single theme() call; axis.text.x keeps the size that previously won (33).
  theme(axis.title.x = element_text(size = 33),
        axis.title.y = element_text(size = 33),
        axis.text.x = element_text(size = 33),
        axis.text.y = element_text(size = 25),
        legend.position = "none")

# Print explicitly: a bare ggplot expression is only drawn by top-level
# auto-printing, so the PNG would be empty when the script is source()d.
print(enc_plot)

dev.off()

# Pairwise Wilcoxon tests with Benjamini-Hochberg adjustment; printed
# explicitly for the same reason as the plot.
print(pairwise.wilcox.test(codonbias_cerat$enc, codonbias_cerat$chromosome,
                           p.adjust.method = "BH"))


############################ fop

# Open the output device for the fop panel (8 x 8 in, 1000 dpi).
png("Figures/fop_Dec062020.png",
    width = 8, height = 8, units = "in", res = 1000)

# Summary function for stat_summary(fun.data = ...) in the fop panel.
#   x           numeric values of one group
#   upper_limit y coordinate at which the label is placed
# Returns a one-row data frame: y = upper_limit and label = the group mean,
# rounded to two decimals and formatted as text without scientific notation.
stat_box_data <- function(x, upper_limit = -0.05) {
  group_mean <- round(mean(x), 2)
  mean_text <- format(group_mean, big.mark = ",", decimal.mark = ".",
                      scientific = FALSE)
  data.frame(y = upper_limit, label = mean_text)
}


# fop by chromosome class: jittered points under outlined boxplots, pairwise
# Wilcoxon brackets from geom_signif(), and a text layer produced by
# stat_box_data() through stat_summary().
b <- ggplot(codonbias_cerat, aes(x = chromosome, y = fop, fill = chromosome))

fop_plot <- b +
  xlab("") +
  ylab("fop") +
  geom_point(position = position_jitterdodge(jitter.width = 0.75, jitter.height = 0),
             aes(group = chromosome, color = factor(chromosome)),
             alpha = 0.2, shape = 19, size = 2) +
  scale_colour_manual(values = c("#CC79A7", "#D55E00", "#0072B2")) +
  geom_boxplot(color = c("#CC79A7", "#D55E00", "#0072B2"), fill = "white",
               alpha = 0, lwd = 2) +
  geom_signif(comparisons = list(c("Autosomal", "U-linked")), map_signif_level = TRUE,
              test = "wilcox.test", y_position = 1.25, size = 1.5, textsize = 10,
              color = "gray40") +
  geom_signif(comparisons = list(c("Autosomal", "V-linked")), map_signif_level = TRUE,
              test = "wilcox.test", y_position = 1.1, size = 1.5, textsize = 10,
              color = "gray40") +
  geom_signif(comparisons = list(c("U-linked", "V-linked")), map_signif_level = TRUE,
              test = "wilcox.test", y_position = 1, size = 1.5, textsize = 10,
              color = "gray40") +
  stat_summary(fun.data = stat_box_data, geom = "text", hjust = 0.5,
               vjust = 0.000001, cex = 10, color = "gray40") +
  ylim(c(-0.1, 1.3)) +
  # "none" replaces guides(fill = FALSE), which newer ggplot2 deprecates.
  guides(fill = "none") +
  # Single theme() call; axis.text.x keeps the size that previously won (33).
  theme(axis.title.x = element_text(size = 33),
        axis.title.y = element_text(size = 33),
        axis.text.x = element_text(size = 33),
        axis.text.y = element_text(size = 25),
        legend.position = "none")

# Print explicitly: a bare ggplot expression is only drawn by top-level
# auto-printing, so the PNG would be empty when the script is source()d.
print(fop_plot)

dev.off()

# Pairwise Wilcoxon tests with Benjamini-Hochberg adjustment; printed
# explicitly for the same reason as the plot.
print(pairwise.wilcox.test(codonbias_cerat$fop, codonbias_cerat$chromosome,
                           p.adjust.method = "BH"))

############################ gc3

# Open the output device for the GC3s panel (8 x 8 in, 1000 dpi).
png("Figures/gc3s_Dec062020.png",
    width = 8, height = 8, units = "in", res = 1000)


# Summary function for stat_summary(fun.data = ...) in the GC3s panel.
#   x           numeric values of one group
#   upper_limit y coordinate at which the label is placed
# Returns a one-row data frame: y = upper_limit and label = the group mean,
# rounded to two decimals and formatted as text without scientific notation.
stat_box_data <- function(x, upper_limit = -0.05) {
  group_mean <- round(mean(x), 2)
  mean_text <- format(group_mean, big.mark = ",", decimal.mark = ".",
                      scientific = FALSE)
  data.frame(y = upper_limit, label = mean_text)
}

# GC3s by chromosome class: jittered points under outlined boxplots, pairwise
# Wilcoxon brackets from geom_signif(), and a text layer produced by
# stat_box_data() through stat_summary().
b <- ggplot(codonbias_cerat, aes(x = chromosome, y = gc3s, fill = chromosome))

gc3s_plot <- b +
  xlab("") +
  ylab("GC3s") +
  geom_point(position = position_jitterdodge(jitter.width = 0.75, jitter.height = 0),
             aes(group = chromosome, color = factor(chromosome)),
             alpha = 0.2, shape = 19, size = 2) +
  scale_colour_manual(values = c("#CC79A7", "#D55E00", "#0072B2")) +
  geom_boxplot(color = c("#CC79A7", "#D55E00", "#0072B2"), fill = "white",
               alpha = 0, lwd = 2) +
  geom_signif(comparisons = list(c("Autosomal", "U-linked")), map_signif_level = TRUE,
              test = "wilcox.test", y_position = 1.25, size = 1.5, textsize = 10,
              color = "gray40") +
  geom_signif(comparisons = list(c("Autosomal", "V-linked")), map_signif_level = TRUE,
              test = "wilcox.test", y_position = 1.1, size = 1.5, textsize = 10,
              color = "gray40") +
  geom_signif(comparisons = list(c("U-linked", "V-linked")), map_signif_level = TRUE,
              test = "wilcox.test", y_position = 1, size = 1.5, textsize = 10,
              color = "gray40") +
  stat_summary(fun.data = stat_box_data, geom = "text", hjust = 0.5,
               vjust = 0.000001, cex = 10, color = "gray40") +
  ylim(c(-0.1, 1.3)) +
  # "none" replaces guides(fill = FALSE), which newer ggplot2 deprecates.
  guides(fill = "none") +
  # Single theme() call; axis.text.x keeps the size that previously won (33).
  theme(axis.title.x = element_text(size = 33),
        axis.title.y = element_text(size = 33),
        axis.text.x = element_text(size = 33),
        axis.text.y = element_text(size = 25),
        legend.position = "none")

# Print explicitly: a bare ggplot expression is only drawn by top-level
# auto-printing, so the PNG would be empty when the script is source()d.
print(gc3s_plot)

dev.off()

# Pairwise Wilcoxon tests with Benjamini-Hochberg adjustment; printed
# explicitly for the same reason as the plot.
print(pairwise.wilcox.test(codonbias_cerat$gc3s, codonbias_cerat$chromosome,
                           p.adjust.method = "BH"))


########################## dN/dS


# dN/dS values with a Sex grouping column, used by the dN/dS panel.
dNdS_data <- read.csv("MS_plotData/dnds_july2019.csv", header = TRUE)

# Open the output device for the dN/dS panel (8 x 8 in, 1000 dpi).
png("Figures/dNdS_Mar152021.png",
    width = 8, height = 8, units = "in", res = 1000)

# Summary function for stat_summary(fun.data = ...) in the dN/dS panel.
#   x           numeric values of one group
#   upper_limit y coordinate at which the label is placed
# Returns a one-row data frame: y = upper_limit and label = the group mean,
# rounded to two decimals and formatted as text without scientific notation.
stat_box_data <- function(x, upper_limit = -0.2) {
  group_mean <- round(mean(x), 2)
  mean_text <- format(group_mean, big.mark = ",", decimal.mark = ".",
                      scientific = FALSE)
  data.frame(y = upper_limit, label = mean_text)
}

# dN/dS by Sex group: jittered points under outlined boxplots, pairwise
# Wilcoxon brackets from geom_signif(), and a text layer produced by
# stat_box_data() through stat_summary().
b <- ggplot(dNdS_data, aes(x = Sex, y = dNdS, fill = Sex))

dnds_plot <- b +
  xlab("") +
  ylab("dN/dS") +
  geom_point(position = position_jitterdodge(jitter.width = 0.75, jitter.height = 0),
             aes(group = Sex, color = factor(Sex)),
             alpha = 0.2, shape = 19, size = 2) +
  scale_colour_manual(values = c("#CC79A7", "#D55E00", "#0072B2")) +
  geom_boxplot(color = c("#CC79A7", "#D55E00", "#0072B2"), fill = "white",
               alpha = 0, lwd = 2) +
  geom_signif(comparisons = list(c("Autosomal", "U-linked")), map_signif_level = TRUE,
              test = "wilcox.test", y_position = 2, size = 1.5, textsize = 10,
              color = "gray40") +
  geom_signif(comparisons = list(c("Autosomal", "V-linked")), map_signif_level = TRUE,
              test = "wilcox.test", y_position = 1.7, size = 1.5, textsize = 10,
              color = "gray40") +
  geom_signif(comparisons = list(c("U-linked", "V-linked")), map_signif_level = TRUE,
              test = "wilcox.test", y_position = 1.4, size = 1.5, textsize = 10,
              color = "gray40") +
  stat_summary(fun.data = stat_box_data, geom = "text", hjust = 0.5,
               vjust = 0.000001, cex = 10, color = "gray40") +
  ylim(c(-0.25, 2.1)) +
  # "none" replaces guides(fill = FALSE), which newer ggplot2 deprecates.
  guides(fill = "none") +
  # Single theme() call; axis.text.x keeps the size that previously won (33).
  theme(axis.title.x = element_text(size = 33),
        axis.title.y = element_text(size = 33),
        axis.text.x = element_text(size = 33),
        axis.text.y = element_text(size = 25),
        legend.position = "none")

# Print explicitly: a bare ggplot expression is only drawn by top-level
# auto-printing, so the PNG would be empty when the script is source()d.
print(dnds_plot)

dev.off()

# Pairwise Wilcoxon tests with Benjamini-Hochberg adjustment; printed
# explicitly for the same reason as the plot.
print(pairwise.wilcox.test(dNdS_data$dNdS, dNdS_data$Sex,
                           p.adjust.method = "BH"))



############ Fig. 3A, sex chromosome evolution tree ############

library(ggtree)
# version 1.14.6
library(ggplot2)
# version 3.3.1
library(ape)
# version 5.3
library("phytools")
# version 0.7-20
library("devtools")
# version 2.2.2
library("svglite")
# version 1.2.3


tree_file <- read.tree("MS_plotData/sex_chrom_tree.tre")

# Display names attached to the tips. The vectors are paired with
# tree_file$tip.label by position, so they must follow the tip order of the
# tree file.
genus <- c("Brachythecium", "Hylocomium", "Aulacomnium", "Syntrichia","Ceratodon", "Scouleria", "Physcomitrium", "Buxbaumia","Sphagnum",
           "Schistochila", "Bazzania", "Pellia", "Conocephalum","Lunularia", "Marchantia", "Sphaerocarpos", "Treubia","Hornworts","Tracheophytes")
species <- c("rivulare", "splendens", "palustre", "princeps","purpureus", "aquatica", "patens","aphylla", "palustre",
             "spp.", "trilobata", "endiviifolia", "conicum", "cruciata", "polymorpha", "texanus","lacunosa","","")
d <- data.frame(label = tree_file$tip.label, genus = genus,
                species = species)

# Cladogram (branch lengths ignored) with bold-italic "Genus species" tip
# labels built as plotmath expressions.
tree_plot <- ggtree(tree_file, branch.length = "none", color="black", size=1.25) %<+% d +
  geom_tiplab(size=6, color="black", aes(label=paste0('bolditalic(', genus, ')~bolditalic(', species, ')')), parse=TRUE) +
  xlim(NA,15) +
  theme_tree("white")

# Save only after tree_plot exists. Previously ggsave() ran before the plot
# was built, which fails in a fresh session or writes a stale tree_plot.
ggsave("Figures/sex_chrom_tree.png", tree_plot, units="in", width=12, height=6, dpi=1000,
       device="png")

# Preview on the default device, then close it again. The explicit print()
# guarantees a device is open for dev.off() even under source().
print(tree_plot)

dev.off()

## Further editing, like adding the Ancestral Elements, done in Inkscape


#################### Fig. 3B, Ks plot on UV sex chromosomes ##################

library(karyoploteR)
# version 1.8.8

# Chromosome extents and a default karyotype preview.
r40 <- read.table("MS_plotData/R40_genome_lengths_sex.txt", header = TRUE)

custom.genome <- toGRanges(data.frame(chr = r40$Chromosome,
                                      start = r40$ChromStart,
                                      end = r40$ChromEnd))
kp <- plotKaryotype(genome = custom.genome, pin = 8)
kp

# Ks table: one row per gene pair, with coordinates for both members.
data.ks <- read.table("MS_plotData/GG1_R40_Ks.txt", header = TRUE)

# All pairs, addressed by column name.
data.ks.start.GR <- toGRanges(data.frame(chr = data.ks$Chromosome,
                                         start = data.ks$Start,
                                         end = data.ks$Stop))
data.ks.end.GR <- toGRanges(data.frame(chr = data.ks$Chromosome.1,
                                       start = data.ks$Start.1,
                                       end = data.ks$Stop.1))

# GRanges for a subset of rows, taking chr/start/end from three column
# positions of the Ks table (positions, not names, as in the row subsets).
ks_rows_to_granges <- function(ks_table, rows, cols) {
  toGRanges(data.frame(chr = ks_table[rows, cols[1]],
                       start = ks_table[rows, cols[2]],
                       end = ks_table[rows, cols[3]]))
}
first_member_cols <- c(1, 2, 3)
second_member_cols <- c(4, 5, 6)

# Hard-coded row groups of the Ks table (row 1; rows 2-4; rows 5-13;
# rows 296-336) used for the highlighted link sets.
data.ks.start.GR.bux <- ks_rows_to_granges(data.ks, 1, first_member_cols)
data.ks.end.GR.bux <- ks_rows_to_granges(data.ks, 1, second_member_cols)

data.ks.start.GR.pleuro <- ks_rows_to_granges(data.ks, 2:4, first_member_cols)
data.ks.end.GR.pleuro <- ks_rows_to_granges(data.ks, 2:4, second_member_cols)

data.ks.start.GR.scou <- ks_rows_to_granges(data.ks, 5:13, first_member_cols)
data.ks.end.GR.scou <- ks_rows_to_granges(data.ks, 5:13, second_member_cols)

data.ks.start.GR.recent <- ks_rows_to_granges(data.ks, 296:336, first_member_cols)
data.ks.end.GR.recent <- ks_rows_to_granges(data.ks, 296:336, second_member_cols)


svg("Figures/Ks_links.svg", width = 10, height = 8)

# Karyotype restricted to the V and U chromosomes, using plot.type 2.
custom.genome <- toGRanges(data.frame(chr = r40$Chromosome,
                                      start = r40$ChromStart,
                                      end = r40$ChromEnd))
kp <- plotKaryotype(genome = custom.genome, pin = 8,
                    chromosomes = c("V", "U"), plot.type = 2)
kp

# Ks of each pair, plotted at the second member's start position.
kpPoints(kp, chr = data.ks$Chromosome.1, x = data.ks$Start.1,
         y = data.ks$Ks, ymax = 3, cex = 0.75)

# Two axes on chromosome U, data panel 1: side 2 first, then side 1.
for (axis_side in c(2, 1)) {
  kpAxis(kp, numticks = 6, side = axis_side, cex = 1, ymax = 3, col = "black",
         chromosomes = "U", data.panel = 1)
}

# Links between pair members: all pairs first, then the highlighted subsets
# drawn on top in the same order as before.
kpPlotLinks(kp, data = data.ks.start.GR, data2 = data.ks.end.GR,
            lwd = 2, col = "antiquewhite", y = -0.2)
kpPlotLinks(kp, data = data.ks.start.GR.recent, data2 = data.ks.end.GR.recent,
            lwd = 2, col = "darkgoldenrod", y = -0.2)

kpPlotLinks(kp, data = data.ks.start.GR.bux, data2 = data.ks.end.GR.bux,
            lwd = 5, col = "#ff8080ff", y = -0.2)
kpPlotLinks(kp, data = data.ks.start.GR.pleuro, data2 = data.ks.end.GR.pleuro,
            lwd = 5, col = "lightSalmon1", y = -0.2)
kpPlotLinks(kp, data = data.ks.start.GR.scou, data2 = data.ks.end.GR.scou,
            lwd = 5, col = "gold1", y = -0.2)

dev.off()

## Further editing, such as matching the color palette, done in Inkscape


################## Supplementary figures ##################

################## Fig. S4, gene trees w/ancient sex-linkage ##################

library(ggtree)
# version 1.14.6
library(ggplot2)
# version 3.3.1
library(ape)
# version 5.3
library("phytools")
# version 0.7-20
library("devtools")
# version 2.2.2
library("svglite")
# version 1.2.3

## example diagram, showing a gene sex-linked only in Ceratodon

tree_file <- read.tree("MS_plotData/sex_chrom_tree_exampleCerat.tre")

# Display names and sex annotation, paired with tree_file$tip.label by
# position (so they must follow the tip order of the tree file).
genus <- c("Brachythecium", "Hylocomium", "Aulacomnium", "Syntrichia","Ceratodon","Ceratodon", "Scouleria", "Buxbaumia","Sphagnum")
species <- c("rivulare", "splendens", "palustre", "princeps","purpureus", "purpureus", "aquatica","aphylla", "palustre")
sex <- c(" ", " ", " ", " ","female", "male", " ", " "," ")

d <- data.frame(label = tree_file$tip.label, genus = genus,
                species = species, sex = sex)

# Rotate node 19 of the tree, then draw a cladogram with
# "Genus species sex" tip labels and two highlighted nodes (5 and 6).
tree_plot <- rotate(tree_file,19)
tree_plot <- ggtree(tree_plot, branch.length = "none", color="black", size=1.25) %<+% d +
  geom_tiplab(size=6, color="black", aes(label=paste0('bolditalic(', genus, ')~bolditalic(', species, ')~bold(', sex, ')')), parse=TRUE) +
  xlim(NA,40) +
  theme_tree("white") +
  geom_hilight(node=5, fill="#D55E00", alpha=0.5, extend=30) +
  geom_hilight(node=6, fill="#0072B2", alpha=0.5, extend=30)

# Save only after this figure's tree_plot exists. Previously ggsave() ran
# first and wrote whatever tree_plot was left over from an earlier figure.
ggsave("Figures/sex_chrom_tree_exampleCerat.png", tree_plot, units="in", width=6, height=6, dpi=1000,
       device="png")

# Preview on the default device, then close it again. The explicit print()
# guarantees a device is open for dev.off() even under source().
print(tree_plot)

dev.off()

## example diagram, showing a gene sex-linked a long time ago so is shared across many species

tree_file <- read.tree("MS_plotData/sex_chrom_tree_exampleAncient.tre")

# Display names and sex annotation, paired with tree_file$tip.label by
# position (so they must follow the tip order of the tree file).
genus <- c("Brachythecium", "Hylocomium", "Aulacomnium", "Syntrichia","Ceratodon", "Scouleria", "Buxbaumia","Brachythecium", "Hylocomium", "Aulacomnium", "Syntrichia","Ceratodon","Scouleria", "Buxbaumia","Sphagnum")
species <- c("rivulare", "splendens", "palustre", "princeps","purpureus", "aquatica", "aphylla","rivulare", "splendens", "palustre", "princeps","purpureus", "aquatica", "aphylla", "palustre")
sex <- c("female", "female", "female", "female","female", "female", "female","male","male", "male", "male","male", "male", "male"," ")

d <- data.frame(label = tree_file$tip.label, genus = genus,
                species = species, sex = sex)

# Rotate node 17 of the tree, then draw a cladogram with
# "Genus species sex" tip labels and two highlighted nodes (18 and 26).
tree_plot <- rotate(tree_file,17)
tree_plot <- ggtree(tree_plot, branch.length = "none", color="black", size=1.25) %<+% d +
  geom_tiplab(size=6, color="black", aes(label=paste0('bolditalic(', genus, ')~bolditalic(', species, ')~bold(', sex, ')')), parse=TRUE) +
  xlim(NA,40) +
  theme_tree("white") +
  geom_hilight(node=18, fill="#D55E00", alpha=0.5, extend=30) +
  geom_hilight(node=26, fill="#0072B2", alpha=0.5, extend=30)

# Save only after this figure's tree_plot exists. Previously ggsave() ran
# first and wrote the previous example's tree into this file.
ggsave("Figures/sex_chrom_tree_exampleAncient.png", tree_plot, units="in", width=6, height=6, dpi=1000,
       device="png")

# Preview on the default device, then close it again. The explicit print()
# guarantees a device is open for dev.off() even under source().
print(tree_plot)

dev.off()


## Buxbaumia coalescence, i.e. the oldest sex-linked gene in mosses
tree_file <- read.raxml("MS_plotData/RAxML_bipartitionsBranchLabels.cluster402.tree")

## Preview with internal node numbers, used to pick the nodes referenced
## below (194, 240, 273).
tree_plot <- ggtree(tree_file, color="black", size=1) +
  geom_tiplab(size=2, color="black") +
  geom_nodelab(aes(label=node),size=2, color="red")+
  theme_tree("white")
print(tree_plot)


png("Figures/bux.png", width = 5, height = 10, units = 'in', res = 1000)

## Figure version: node labels show bootstrap values instead of node numbers.
tree_plot <- ggtree(tree_file, color="black", size=1) +
  geom_tiplab(size=2, color="black") +
  geom_nodelab(aes(label=bootstrap),size=2, color="red") +
  theme_tree("white") +
  xlim(0,2)

## Collapse the clade at node 194 and mark it with a filled diamond.
tree_plot <- collapse(tree_plot,node=194) +
  geom_point2(aes(subset=(node==194)), shape=23, size=5, fill='black')

## Print explicitly: a bare ggplot expression is only drawn by top-level
## auto-printing, so the PNG would be empty when the script is source()d.
print(
  tree_plot +
    geom_hilight(node=273, fill="#D55E00", alpha=0.5, extend=1) +
    geom_hilight(node=240, fill="#0072B2", alpha=0.5, extend=1) +
    geom_treescale(color="gray50", fontsize=5,x=0, y=45)
)

dev.off()



## Liverwort leafy/thallose split, i.e. one of the oldest sex-linked genes in liverworts
tree_file <- read.raxml("MS_plotData/RAxML_bipartitionsBranchLabels.cluster17.tree")

## Preview with internal node numbers, used to pick the nodes referenced
## below (373, 393, 443).
tree_plot <- ggtree(tree_file, color="black", size=1) +
  geom_tiplab(size=2, color="black") +
  geom_nodelab(aes(label=node),size=2, color="red")+
  theme_tree("white")
print(tree_plot)


png("Figures/leafythallose.png", width = 5, height = 10, units = 'in', res = 1000)

## Figure version: node labels show bootstrap values instead of node numbers.
tree_plot <- ggtree(tree_file, color="black", size=1) +
  geom_tiplab(size=2, color="black") +
  geom_nodelab(aes(label=bootstrap),size=2, color="red") +
  theme_tree("white") +
  xlim(0,1.75)

## Collapse the clade at node 443 and mark it with a filled diamond.
tree_plot <- collapse(tree_plot,node=443) +
  geom_point2(aes(subset=(node==443)), shape=23, size=5, fill='black')

## Print explicitly: a bare ggplot expression is only drawn by top-level
## auto-printing, so the PNG would be empty when the script is source()d.
print(
  tree_plot +
    geom_hilight(node=393, fill="#D55E00", alpha=0.5, extend=1) +
    geom_hilight(node=373, fill="#0072B2", alpha=0.5, extend=1) +
    geom_treescale(color="gray50", fontsize=5,x=1.65, y=45)
)

dev.off()


## marchantia and ceratodon ABC1 V-linked gene


################## Fig. S5, gene expression ##################

library("DESeq2")
#DESeq2 v.1.22.2

### differential expression analysis
# Count matrix (genes in rows, keyed by gene_id) and the sample sheet.
countData <- as.matrix(
  read.csv("MS_plotData/gene_count_matrix.csv", row.names = "gene_id")
)
colData <- read.csv("MS_plotData/ceratodon_diff_expr_pheno.csv",
                    sep = ",", row.names = 1)

# Fit the model with Stage_Sex and Population as design terms.
dds <- DESeqDataSetFromMatrix(countData = countData,
                              colData = colData,
                              design = ~ Stage_Sex + Population)
dds <- DESeq(dds)

# Four Stage_Sex contrasts: between sexes within each stage, and between
# stages within each sex.
res.gam <- results(dds, contrast = c("Stage_Sex", "Gametophore_Female", "Gametophore_Male"))
res.proto <- results(dds, contrast = c("Stage_Sex", "Protonema_Female", "Protonema_Male"))
res.fe <- results(dds, contrast = c("Stage_Sex", "Gametophore_Female", "Protonema_Female"))
res.ma <- results(dds, contrast = c("Stage_Sex", "Gametophore_Male", "Protonema_Male"))

# Sort each result table by adjusted p-value and show it.
resOrdered.gam <- res.gam[order(res.gam$padj), ]
resOrdered.gam
resOrdered.proto <- res.proto[order(res.proto$padj), ]
resOrdered.proto
resOrdered.fe <- res.fe[order(res.fe$padj), ]
resOrdered.fe
resOrdered.ma <- res.ma[order(res.ma$padj), ]
resOrdered.ma

# Write the sorted tables out.
write.csv(as.data.frame(resOrdered.gam),
          file = "Tables/results_btwnSex_gam_r40Ref.csv")
write.csv(as.data.frame(resOrdered.proto),
          file = "Tables/results_btwnSex_proto_r40Ref.csv")
write.csv(as.data.frame(resOrdered.fe),
          file = "Tables/results_btwnStage_females_r40ref.csv")
write.csv(as.data.frame(resOrdered.ma),
          file = "Tables/results_btwnStage_males_r40ref.csv")

### heat map figures
# heat maps code was modified from (https://genviz.org/module-04-expression/0004/02/01/DifferentialExpression/)

### differentially expressed genes

library("DESeq2")
#DESeq2 v.1.22.2
library("ggdendro")
# version 0.1.22
library("ggplot2")
# version 3.3.1
library("grid")
# version 3.5.3
library("gridExtra")
# version 2.3
library("gtable")
# version 0.3.0
library("reshape2")
# version 1.4.3
library("viridis")
# version 0.5.1

# Heat map of the genes listed in ceratodon_DEGs.csv: VST-transformed
# expression, samples ordered by hierarchical clustering, with a sample
# dendrogram and Sex / Stage annotation strips stacked above the heat map.

# Count matrix (row names from gene_id) and sample sheet.
countData <- as.matrix(read.csv("MS_plotData/gene_count_matrix.csv", row.names="gene_id"))

colData <- read.csv("MS_plotData/ceratodon_diff_expr_pheno.csv", sep=",", row.names=1)

dds <- DESeqDataSetFromMatrix(countData = countData,
                              colData = colData, design = ~ Stage_Sex + Population)

deseq2Data <- DESeq(dds)
# NOTE(review): DESeq() presumably already estimates size factors, so this
# call looks redundant - confirm before removing.
deseq2Data <- estimateSizeFactors(deseq2Data)
# deseq2Results / deseq2ResDF are not used again in this section.
deseq2Results <- results(deseq2Data, contrast=c("Stage_Sex","Gametophore_Female","Gametophore_Male"))
deseq2ResDF <- as.data.frame(deseq2Results)
# Variance-stabilising transform, converted to a data frame with the gene
# identifier as an explicit column.
deseq2VST <- vst(deseq2Data)
deseq2VST <- assay(deseq2VST)
deseq2VST <- as.data.frame(deseq2VST)
deseq2VST$Gene <- rownames(deseq2VST)

# read in list of genes to plot
DEGs <- read.csv("MS_plotData/ceratodon_DEGs.csv", header=FALSE)

# Keep only genes named in the first column of the list.
deseq2VST <- deseq2VST[deseq2VST$Gene %in% DEGs$V1,]


# Long format (Gene, variable, value) for ggplot; wide gene-by-sample table
# for the distance calculations.
deseq2VST <- melt(deseq2VST, id.vars=c("Gene"))
deseq2VSTMatrix <- dcast(deseq2VST, Gene ~ variable)
rownames(deseq2VSTMatrix) <- deseq2VSTMatrix$Gene
deseq2VSTMatrix$Gene <- NULL

# Distances between genes (rows) and between samples (columns).
distanceGene <- dist(deseq2VSTMatrix)
distanceSample <- dist(t(deseq2VSTMatrix))

# Average-linkage clustering. clusterGene is not used again in this section.
clusterGene <- hclust(distanceGene, method="average")
clusterSample <- hclust(distanceSample, method="average")


# Sample dendrogram drawn from its line segments.
sampleModel <- as.dendrogram(clusterSample)
sampleDendrogramData <- segment(dendro_data(sampleModel, type = "rectangle"))
sampleDendrogram <- ggplot(sampleDendrogramData) + 
  geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) + theme_dendro()

# Order the samples on the heat map x axis by the clustering order.
deseq2VST$variable <- factor(deseq2VST$variable, 
                             levels=clusterSample$labels[clusterSample$order])

heatmap <- ggplot(deseq2VST, aes(x=variable, y=Gene, fill=value)) +
  geom_raster() + scale_fill_viridis(trans="sqrt") + 
  theme(axis.text.x=element_text(angle=65, hjust=1), axis.text.y=element_blank(), 
        axis.ticks.y=element_blank()) + xlab("Sample") +
  theme(plot.margin=unit(c(1,1,1.5,1.2),"cm"))

# Remove (heat map) or fix (dendrogram) the scale padding.
# NOTE(review): the 0.0085 expansion is presumably tuned by eye so the
# dendrogram tips sit over the heat map columns - confirm.
sampleDendrogram_1 <- sampleDendrogram + scale_x_continuous(expand=c(.0085, .0085)) + scale_y_continuous(expand=c(0, 0))
heatmap_1 <- heatmap + scale_x_discrete(expand=c(0, 0)) + scale_y_discrete(expand=c(0, 0))

sampleDendrogramGrob <- ggplotGrob(sampleDendrogram_1)
heatmapGrob <- ggplotGrob(heatmap_1)

# Insert two columns into the dendrogram grob, sized from heat map grob
# columns 7 and 8, so both grobs have the same number of columns.
# NOTE(review): these indices depend on the ggplot2 gtable layout - confirm
# when changing ggplot2 versions.
sampleDendrogramGrob <- gtable_add_cols(sampleDendrogramGrob, heatmapGrob$widths[7], 6)
sampleDendrogramGrob <- gtable_add_cols(sampleDendrogramGrob, heatmapGrob$widths[8], 7)

# Give both grobs identical column widths.
maxWidth <- unit.pmax(sampleDendrogramGrob$widths, heatmapGrob$widths)
sampleDendrogramGrob$widths <- as.list(maxWidth)
heatmapGrob$widths <- as.list(maxWidth)

# Intermediate layout; finalGrob is reassigned further down with the
# annotation strips included.
finalGrob <- arrangeGrob(sampleDendrogramGrob, heatmapGrob, ncol=1, heights=c(2,5))
# Sample annotations, ordered like the heat map and limited to plotted samples.
colData2 <- as.data.frame(colData)
colData2$names <- rownames(colData2)

colData2$names <- factor(colData2$names, levels=clusterSample$labels[clusterSample$order])
colData2 <- colData2[colData2$names %in% deseq2VST$variable,]

# One-row tile strip coloured by Stage.
# NOTE(review): the numeric legend.position values are presumably tuned by
# eye to place the legend outside the strip - confirm.
sample_stage <- ggplot(colData2, 
                       aes(x=names, y=1, 
                           fill=Stage)) + 
  geom_tile() + scale_x_discrete(expand=c(0, 0)) + 
  scale_y_discrete(expand=c(0, 0)) + 
  scale_fill_manual(name="Stage", values=c("forestgreen","green")) +
  theme_void() + theme(legend.position=c(1.1,2))


# One-row tile strip coloured by Sex.
sample_sex <- ggplot(colData2, 
                     aes(x=names, y=1, 
                         fill=Sex)) + 
  geom_tile() + scale_x_discrete(expand=c(0, 0)) + 
  scale_y_discrete(expand=c(0, 0)) + 
  scale_fill_manual(name="Sex", values=c("salmon","turquoise")) +
  theme_void() + theme(legend.position=c(1.075,5))



sample_stageGrob <- ggplotGrob(sample_stage)
sample_sexGrob <- ggplotGrob(sample_sex)


# Give all four grobs identical column widths so they line up when stacked.
maxWidth <- unit.pmax(sampleDendrogramGrob$widths, heatmapGrob$widths, sample_sexGrob$widths, sample_stageGrob$widths)
sampleDendrogramGrob$widths <- as.list(maxWidth)
heatmapGrob$widths <- as.list(maxWidth)
sample_sexGrob$widths <- as.list(maxWidth)
sample_stageGrob$widths <- as.list(maxWidth)


# Final plot for sex-biased genes
# Stack order, top to bottom: dendrogram, Sex strip, Stage strip, heat map.
finalGrob <- arrangeGrob(sampleDendrogramGrob, sample_sexGrob, sample_stageGrob, heatmapGrob, 
                         ncol=1, heights=c(1.5,0.25,0.25,8))

png("Figures/ceratodon_DEGs_heatmap.png", width = 8, height = 8, units = 'in', res = 1000)

grid.draw(finalGrob)

dev.off()


### U-linked genes with putative reproductive functions

library("DESeq2")
#DESeq2 v.1.22.2
library("ggdendro")
# version 0.1.22
library("ggplot2")
# version 3.3.1
library("grid")
# version 3.5.3
library("gridExtra")
# version 2.3
library("gtable")
# version 0.3.0
library("reshape2")
# version 1.4.3
library("viridis")
# version 0.5.1

# Heat map of the genes listed in ceratodon_U_repro_genes.csv: VST-transformed
# expression, samples ordered by hierarchical clustering, with a sample
# dendrogram and a Stage annotation strip stacked above the heat map.

# Count matrix (row names from gene_id) and sample sheet.
countData <- as.matrix(read.csv("MS_plotData/gene_count_matrix.csv", row.names="gene_id"))

colData <- read.csv("MS_plotData/ceratodon_diff_expr_pheno.csv", sep=",", row.names=1)

dds <- DESeqDataSetFromMatrix(countData = countData,
                              colData = colData, design = ~ Stage_Sex + Population)

deseq2Data <- DESeq(dds)
# NOTE(review): DESeq() presumably already estimates size factors, so this
# call looks redundant - confirm before removing.
deseq2Data <- estimateSizeFactors(deseq2Data)
# deseq2Results / deseq2ResDF are not used again in this section.
deseq2Results <- results(deseq2Data, contrast=c("Stage_Sex","Gametophore_Female","Gametophore_Male"))
deseq2ResDF <- as.data.frame(deseq2Results)
# Variance-stabilising transform, converted to a data frame with the gene
# identifier as an explicit column.
deseq2VST <- vst(deseq2Data)
deseq2VST <- assay(deseq2VST)
deseq2VST <- as.data.frame(deseq2VST)
deseq2VST$Gene <- rownames(deseq2VST)

# read in list of genes to plot
DEGs <- read.csv("MS_plotData/ceratodon_U_repro_genes.csv", header=FALSE)

# Keep only genes named in the first column of the list.
deseq2VST <- deseq2VST[deseq2VST$Gene %in% DEGs$V1,]

## use for female only genes
# Drops 18 sample columns by name.
# NOTE(review): the "m" in these sample names presumably marks male samples,
# leaving only female samples - verify against the sample sheet.
deseq2VST <- subset(deseq2VST, select = -c(REN_1.21_I190m_Gam_1,REN_1.21_I190m_Gam_2,REN_1.21_I190m_Gam_3,REN_1.21_I190m_Proto_1,REN_1.21_I190m_Proto_2,REN_1.21_I190m_Proto_3,Alsk_10.5_B190m_Gam_1,Alsk_10.5_B190m_Gam_2,Alsk_10.5_B190m_Gam_3,Alsk_10.5_B190m_Proto_1,Alsk_10.5_B190m_Proto_2,Alsk_10.5_B190m_Proto_3,POR_P8.P9_M270m_Gam_1,POR_P8.P9_M270m_Gam_2,POR_P8.P9_M270m_Gam_3,POR_P8.P9_M270m_Proto_1,POR_P8.P9_M270m_Proto_2,POR_P8.P9_M270m_Proto_3))

# Long format (Gene, variable, value) for ggplot; wide gene-by-sample table
# for the distance calculations.
deseq2VST <- melt(deseq2VST, id.vars=c("Gene"))
deseq2VSTMatrix <- dcast(deseq2VST, Gene ~ variable)
rownames(deseq2VSTMatrix) <- deseq2VSTMatrix$Gene
deseq2VSTMatrix$Gene <- NULL

# Distances between genes (rows) and between samples (columns).
distanceGene <- dist(deseq2VSTMatrix)
distanceSample <- dist(t(deseq2VSTMatrix))

# Average-linkage clustering. clusterGene is not used again in this section.
clusterGene <- hclust(distanceGene, method="average")
clusterSample <- hclust(distanceSample, method="average")


# Sample dendrogram drawn from its line segments.
sampleModel <- as.dendrogram(clusterSample)
sampleDendrogramData <- segment(dendro_data(sampleModel, type = "rectangle"))
sampleDendrogram <- ggplot(sampleDendrogramData) + 
  geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) + theme_dendro()

# Order the samples on the heat map x axis by the clustering order.
deseq2VST$variable <- factor(deseq2VST$variable, 
                             levels=clusterSample$labels[clusterSample$order])

heatmap <- ggplot(deseq2VST, aes(x=variable, y=Gene, fill=value)) +
  geom_raster() + scale_fill_viridis(trans="sqrt") + 
  theme(axis.text.x=element_text(angle=65, hjust=1), axis.text.y=element_blank(), 
        axis.ticks.y=element_blank()) + xlab("Sample") +
  theme(plot.margin=unit(c(1,1,1.5,1.2),"cm"))

# Remove (heat map) or fix (dendrogram) the scale padding.
# NOTE(review): the 0.0085 expansion is presumably tuned by eye so the
# dendrogram tips sit over the heat map columns - confirm.
sampleDendrogram_1 <- sampleDendrogram + scale_x_continuous(expand=c(.0085, .0085)) + scale_y_continuous(expand=c(0, 0))
heatmap_1 <- heatmap + scale_x_discrete(expand=c(0, 0)) + scale_y_discrete(expand=c(0, 0))

sampleDendrogramGrob <- ggplotGrob(sampleDendrogram_1)
heatmapGrob <- ggplotGrob(heatmap_1)

# Insert two columns into the dendrogram grob, sized from heat map grob
# columns 7 and 8, so both grobs have the same number of columns.
# NOTE(review): these indices depend on the ggplot2 gtable layout - confirm
# when changing ggplot2 versions.
sampleDendrogramGrob <- gtable_add_cols(sampleDendrogramGrob, heatmapGrob$widths[7], 6)
sampleDendrogramGrob <- gtable_add_cols(sampleDendrogramGrob, heatmapGrob$widths[8], 7)

# Give both grobs identical column widths.
maxWidth <- unit.pmax(sampleDendrogramGrob$widths, heatmapGrob$widths)
sampleDendrogramGrob$widths <- as.list(maxWidth)
heatmapGrob$widths <- as.list(maxWidth)

# Intermediate layout; finalGrob is reassigned further down with the
# annotation strip included.
finalGrob <- arrangeGrob(sampleDendrogramGrob, heatmapGrob, ncol=1, heights=c(2,5))
# Sample annotations, ordered like the heat map and limited to plotted samples.
colData2 <- as.data.frame(colData)
colData2$names <- rownames(colData2)

colData2$names <- factor(colData2$names, levels=clusterSample$labels[clusterSample$order])
colData2 <- colData2[colData2$names %in% deseq2VST$variable,]

# One-row tile strip coloured by Stage.
# NOTE(review): the numeric legend.position values are presumably tuned by
# eye to place the legend outside the strip - confirm.
sample_stage <- ggplot(colData2, 
                       aes(x=names, y=1, 
                           fill=Stage)) + 
  geom_tile() + scale_x_discrete(expand=c(0, 0)) + 
  scale_y_discrete(expand=c(0, 0)) + 
  scale_fill_manual(name="Stage", values=c("forestgreen","green")) +
  theme_void() + theme(legend.position=c(1.1,2))


# One-row tile strip coloured by Sex. Its grob is width-matched below but is
# not included in the final layout of this figure.
sample_sex <- ggplot(colData2, 
                     aes(x=names, y=1, 
                         fill=Sex)) + 
  geom_tile() + scale_x_discrete(expand=c(0, 0)) + 
  scale_y_discrete(expand=c(0, 0)) + 
  scale_fill_manual(name="Sex", values=c("salmon","turquoise")) +
  theme_void() + theme(legend.position=c(1.075,5))



sample_stageGrob <- ggplotGrob(sample_stage)
sample_sexGrob <- ggplotGrob(sample_sex)


# Give all four grobs identical column widths so they line up when stacked.
maxWidth <- unit.pmax(sampleDendrogramGrob$widths, heatmapGrob$widths, sample_sexGrob$widths, sample_stageGrob$widths)
sampleDendrogramGrob$widths <- as.list(maxWidth)
heatmapGrob$widths <- as.list(maxWidth)
sample_sexGrob$widths <- as.list(maxWidth)
sample_stageGrob$widths <- as.list(maxWidth)


## Final plot for U-specific genes
# Stack order, top to bottom: dendrogram, Stage strip, heat map (no Sex strip).
finalGrob <- arrangeGrob(sampleDendrogramGrob, sample_stageGrob, heatmapGrob, 
                         ncol=1, heights=c(1.5,0.25,8))
png("Figures/ceratodon_U_reproGenes_heatmap.png", width = 8, height = 8, units = 'in', res = 1000)

grid.draw(finalGrob)

dev.off()


### V-linked genes with putative reproductive functions

library("DESeq2")
#DESeq2 v.1.22.2
library("ggdendro")
# version 0.1.22
library("ggplot2")
# version 3.3.1
library("grid")
# version 3.5.3
library("gridExtra")
# version 2.3
library("gtable")
# version 0.3.0
library("reshape2")
# version 1.4.3
library("viridis")
# version 0.5.1

## Count matrix: rows are indexed by the "gene_id" column of the CSV.
countData <- as.matrix(
  read.csv("MS_plotData/gene_count_matrix.csv", row.names = "gene_id")
)

## Sample sheet: the first column supplies the row names.
colData <- read.csv(
  "MS_plotData/ceratodon_diff_expr_pheno.csv",
  sep = ",",
  row.names = 1
)

dds <- DESeqDataSetFromMatrix(
  countData = countData,
  colData = colData,
  design = ~ Stage_Sex + Population
)

## Fit the model, then (re-)estimate size factors on the fitted object.
deseq2Data <- estimateSizeFactors(DESeq(dds))

## Female vs. male gametophore contrast; these two objects are not referenced
## again in the rest of this heatmap section.
deseq2Results <- results(
  deseq2Data,
  contrast = c("Stage_Sex", "Gametophore_Female", "Gametophore_Male")
)
deseq2ResDF <- as.data.frame(deseq2Results)

## Variance-stabilised values as a data frame, with gene IDs copied from the
## row names into a "Gene" column.
deseq2VST <- as.data.frame(assay(vst(deseq2Data)))
deseq2VST$Gene <- rownames(deseq2VST)

# read in list of genes to plot
DEGs <- read.csv("MS_plotData/ceratodon_V_repro_genes.csv", header = FALSE)

## Keep only the genes listed in the first column of that file.
deseq2VST <- deseq2VST[deseq2VST$Gene %in% DEGs$V1, ]

## use for male only genes
## Drops the listed sample columns (presumably the female samples, one
## population per group of rows below -- confirm against the sample sheet).
deseq2VST <- subset(
  deseq2VST,
  select = -c(
    Alsk_10.5_B150f_Gam_1, Alsk_10.5_B150f_Gam_2, Alsk_10.5_B150f_Gam_3,
    Alsk_10.5_B150f_Proto_1, Alsk_10.5_B150f_Proto_2, Alsk_10.5_B150f_Proto_3,
    POR_P8.P9_M280f_Gam_1, POR_P8.P9_M280f_Gam_2, POR_P8.P9_M280f_Gam_3,
    POR_P8.P9_M280f_Proto_1, POR_P8.P9_M280f_Proto_2, POR_P8.P9_M280f_Proto_3,
    REN_1.21_I180f_Gam_1, REN_1.21_I180f_Gam_2, REN_1.21_I180f_Gam_3,
    REN_1.21_I180f_Proto_1, REN_1.21_I180f_Proto_2, REN_1.21_I180f_Proto_3
  )
)

## Long format (Gene, variable, value) for plotting; the wide copy with genes
## as row names is used only for the distance calculations.
deseq2VST <- melt(deseq2VST, id.vars = "Gene")
deseq2VSTMatrix <- dcast(deseq2VST, Gene ~ variable)
rownames(deseq2VSTMatrix) <- deseq2VSTMatrix$Gene
deseq2VSTMatrix$Gene <- NULL

## Average-linkage hierarchical clustering of genes (rows) and samples (columns).
distanceGene <- dist(deseq2VSTMatrix)
distanceSample <- dist(t(deseq2VSTMatrix))

clusterGene <- hclust(distanceGene, method = "average")
clusterSample <- hclust(distanceSample, method = "average")


## Sample dendrogram drawn from the segment coordinates of the clustering.
sampleModel <- as.dendrogram(clusterSample)
sampleDendrogramData <- segment(dendro_data(sampleModel, type = "rectangle"))
sampleDendrogram <- ggplot(sampleDendrogramData) +
  geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) +
  theme_dendro()

## Order the heatmap columns like the dendrogram leaves.
deseq2VST$variable <- factor(
  deseq2VST$variable,
  levels = clusterSample$labels[clusterSample$order]
)

## Heatmap of the per-gene, per-sample values on a sqrt-transformed viridis
## fill scale; gene labels and ticks on the y axis are hidden.
heatmap <- ggplot(deseq2VST, aes(x = variable, y = Gene, fill = value)) +
  geom_raster() +
  scale_fill_viridis(trans = "sqrt") +
  theme(
    axis.text.x = element_text(angle = 65, hjust = 1),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  ) +
  xlab("Sample") +
  theme(plot.margin = unit(c(1, 1, 1.5, 1.2), "cm"))

## Adjust axis expansion on both plots before stacking (the heatmap gets none;
## the dendrogram keeps a small x expansion).
sampleDendrogram_1 <- sampleDendrogram +
  scale_x_continuous(expand = c(.0085, .0085)) +
  scale_y_continuous(expand = c(0, 0))
heatmap_1 <- heatmap +
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0))

sampleDendrogramGrob <- ggplotGrob(sampleDendrogram_1)
heatmapGrob <- ggplotGrob(heatmap_1)

## Append two columns to the dendrogram gtable, sized from columns 7 and 8 of
## the heatmap gtable (presumably the heatmap's legend columns, so that both
## tables have matching column counts -- confirm against the gtable layout).
sampleDendrogramGrob <- gtable_add_cols(
  sampleDendrogramGrob,
  widths = heatmapGrob$widths[7],
  pos = 6
)
sampleDendrogramGrob <- gtable_add_cols(
  sampleDendrogramGrob,
  widths = heatmapGrob$widths[8],
  pos = 7
)

## Give both gtables the element-wise maximum of their column widths.
maxWidth <- unit.pmax(sampleDendrogramGrob$widths, heatmapGrob$widths)
sampleDendrogramGrob$widths <- as.list(maxWidth)
heatmapGrob$widths <- as.list(maxWidth)

## Dendrogram stacked over heatmap, without annotation strips.
finalGrob <- arrangeGrob(
  sampleDendrogramGrob, heatmapGrob,
  ncol = 1,
  heights = c(2, 5)
)

## Sample annotation table: row names become a factor whose levels follow the
## clustered sample order; only samples present in the heatmap data are kept.
colData2 <- as.data.frame(colData)
colData2$names <- factor(
  rownames(colData2),
  levels = clusterSample$labels[clusterSample$order]
)
colData2 <- colData2[colData2$names %in% deseq2VST$variable, ]

## One-row annotation strip: one tile per sample (x = names), filled by Stage.
## theme_void() removes axes and background; the legend is placed with numeric
## coordinates.
sample_stage <- ggplot(colData2, aes(x = names, y = 1, fill = Stage)) +
  geom_tile() +
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  scale_fill_manual(name = "Stage", values = c("forestgreen", "green")) +
  theme_void() +
  theme(legend.position = c(1.1, 2))

## Same strip layout, filled by Sex.
sample_sex <- ggplot(colData2, aes(x = names, y = 1, fill = Sex)) +
  geom_tile() +
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  scale_fill_manual(name = "Sex", values = c("salmon", "turquoise")) +
  theme_void() +
  theme(legend.position = c(1.075, 5))



## Convert the annotation strips to gtables.
sample_stageGrob <- ggplotGrob(sample_stage)
sample_sexGrob <- ggplotGrob(sample_sex)

## Share the element-wise maximum column widths across all four gtables.
maxWidth <- unit.pmax(
  sampleDendrogramGrob$widths,
  heatmapGrob$widths,
  sample_sexGrob$widths,
  sample_stageGrob$widths
)
sampleDendrogramGrob$widths <- as.list(maxWidth)
heatmapGrob$widths <- as.list(maxWidth)
sample_sexGrob$widths <- as.list(maxWidth)
sample_stageGrob$widths <- as.list(maxWidth)

## Final plot for V-specific genes
## Layout top to bottom: dendrogram, Stage strip, heatmap. The Sex strip is
## width-aligned above but is not part of this layout.
finalGrob <- arrangeGrob(
  sampleDendrogramGrob, sample_stageGrob, heatmapGrob,
  ncol = 1,
  heights = c(1.5, 0.25, 8)
)

png(
  "Figures/ceratodon_V_reproGenes_heatmap.png",
  width = 8, height = 8, units = 'in', res = 1000
)
grid.draw(finalGrob)
dev.off()


## Gene tree for cluster224, written to Figures/marchCeratV.png
tree_file <- read.raxml("MS_plotData/RAxML_bipartitionsBranchLabels.cluster224.tree")

##get node numbers
## Exploratory view for interactive use: internal nodes are labelled with their
## node number, which is how the node IDs passed to collapse() and
## geom_hilight() below are looked up.
tree_plot <- ggtree(tree_file, color="black", size=1) + 
  geom_tiplab(size=2, color="black") +
  geom_nodelab(aes(label=node),size=2, color="red")+
  theme_tree("white") 
tree_plot


## Figure version: node labels show bootstrap support instead of node numbers.
tree_plot <- ggtree(tree_file, color="black", size=1) + 
  geom_tiplab(size=2, color="black") +
  geom_nodelab(aes(label=bootstrap),size=2, color="red") +
  theme_tree("white") +
  xlim(0,3)

## Collapse four clades and mark each collapsed node with a filled diamond.
tree_plot <- collapse(tree_plot,node=435) + 
  geom_point2(aes(subset=(node==435)), shape=23, size=5, fill='black')
tree_plot <- collapse(tree_plot,node=432) + 
  geom_point2(aes(subset=(node==432)), shape=23, size=5, fill='black')
tree_plot <- collapse(tree_plot,node=336) + 
  geom_point2(aes(subset=(node==336)), shape=23, size=5, fill='black')
tree_plot <- collapse(tree_plot,node=292) + 
  geom_point2(aes(subset=(node==292)), shape=23, size=5, fill='black')

## Open the device only once the plot object is built, so an error while
## building the tree cannot leave a PNG device open.
png("Figures/marchCeratV.png", width = 5, height = 10, units = 'in', res = 1000)

## print() is explicit because a bare ggplot expression is only rendered by
## top-level auto-printing; under source() or inside a function nothing would
## be drawn into the PNG.
print(
  tree_plot +  
    geom_hilight(node=423, fill="#0072B2", alpha=0.5, extend=1.3) +  
    geom_hilight(node=388, fill="#0072B2", alpha=0.5, extend=1.3) +  
    geom_hilight(node=317, fill="#CC79A7", alpha=0.5, extend=1.3) +
    geom_treescale(color="gray50", fontsize=5,x=0, y=45)
)

dev.off()

################## Fig. S7, gene trees w/interesting functions ##################

## Marchantia cis-acting bidirectional sexual dimorphism switch that is sex-linked in Ceratodon
## Gene tree for cluster457, written to Figures/marchSwitch.png
tree_file <- read.raxml("MS_plotData/RAxML_bipartitionsBranchLabels.cluster457.tree" )

##get node numbers
## Exploratory view for interactive use: internal nodes are labelled with their
## node number, which is how the node IDs passed to collapse() and
## geom_hilight() below are looked up.
tree_plot <- ggtree(tree_file, color="black", size=1) + 
  geom_tiplab(size=2, color="black") +
  geom_nodelab(aes(label=node),size=2, color="red") +
  theme_tree("white") 
tree_plot


## Figure version: node labels show bootstrap support instead of node numbers.
tree_plot <- ggtree(tree_file, color="black", size=1) + 
  geom_tiplab(size=2, color="black") +
  geom_nodelab(aes(label=bootstrap),size=2, color="red") +
  theme_tree("white") +
  xlim(0,2.5)

## Collapse two clades and mark each collapsed node with a filled diamond.
tree_plot <- collapse(tree_plot,node=107) + 
  geom_point2(aes(subset=(node==107)), shape=23, size=5, fill='black')
tree_plot <- collapse(tree_plot,node=96) + 
  geom_point2(aes(subset=(node==96)), shape=23, size=5, fill='black')

## Open the device only once the plot object is built, so an error while
## building the tree cannot leave a PNG device open.
png("Figures/marchSwitch.png", width = 5, height = 10, units = 'in', res = 1000)

## print() is explicit because a bare ggplot expression is only rendered by
## top-level auto-printing; under source() or inside a function nothing would
## be drawn into the PNG.
print(
  tree_plot +  
    geom_hilight(node=154, fill="#0072B2", alpha=0.5, extend=1.1) +  
    geom_hilight(node=157, fill="#D55E00", alpha=0.5, extend=1.1) +  
    geom_hilight(node=46, fill="#CC79A7", alpha=0.5, extend=1.1) +
    geom_treescale(color="gray50", fontsize=5,x=0, y=45)
)

dev.off()


## CRINKLY4
## Gene tree for cluster582, written to Figures/crinkly4.png
tree_file <- read.raxml("MS_plotData/RAxML_bipartitionsBranchLabels.cluster582.tree")

##get node numbers
## Exploratory view for interactive use: internal nodes are labelled with their
## node number, which is how the node IDs passed to collapse() and
## geom_hilight() below are looked up.
tree_plot <- ggtree(tree_file, color="black", size=1) + 
  geom_tiplab(size=2, color="black") +
  geom_nodelab(aes(label=node),size=2, color="red") +
  theme_tree("white")
tree_plot


## Figure version: node labels show bootstrap support instead of node numbers.
tree_plot <- ggtree(tree_file, color="black", size=1) + 
  geom_tiplab(size=2, color="black") +
  geom_nodelab(aes(label=bootstrap),size=2, color="red")+
  theme_tree("white") +
  xlim(0,3.3)

## Collapse four clades and mark each collapsed node with a filled diamond.
tree_plot <- collapse(tree_plot,node=142) + 
  geom_point2(aes(subset=(node==142)), shape=23, size=5, fill='black')
tree_plot <- collapse(tree_plot,node=268) + 
  geom_point2(aes(subset=(node==268)), shape=23, size=5, fill='black')
tree_plot <- collapse(tree_plot,node=242) + 
  geom_point2(aes(subset=(node==242)), shape=23, size=5, fill='black')
tree_plot <- collapse(tree_plot,node=188) + 
  geom_point2(aes(subset=(node==188)), shape=23, size=5, fill='black')

## Open the device only once the plot object is built, so an error while
## building the tree cannot leave a PNG device open.
png("Figures/crinkly4.png", width = 5, height = 10, units = 'in', res = 1000)

## print() is explicit because a bare ggplot expression is only rendered by
## top-level auto-printing; under source() or inside a function nothing would
## be drawn into the PNG.
print(
  tree_plot +  
    geom_hilight(node=229, fill="#0072B2", alpha=0.5, extend=1.4) +  
    geom_hilight(node=233, fill="#D55E00", alpha=0.5, extend=1.4) +  
    geom_hilight(node=81, fill="#CC79A7", alpha=0.5, extend=1.4) +
    geom_treescale(color="gray50", fontsize=5,x=0, y=45)
)

dev.off()

## U-linked RWP-RK
## Gene tree for cluster423, written to Figures/rwprk.png
tree_file <- read.raxml("MS_plotData/RAxML_bipartitionsBranchLabels.cluster423.tree" )

##get node numbers
## Exploratory view for interactive use: internal nodes are labelled with their
## node number, which is how the node IDs passed to geom_hilight() below are
## looked up.
tree_plot <- ggtree(tree_file, color="black", size=1) + 
  geom_tiplab(size=2, color="black") +
  geom_nodelab(aes(label=node),size=2, color="red") +
  theme_tree("white")
tree_plot


## Figure version: node labels show bootstrap support instead of node numbers.
tree_plot <- ggtree(tree_file, color="black", size=1) + 
  geom_tiplab(size=2, color="black") +
  geom_nodelab(aes(label=bootstrap),size=2, color="red") +
  theme_tree("white") +
  xlim(0,2.1)

## Open the device only once the plot object is built, so an error while
## building the tree cannot leave a PNG device open.
png("Figures/rwprk.png", width = 5, height = 10, units = 'in', res = 1000)

## print() is explicit because a bare ggplot expression is only rendered by
## top-level auto-printing; under source() or inside a function nothing would
## be drawn into the PNG.
print(
  tree_plot +  
    geom_hilight(node=196, fill="#D55E00", alpha=0.5, extend=1) +  
    geom_hilight(node=10, fill="#CC79A7", alpha=0.5, extend=1) +
    geom_treescale(color="gray50", fontsize=5,x=0, y=45)
)

dev.off()

