In [2]:
library(tidyr)
library(ggplot2)
library(zoo)
library(RColorBrewer)
library(dplyr)
library(lubridate)
Warning message:
“package ‘tidyr’ was built under R version 3.6.2”
Registered S3 methods overwritten by 'ggplot2':
  method         from 
  [.quosures     rlang
  c.quosures     rlang
  print.quosures rlang

Warning message:
“package ‘zoo’ was built under R version 3.6.2”

Attaching package: ‘zoo’


The following objects are masked from ‘package:base’:

    as.Date, as.Date.numeric



Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Warning message:
“package ‘lubridate’ was built under R version 3.6.2”

Attaching package: ‘lubridate’


The following objects are masked from ‘package:dplyr’:

    intersect, setdiff, union


The following objects are masked from ‘package:base’:

    date, intersect, setdiff, union


Process the Data

In [42]:
#read in the pos data
pos <- read.csv("Table_S4_Concentrations_Identified_Pharmaceuticals_PositiveMode_05052021_v2.csv", header=T, stringsAsFactors = F) #pos_Concentration_Pharmaceuticals_LuxEau_20201231.csv
pos$mode <- "pos"
head(pos)
dim(pos)
A data.frame: 6 × 96
RTm.zChemicalX2019_April_Loc01X2019_April_Loc02X2019_April_Loc03X2019_April_Loc04X2019_April_Loc05X2019_April_Loc06X2019_April_Loc07X2020_July_Loc11X2020_July_Loc12X2020_July_Loc13X2020_August_Loc01X2020_August_Loc04X2020_August_Loc10X2020_August_Loc11X2020_August_Loc12X2020_August_Loc13mode
<dbl><dbl><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr>
14.86253.09703-hydroxycarbamazepinebelow QR 6.902 0.964 3.738 4.004 5.61 below QR1.226 1.326 1.602 51.398 3.796 0.062 0.28 1.348 3.07 pos
8.05152.0707Acetaminophen 9.458 17.046 12.178 22.854 30.728 below QR below QRbelow QR below QR below QR 14.486 below QR below QR below QR below QR below QR pos
10.65370.1795Amisulpride 7.722 8.472 0.376 6.224 2.074 2.694 0.05 0.946 2.58 8.104 108.204 6.612 1.146 2.016 2.982 8.1 pos
15.75278.1903Amitriptyline 0.19251560.1425836below QR 0.01442480.072124 0.0588088below QRbelow QR below QR below QR below QR below QR below QR below QR below QR below QR pos
8.10267.1703Atenolol 1.464 5.784 below QR 1.254 8.598 2.844 0.056 0.094 0.048 0.102 0.168 0.05 below QR below QR below QR below QR pos
4.47119.0604Benzimidazole below QR 0.58560640.14468480.76983682.15914242.84799360.2143040.10253440.20791040.68482563.85344640.91949440.86574080.30618240.34880641.4700544pos
  1. 71
  2. 96
In [43]:
#read in the neg data
neg <- read.csv("Table_S3_Concentrations_Identified_Pharmaceuticals_NegativeMode_05052021_v2.csv", header=T, stringsAsFactors = F) #neg_Concentration_Pharmaceuticals_LuxEau_20201231.csv
neg$mode <- "neg"
neg
dim(neg)
A data.frame: 18 × 96
RTm.zChemicalX2019_April_Loc01X2019_April_Loc02X2019_April_Loc03X2019_April_Loc04X2019_April_Loc05X2019_April_Loc06X2019_April_Loc07X2020_July_Loc11X2020_July_Loc12X2020_July_Loc13X2020_August_Loc01X2020_August_Loc04X2020_August_Loc10X2020_August_Loc11X2020_August_Loc12X2020_August_Loc13mode
<dbl><dbl><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr>
2.33180.0334Acamprosate 0.008 below QR below QR below QR 0.018 0.014 below QR below QR 0.934 below QR 1.104 1.042 1.004 1.02 0.994 1.054 neg
8.62220.9807Acetazolamide below QR 0.06533856 0.28535616 1.18765056 below QR 1.73302752 below QR below QR below QR 0.10934208 0.4978176 below QR below QR below QR below QR 0.35291712 neg
3.46135.0310Allopurinol 0.08 below QR below QR 0.084 0.102 0.104 0.17 below QR below QR below QR below QR below QR below QR below QR below QR below QR neg
16.70429.0538Bicalutamide below QR 0.08779548 0.07488438 0.29867678 0.27457606 0.4475848 0.00344296 below QR 1.31176776 0.62920094 11.286883624.59979456 0.02410072 below QR 1.49166242 0.31244862 neg
18.10287.0247Ciprofibrate below QR 0.167707 0.0560951 0.8512576 3.0349184 1.7187076 0.0474206 below QR 0.1763815 2.7579127 4.1718562 7.4531304 0.312282 below QR 0.0260235 1.2392969 neg
8.20204.1238Dexpanthenol below QR 0.026 3.168 0.714 4.41 2.656 0.202 0.132 0.13 0.038 0.218 below QR 0.064 0.086 below QR below QR neg
14.01423.1381Eprosartan below QR below QR 0.196 0.082 below QR below QR below QR below QR below QR 0.08 3.038 below QR below QR below QR 0.056 below QR neg
18.51286.1446Etodolac below QR below QR below QR 0.01 below QR 0.11 below QR below QR below QR below QR 2.126 below QR below QR below QR below QR below QR neg
19.25280.0588Flufenamic Acid below QR 0.3279188440.10405658 0.1636781880.3717913480.3926026640.127680236below QR below QR 0.0281234 9.7413832921.7397135240.34591782 0.2722345121.5349751723.919277024neg
14.70329.0004Furosemide below QR 0.33206296 below QR 0.2943586 1.12848488 0.18653736 below QR below QR 0.51132404 below QR 49.90734304below QR below QR below QR 1.68412808 0.23747132 neg
8.20295.9572Hydrochlorothiazide below QR 1.31477568 2.68076092 15.1788708628.9792518220.7964405 0.15362868 7.76241656 13.0483149830.22793144153.8228 24.047056642.66587442 7.84280366 19.3685274252.80003366neg
17.60269.0541Leflunomide 0.0032425320.001621266below QR 0.0124297060.0880887860.050259246below QR below QR below QR below QR 0.0697144380.0670123280.0502592460.00540422 below QR 0.018374348neg
19.70270.2075N-Dodecanoyl-N-methylglycine0.0119416 0.0493948 0.1416708 0.4244696 0.0586224 0.059708 0.1340716 below QR below QR below QR 4.456388 below QR below QR 0.0629648 0.0673072 below QR neg
14.80187.0974Nonanedioic Acid below QR 1.99814352 192.9846011208.480000853.40290772248.920573643.141529763.94923204 6.30951084 below QR below QR below QR 1.84568532 below QR 0.84096696 below QR neg
3.12151.0260Oxypurinol below QR 0.12442598 0.15545642 1.70758686 3.19856908 3.56819638 0.05688914 0.29752716 below QR 0.19287548 2.30659604 0.09613352 0.09674196 0.07210014 0.04167814 0.65894052 neg
14.90137.0242Salicylic Acid 3.438 3.926 18.684 14.584 16.806 29.882 5.554 10.034 12.414 9.078 28.91 8.38 42.402 8.492 10.006 6.694 neg
3.37117.0193Succinic Acid below QR 19.65 306.376 383.56 247.96 293.098 146.596 27.232 20.92 25.862 34.37 12.094 13.156 8.54 6.25 13.054 neg
10.12179.0573Theophylline below QR 0.5722103922.1994787363.32696382222.379984416.9353484980.2626834861.8315777222.4203634781.0705523143.3752485780.4424901520.1185498860.2147590640.28286219 0.892186984neg
  1. 18
  2. 96
In [44]:
#deduplicate if cpd measured in both pos and neg; want to take pos measurements only 
dups <- Reduce(intersect, list(pos$Chemical,neg$Chemical))
length(dups)
dups
class(dups)
3
  1. 'Dexpanthenol'
  2. 'Eprosartan'
  3. 'Etodolac'
'character'
In [45]:
#remove these rows from neg before further processing
dedup_neg <- neg[!neg$Chemical %in% dups,]
dim(dedup_neg)
  1. 15
  2. 96
In [46]:
dedup_neg
A data.frame: 15 × 96
RTm.zChemicalX2019_April_Loc01X2019_April_Loc02X2019_April_Loc03X2019_April_Loc04X2019_April_Loc05X2019_April_Loc06X2019_April_Loc07X2020_July_Loc11X2020_July_Loc12X2020_July_Loc13X2020_August_Loc01X2020_August_Loc04X2020_August_Loc10X2020_August_Loc11X2020_August_Loc12X2020_August_Loc13mode
<dbl><dbl><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr>
1 2.33180.0334Acamprosate 0.008 below QR below QR below QR 0.018 0.014 below QR below QR 0.934 below QR 1.104 1.042 1.004 1.02 0.994 1.054 neg
2 8.62220.9807Acetazolamide below QR 0.06533856 0.28535616 1.18765056 below QR 1.73302752 below QR below QR below QR 0.10934208 0.4978176 below QR below QR below QR below QR 0.35291712 neg
3 3.46135.0310Allopurinol 0.08 below QR below QR 0.084 0.102 0.104 0.17 below QR below QR below QR below QR below QR below QR below QR below QR below QR neg
416.70429.0538Bicalutamide below QR 0.08779548 0.07488438 0.29867678 0.27457606 0.4475848 0.00344296 below QR 1.31176776 0.62920094 11.286883624.59979456 0.02410072 below QR 1.49166242 0.31244862 neg
518.10287.0247Ciprofibrate below QR 0.167707 0.0560951 0.8512576 3.0349184 1.7187076 0.0474206 below QR 0.1763815 2.7579127 4.1718562 7.4531304 0.312282 below QR 0.0260235 1.2392969 neg
919.25280.0588Flufenamic Acid below QR 0.3279188440.10405658 0.1636781880.3717913480.3926026640.127680236below QR below QR 0.0281234 9.7413832921.7397135240.34591782 0.2722345121.5349751723.919277024neg
1014.70329.0004Furosemide below QR 0.33206296 below QR 0.2943586 1.12848488 0.18653736 below QR below QR 0.51132404 below QR 49.90734304below QR below QR below QR 1.68412808 0.23747132 neg
11 8.20295.9572Hydrochlorothiazide below QR 1.31477568 2.68076092 15.1788708628.9792518220.7964405 0.15362868 7.76241656 13.0483149830.22793144153.8228 24.047056642.66587442 7.84280366 19.3685274252.80003366neg
1217.60269.0541Leflunomide 0.0032425320.001621266below QR 0.0124297060.0880887860.050259246below QR below QR below QR below QR 0.0697144380.0670123280.0502592460.00540422 below QR 0.018374348neg
1319.70270.2075N-Dodecanoyl-N-methylglycine0.0119416 0.0493948 0.1416708 0.4244696 0.0586224 0.059708 0.1340716 below QR below QR below QR 4.456388 below QR below QR 0.0629648 0.0673072 below QR neg
1414.80187.0974Nonanedioic Acid below QR 1.99814352 192.9846011208.480000853.40290772248.920573643.141529763.94923204 6.30951084 below QR below QR below QR 1.84568532 below QR 0.84096696 below QR neg
15 3.12151.0260Oxypurinol below QR 0.12442598 0.15545642 1.70758686 3.19856908 3.56819638 0.05688914 0.29752716 below QR 0.19287548 2.30659604 0.09613352 0.09674196 0.07210014 0.04167814 0.65894052 neg
1614.90137.0242Salicylic Acid 3.438 3.926 18.684 14.584 16.806 29.882 5.554 10.034 12.414 9.078 28.91 8.38 42.402 8.492 10.006 6.694 neg
17 3.37117.0193Succinic Acid below QR 19.65 306.376 383.56 247.96 293.098 146.596 27.232 20.92 25.862 34.37 12.094 13.156 8.54 6.25 13.054 neg
1810.12179.0573Theophylline below QR 0.5722103922.1994787363.32696382222.379984416.9353484980.2626834861.8315777222.4203634781.0705523143.3752485780.4424901520.1185498860.2147590640.28286219 0.892186984neg
In [47]:
#add asterix to neg compound names for plotting legibility
dedup_neg$Chemical <- paste0(dedup_neg$Chemical,"*")
In [48]:
#merge pos and dedup_neg
all_pos_dedup_neg <- rbind(pos,dedup_neg)
head(all_pos_dedup_neg)
A data.frame: 6 × 96
RTm.zChemicalX2019_April_Loc01X2019_April_Loc02X2019_April_Loc03X2019_April_Loc04X2019_April_Loc05X2019_April_Loc06X2019_April_Loc07X2020_July_Loc11X2020_July_Loc12X2020_July_Loc13X2020_August_Loc01X2020_August_Loc04X2020_August_Loc10X2020_August_Loc11X2020_August_Loc12X2020_August_Loc13mode
<dbl><dbl><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr>
14.86253.09703-hydroxycarbamazepinebelow QR 6.902 0.964 3.738 4.004 5.61 below QR1.226 1.326 1.602 51.398 3.796 0.062 0.28 1.348 3.07 pos
8.05152.0707Acetaminophen 9.458 17.046 12.178 22.854 30.728 below QR below QRbelow QR below QR below QR 14.486 below QR below QR below QR below QR below QR pos
10.65370.1795Amisulpride 7.722 8.472 0.376 6.224 2.074 2.694 0.05 0.946 2.58 8.104 108.204 6.612 1.146 2.016 2.982 8.1 pos
15.75278.1903Amitriptyline 0.19251560.1425836below QR 0.01442480.072124 0.0588088below QRbelow QR below QR below QR below QR below QR below QR below QR below QR below QR pos
8.10267.1703Atenolol 1.464 5.784 below QR 1.254 8.598 2.844 0.056 0.094 0.048 0.102 0.168 0.05 below QR below QR below QR below QR pos
4.47119.0604Benzimidazole below QR 0.58560640.14468480.76983682.15914242.84799360.2143040.10253440.20791040.68482563.85344640.91949440.86574080.30618240.34880641.4700544pos
In [49]:
#coerce as type numeric so"below QR" values become NA 
all_pos_dedup_neg[,c(4:95)] <- suppressWarnings(sapply(all_pos_dedup_neg[,c(4:95)], as.numeric))
In [50]:
head(all_pos_dedup_neg)
A data.frame: 6 × 96
RTm.zChemicalX2019_April_Loc01X2019_April_Loc02X2019_April_Loc03X2019_April_Loc04X2019_April_Loc05X2019_April_Loc06X2019_April_Loc07X2020_July_Loc11X2020_July_Loc12X2020_July_Loc13X2020_August_Loc01X2020_August_Loc04X2020_August_Loc10X2020_August_Loc11X2020_August_Loc12X2020_August_Loc13mode
<dbl><dbl><chr><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><chr>
14.86253.09703-hydroxycarbamazepine NA 6.9020000 0.9640000 3.7380000 4.0040005.6100000 NA1.22600001.32600001.6020000 51.3980003.79600000.06200000.28000001.34800003.070000pos
8.05152.0707Acetaminophen 9.458000017.046000012.178000022.854000030.728000 NA NA NA NA NA 14.486000 NA NA NA NA NApos
10.65370.1795Amisulpride 7.7220000 8.4720000 0.3760000 6.2240000 2.0740002.69400000.0500000.94600002.58000008.1040000108.2040006.61200001.14600002.01600002.98200008.100000pos
15.75278.1903Amitriptyline 0.1925156 0.1425836 NA 0.0144248 0.0721240.0588088 NA NA NA NA NA NA NA NA NA NApos
8.10267.1703Atenolol 1.4640000 5.7840000 NA 1.2540000 8.5980002.84400000.0560000.09400000.04800000.1020000 0.1680000.0500000 NA NA NA NApos
4.47119.0604Benzimidazole NA 0.5856064 0.1446848 0.7698368 2.1591422.84799360.2143040.10253440.20791040.6848256 3.8534460.91949440.86574080.30618240.34880641.470054pos
In [51]:
#pivot: wide to long
long_all_pos_dedup_neg <- all_pos_dedup_neg[,c(3,4:95)] %>% pivot_longer(!Chemical, names_to=c("year","month","code"), 
                                 names_pattern ="X(.*)_(.*)_(.*)", 
                                 values_to="Conc")
In [52]:
head(long_all_pos_dedup_neg)
dim(long_all_pos_dedup_neg)
A tibble: 6 × 5
ChemicalyearmonthcodeConc
<chr><chr><chr><chr><dbl>
3-hydroxycarbamazepine2019AprilLoc01 NA
3-hydroxycarbamazepine2019AprilLoc026.902
3-hydroxycarbamazepine2019AprilLoc030.964
3-hydroxycarbamazepine2019AprilLoc043.738
3-hydroxycarbamazepine2019AprilLoc054.004
3-hydroxycarbamazepine2019AprilLoc065.610
  1. 7912
  2. 5
In [53]:
#translate Loc labels
code <- c(paste0("Loc0",1:9),paste0("Loc",10:13))
tr_locs <- data.frame(code,location=c("Chiers-Rodange","Alzette-Ettelbruck","Sûre-Erpeldange",
                                      "Syr-Mertert","Mess-Noertzange","Mamer-Mersch",
                                      "Eisch-Mersch","Attert-Colmar-Berg","Alzette-Mersch",
                                      "Our-Wallendorf","Ernz-Blanche","Ernz-Noire",
                                      "Gander-Emerange"),stringsAsFactors=FALSE)
In [54]:
head(tr_locs)
head(long_all_pos_dedup_neg)
A data.frame: 6 × 2
codelocation
<chr><chr>
Loc01Chiers-Rodange
Loc02Alzette-Ettelbruck
Loc03Sûre-Erpeldange
Loc04Syr-Mertert
Loc05Mess-Noertzange
Loc06Mamer-Mersch
A tibble: 6 × 5
ChemicalyearmonthcodeConc
<chr><chr><chr><chr><dbl>
3-hydroxycarbamazepine2019AprilLoc01 NA
3-hydroxycarbamazepine2019AprilLoc026.902
3-hydroxycarbamazepine2019AprilLoc030.964
3-hydroxycarbamazepine2019AprilLoc043.738
3-hydroxycarbamazepine2019AprilLoc054.004
3-hydroxycarbamazepine2019AprilLoc065.610
In [55]:
long_all_pos_dedup_neg <- left_join(long_all_pos_dedup_neg,tr_locs,by="code")
In [56]:
#remove NA rows (below QR)
cc_long_all_pos_dedup_neg <- long_all_pos_dedup_neg[complete.cases(long_all_pos_dedup_neg),] 
dim(cc_long_all_pos_dedup_neg)
  1. 6211
  2. 6

7912-6211 = 1701 NAs, as expected.

In [57]:
tail(cc_long_all_pos_dedup_neg)
A tibble: 6 × 6
ChemicalyearmonthcodeConclocation
<chr><chr><chr><chr><dbl><chr>
Theophylline*2020AugustLoc013.3752486Chiers-Rodange
Theophylline*2020AugustLoc040.4424902Syr-Mertert
Theophylline*2020AugustLoc100.1185499Our-Wallendorf
Theophylline*2020AugustLoc110.2147591Ernz-Blanche
Theophylline*2020AugustLoc120.2828622Ernz-Noire
Theophylline*2020AugustLoc130.8921870Gander-Emerange
In [58]:
#prepare dates for plotting
cc_long_all_pos_dedup_neg$Time <- as.factor(paste("01",substr(cc_long_all_pos_dedup_neg$month,1,3),cc_long_all_pos_dedup_neg$year))
cc_long_all_pos_dedup_neg$Time <- as.yearmon(as.Date(cc_long_all_pos_dedup_neg$Time, format="%d %b %Y"))
cc_long_all_pos_dedup_neg$Time <- as.factor(cc_long_all_pos_dedup_neg$Time)

Temporal Heatmaps for 4 permanent locations across 2019-2020

In [59]:
perm_locs <- c("Chiers-Rodange","Alzette-Ettelbruck","Sûre-Erpeldange","Syr-Mertert")
In [60]:
#calculate medians first
df_cc_perm_locs_all_pos_dedup_neg_permonth <- cc_long_all_pos_dedup_neg[cc_long_all_pos_dedup_neg$location %in% perm_locs,] %>% 
                                                group_by(Chemical,year,month,Time) %>% 
                                                summarise(Median_Conc_ngL=median(Conc))
tail(df_cc_perm_locs_all_pos_dedup_neg_permonth)
A grouped_df: 6 × 5
ChemicalyearmonthTimeMedian_Conc_ngL
<chr><chr><chr><fct><dbl>
Warfarin2019SeptemberSep 20191.8675730
Warfarin2020April Apr 20200.4316662
Warfarin2020August Aug 20200.3348496
Warfarin2020July Jul 20200.4057662
Warfarin2020June Jun 20200.8149241
Warfarin2020May May 20200.2815080

Analysis of Metformin

In [61]:
#range of medians for metformin in 2019 and 2020
metformin_medians <- df_cc_perm_locs_all_pos_dedup_neg_permonth[df_cc_perm_locs_all_pos_dedup_neg_permonth$Chemical=="Metformin",]
metformin_medians
A grouped_df: 10 × 5
ChemicalyearmonthTimeMedian_Conc_ngL
<chr><chr><chr><fct><dbl>
Metformin2019August Aug 201928.844
Metformin2019July Jul 201919.944
Metformin2019May May 2019 3.050
Metformin2019October Oct 201938.537
Metformin2019SeptemberSep 201919.093
Metformin2020April Apr 2020 0.526
Metformin2020August Aug 2020 0.616
Metformin2020July Jul 2020 0.156
Metformin2020June Jun 2020 0.061
Metformin2020May May 2020 0.332
In [62]:
write.csv(metformin_medians,"20210505_metformin_median_conc_perm_locs_2019_2020.csv")
In [63]:
#populate $notzero before taking log
#df_cc_perm_locs_all_pos_dedup_neg_permonth$notzero <-  TRUE
#df_cc_perm_locs_all_pos_dedup_neg_permonth$notzero[df_cc_perm_locs_all_pos_dedup_neg_permonth$Median_Conc_ngL == 0] <- FALSE
In [65]:
#df_cc_perm_locs_all_pos_dedup_neg_permonth[df_cc_perm_locs_all_pos_dedup_neg_permonth$notzero==FALSE,]
A grouped_df: 0 × 6
ChemicalyearmonthTimeMedian_Conc_ngLnotzero
<chr><chr><chr><fct><dbl><lgl>

There are no zero-value concentrations now.

In [66]:
#take log of Median_Conc_ngL
df_cc_perm_locs_all_pos_dedup_neg_permonth$log_Median_Conc_ngL <- log10(df_cc_perm_locs_all_pos_dedup_neg_permonth$Median_Conc_ngL)
In [39]:
#convert -Inf to 0 to find true minimum 
#df_cc_perm_locs_all_pos_dedup_neg_permonth$log_Median_Conc_ngL[is.infinite(df_cc_perm_locs_all_pos_dedup_neg_permonth$log_Median_Conc_ngL)] <- 0
#summary(df_cc_perm_locs_all_pos_dedup_neg_permonth$log_Median_Conc_ngL)
#  Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#-2.3010  0.4061  2.6274  2.3261  4.2804  6.1853
In [67]:
df_cc_perm_locs_all_pos_dedup_neg_permonth[is.infinite(df_cc_perm_locs_all_pos_dedup_neg_permonth$log_Median_Conc_ngL),]
A grouped_df: 0 × 7
ChemicalyearmonthTimeMedian_Conc_ngLnotzerolog_Median_Conc_ngL
<chr><chr><chr><fct><dbl><lgl><dbl>
In [41]:
#convert -Inf to -99 for plotting (ONLY IF THERE WERE ZERO-VALUES)
#df_cc_perm_locs_all_pos_dedup_neg_permonth$log_Median_Conc_ngL[is.infinite(df_cc_perm_locs_all_pos_dedup_neg_permonth$log_Median_Conc_ngL)] <- -99
In [68]:
df_cc_perm_locs_all_pos_dedup_neg_permonth[df_cc_perm_locs_all_pos_dedup_neg_permonth$Chemical=="Theophylline*",]
A grouped_df: 11 × 7
ChemicalyearmonthTimeMedian_Conc_ngLnotzerolog_Median_Conc_ngL
<chr><chr><chr><fct><dbl><lgl><dbl>
Theophylline*2019April Apr 20192.1994787TRUE 0.342319768
Theophylline*2019August Aug 20199.2317571TRUE 0.965284368
Theophylline*2019July Jul 20198.0141885TRUE 0.903859553
Theophylline*2019May May 20192.6396267TRUE 0.421542515
Theophylline*2019October Oct 20198.3491189TRUE 0.921640648
Theophylline*2019SeptemberSep 20198.6417102TRUE 0.936599696
Theophylline*2020April Apr 20200.9921797TRUE-0.003409676
Theophylline*2020August Aug 20201.9088694TRUE 0.280776208
Theophylline*2020July Jul 20201.8274339TRUE 0.261841673
Theophylline*2020June Jun 20201.4752074TRUE 0.168853081
Theophylline*2020May May 20201.8184255TRUE 0.259695520
In [69]:
df_cc_perm_locs_all_pos_dedup_neg_permonth[df_cc_perm_locs_all_pos_dedup_neg_permonth$Chemical=="Pantoprozole",]
A grouped_df: 6 × 7
ChemicalyearmonthTimeMedian_Conc_ngLnotzerolog_Median_Conc_ngL
<chr><chr><chr><fct><dbl><lgl><dbl>
Pantoprozole2019August Aug 201957.169TRUE1.757161
Pantoprozole2019July Jul 201954.083TRUE1.733061
Pantoprozole2019May May 2019 4.894TRUE0.689664
Pantoprozole2019October Oct 201954.615TRUE1.737312
Pantoprozole2019SeptemberSep 201980.379TRUE1.905143
Pantoprozole2020May May 202033.732TRUE1.528042
In [71]:
summary(df_cc_perm_locs_all_pos_dedup_neg_permonth$log_Median_Conc_ngL)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-2.4891  0.1743  0.9411  0.8643  1.6177  3.8142 

Figure 4 - Permanent Sampling Points Only

In [44]:
###outline the boxes where $notzero is FALSE
#options(repr.plot.width=20, repr.plot.height=12)
#ggplot(df_cc_perm_locs_all_pos_dedup_neg_permonth, 
 #      aes(x=Time, y=reorder(Chemical,log_Median_Conc_ngL), 
  #         fill=log_Median_Conc_ngL)) + #geom_tile() +
#geom_tile(data=df_cc_perm_locs_all_pos_dedup_neg_permonth,
 #         aes(size=factor(notzero,c(FALSE,TRUE)),
  #            colour=factor(notzero,c(FALSE,TRUE)))) + 
#guides(color = FALSE, size = FALSE) +
#scale_colour_manual("notzero", values=c("black", "white")) + 
#scale_size_manual("notzero", values=c(0.2, 0)) +
#xlab("Month-Year, N = 4 permanent sampling points") + ylab("Compound") + theme_classic() + 
#theme(axis.text.x = element_text(size=18), axis.text.y = element_text(size=15)) + #tick text
#theme(axis.title.x = element_text(face="bold", size=20), axis.title.y = element_text(face="bold",size=20)) + #axis title
#scale_fill_distiller(palette="RdYlBu",limits=c(-3,7),breaks=c(-3,-1,1,3,5,7),name="log10(median Conc)") + #, limits= c(-4,8),breaks = c(-4,-2,0,2,4,6,8),labels = c(-4,-2,0,2,4,6,8)) + 
#theme(legend.title = element_text(size=20, face="bold"), legend.text= element_text(size=15))
In [45]:
###use alpha as 2nd layer for FALSE/TRUE in $notzero
#ggplot(df_cc_perm_locs_all_pos_dedup_neg_permonth, 
 #      aes(x=Time, y=reorder(Chemical,log_Median_Conc_ngL)))+#, 
           #fill=log_Median_Conc_ngL)) + #geom_tile() +
#geom_tile(data=df_cc_perm_locs_all_pos_dedup_neg_permonth,
 #         aes(size=factor(iszero,c(TRUE,FALSE)),
  #            colour=factor(iszero,c(TRUE,FALSE)))) + 
#geom_tile(aes(fill=log_Median_Conc_ngL,alpha=notzero), colour='grey20')+
#guides(color = FALSE, size = FALSE) +
#scale_colour_manual("iszero", values=c("black", "white")) + 
#scale_fill_manual("iszero", values=c("black","white")) + 
#scale_size_manual("iszero", values=c(0.2, 0)) +
#xlab("Month-Year, N = 4 permanent sampling points") + ylab("Compound") + theme_classic() + 
#theme(axis.text.x = element_text(size=18), axis.text.y = element_text(size=15)) + #tick text
#theme(axis.title.x = element_text(face="bold", size=20), axis.title.y = element_text(face="bold",size=20)) + #axis title
#scale_fill_distiller(palette="RdYlBu",limits=c(-3,7),breaks=c(-3,-1,1,3,5,7),name="log10(median Conc)") + #, limits= c(-4,8),breaks = c(-4,-2,0,2,4,6,8),labels = c(-4,-2,0,2,4,6,8)) + 
#theme(legend.title = element_text(size=20, face="bold"), legend.text= element_text(size=15))
In [75]:
options(repr.plot.width=20, repr.plot.height=12)
ggplot(df_cc_perm_locs_all_pos_dedup_neg_permonth, aes(x=Time, y=reorder(Chemical,log_Median_Conc_ngL), fill=log_Median_Conc_ngL)) + geom_tile() +
#geom_tile(data=df_cc_perm_locs_all_pos_dedup_neg_permonth,aes(size=factor(iszero,c(TRUE,FALSE))),alpha=0,colour="blue") + #scale_size_discrete("Your legend", range=c(3, 0.5)) +
#+ geom_tile(data=df2, aes(size=factor(z, c(TRUE, FALSE))), alpha=0, color="blue") +   scale_size_discrete("Your legend", range=c(3, 0.5))
xlab("Month-Year, N = 4 permanent sampling points") + ylab("Compound") + theme_classic() + 
theme(axis.text.x = element_text(size=18), axis.text.y = element_text(size=15)) + #tick text
theme(axis.title.x = element_text(face="bold", size=20), axis.title.y = element_text(face="bold",size=20)) + #axis title
scale_fill_distiller(palette="RdYlBu",limits=c(-3,4),name="log10(MedianConc)") + #, limits= c(-4,8),breaks = c(-4,-2,0,2,4,6,8),labels = c(-4,-2,0,2,4,6,8)) + 
theme(legend.title = element_text(size=20, face="bold"), legend.text= element_text(size=15))

Figure 4. Temporal heat map showing median concentration values (original units: ng/L) per compound measured per sampling month-year plotted using a base-10 logarithmic scale. Median values were calculated across the concentrations measured at the four permanent sampling locations for the respective compound and month-year. White boxes indicate concentration values that were below the respective quantification range, which were therefore discarded from median calculation. All compounds were measured in positive mode except for those marked with an asterisks, which were measured in negative mode.

In [77]:
#to save as pdf or png
heatmap_permonth <- ggplot(df_cc_perm_locs_all_pos_dedup_neg_permonth, aes(x=Time, y=reorder(Chemical,log_Median_Conc_ngL), fill=log_Median_Conc_ngL)) + geom_tile() + 
xlab("Month-Year, N = 4 permanent sampling points") + ylab("Chemical") + theme_classic() + 
theme(axis.text.x = element_text(size=10), axis.text.y = element_text(size=7)) + #tick text
theme(axis.title.x = element_text(face="bold", size=15), axis.title.y = element_text(face="bold",size=15)) + #axis title
scale_fill_distiller(palette="RdYlBu",limits=c(-3,4),name="log10(MedianConc)") + 
theme(legend.title = element_text(size=12, face="bold"), legend.text= element_text(size=10)) 

#ggsave("temporal_heatmap_per_month_permanent_sampling_points_20210422.pdf", heatmap_permonth, width = 297, height = 210, units = "mm")
ggsave("temporal_heatmap_per_month_permanent_sampling_points_20210505.png", heatmap_permonth, width = 297, height = 210, units = "mm")

Spatial Heatmaps for 2019 & 2020 respectively

In [78]:
head(cc_long_all_pos_dedup_neg)
A tibble: 6 × 7
ChemicalyearmonthcodeConclocationTime
<chr><chr><chr><chr><dbl><chr><fct>
3-hydroxycarbamazepine2019AprilLoc026.902Alzette-EttelbruckApr 2019
3-hydroxycarbamazepine2019AprilLoc030.964Sûre-Erpeldange Apr 2019
3-hydroxycarbamazepine2019AprilLoc043.738Syr-Mertert Apr 2019
3-hydroxycarbamazepine2019AprilLoc054.004Mess-Noertzange Apr 2019
3-hydroxycarbamazepine2019AprilLoc065.610Mamer-Mersch Apr 2019
3-hydroxycarbamazepine2019AprilLoc081.326Attert-Colmar-BergApr 2019
In [79]:
#calculate medians for 2019 only across all months
df_cc_2019_long_all_pos_dedup_neg = cc_long_all_pos_dedup_neg[cc_long_all_pos_dedup_neg$year=="2019",] %>% 
                                group_by(Chemical,location) %>% 
                                summarise(Median_Conc_ngL=median(Conc))
tail(df_cc_2019_long_all_pos_dedup_neg)
dim(df_cc_2019_long_all_pos_dedup_neg)
A grouped_df: 6 × 3
ChemicallocationMedian_Conc_ngL
<chr><chr><dbl>
WarfarinChiers-Rodange 2.2039643
WarfarinEisch-Mersch 0.1492332
WarfarinMamer-Mersch 0.7773075
WarfarinMess-Noertzange0.7658992
WarfarinSyr-Mertert 1.1463821
WarfarinSûre-Erpeldange0.5250911
  1. 762
  2. 3
In [80]:
#testing
foo <- cc_long_all_pos_dedup_neg[cc_long_all_pos_dedup_neg$Chemical=="Warfarin"&
                                 cc_long_all_pos_dedup_neg$location=="Syr-Mertert"&
                                 cc_long_all_pos_dedup_neg$year=="2019",
                                 "Conc"]
foo <- as.numeric(unlist(foo))
median(foo)
1.146382094
In [81]:
#populate $notzero before taking log
#df_cc_2019_long_all_pos_dedup_neg$notzero <-  TRUE
#df_cc_2019_long_all_pos_dedup_neg$notzero[df_cc_2019_long_all_pos_dedup_neg$Median_Conc_ngL == 0] <- FALSE 
#take log of Median_Conc_ngL
#df_cc_2019_long_all_pos_dedup_neg$log_Median_Conc_ngL <- log10(df_cc_2019_long_all_pos_dedup_neg$Median_Conc_ngL)
In [82]:
summary(df_cc_2019_long_all_pos_dedup_neg$log_Median_Conc_ngL)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-2.0769  0.1889  0.9942  0.8800  1.6800  4.0630 
In [53]:
#convert -Inf to 0 to find true minimum 
#df_cc_2019_long_all_pos_dedup_neg$log_Median_Conc_ngL[is.infinite(df_cc_2019_long_all_pos_dedup_neg$log_Median_Conc_ngL)] <- 0
#summary(df_cc_2019_long_all_pos_dedup_neg$log_Median_Conc_ngL)
#Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 #-2.301   0.391   2.768   2.371   4.320   6.072
In [54]:
#convert -Inf to -99 for plotting (ONLY IF THERE WERE ZERO-VALUES)
#df_cc_2019_long_all_pos_dedup_neg$log_Median_Conc_ngL[is.infinite(df_cc_2019_long_all_pos_dedup_neg$log_Median_Conc_ngL)] <- -99
In [83]:
df_cc_2019_long_all_pos_dedup_neg[df_cc_2019_long_all_pos_dedup_neg$Chemical=="Bicalutamide*",]
A grouped_df: 9 × 5
ChemicallocationMedian_Conc_ngLnotzerolog_Median_Conc_ngL
<chr><chr><dbl><lgl><dbl>
Bicalutamide*Alzette-Ettelbruck0.7845645TRUE-0.1053713
Bicalutamide*Alzette-Mersch 1.6465956TRUE 0.2165870
Bicalutamide*Attert-Colmar-Berg0.1618191TRUE-0.7909702
Bicalutamide*Chiers-Rodange 1.7929214TRUE 0.2535613
Bicalutamide*Eisch-Mersch 0.1501991TRUE-0.8233326
Bicalutamide*Mamer-Mersch 0.2969553TRUE-0.5273089
Bicalutamide*Mess-Noertzange 0.4815840TRUE-0.3173279
Bicalutamide*Syr-Mertert 0.3860419TRUE-0.4133656
Bicalutamide*Sûre-Erpeldange 0.1545028TRUE-0.8110636

Figure 2 - 2019 concs

In [90]:
ggplot(df_cc_2019_long_all_pos_dedup_neg, aes(x=reorder(location,log_Median_Conc_ngL), y=reorder(Chemical,log_Median_Conc_ngL), fill=log_Median_Conc_ngL)) + 
geom_tile() + 
xlab("2019 Sampling Points, N = 6 months") + ylab("Chemical") + theme_classic() +
theme(axis.text.x = element_text(size=18,angle=10,hjust=1), axis.text.y = element_text(size=15)) + #tick text
theme(axis.title.x = element_text(face="bold", size=30), axis.title.y = element_text(face="bold",size=30)) + #axis title
scale_fill_distiller(palette="RdYlBu",limits=c(-3,4.1),name="log10(MedianConc)") +
theme(legend.title = element_text(size=20, face="bold"), legend.text= element_text(size=15))#legends

Figure 2. Spatial heat map showing median concentration values (original units: ng/L) per compound measured per sampling location over 6 months in 2019, plotted using a base-10 logarithmic scale. Median values were calculated across the concentrations measured over the relevant months of sampling for the respective compound and location. White boxes indicate that there were no concentration values within the quantification range. All compounds were measured in positive mode except for those marked with an asterisks, which were measured in negative mode.

In [91]:
#to save as pdf
heatmap_2019_perloc <- ggplot(df_cc_2019_long_all_pos_dedup_neg, aes(x=reorder(location,log_Median_Conc_ngL), y=reorder(Chemical,log_Median_Conc_ngL), fill=log_Median_Conc_ngL)) + 
geom_tile() + 
xlab("2019 Sampling Points, N = 6 months") + ylab("Chemical") + theme_classic() +
theme(axis.text.x = element_text(size=10,angle=10,hjust=1), axis.text.y = element_text(size=7)) + #tick text
theme(axis.title.x = element_text(face="bold", size=15), axis.title.y = element_text(face="bold",size=15)) + #axis title
scale_fill_distiller(palette="RdYlBu",limits=c(-3,4.1),name="log10(MedianConc)") +
theme(legend.title = element_text(size=12, face="bold"), legend.text= element_text(size=10))#legends
#ggsave("spatial_heatmap_2019_per_loc_20210422.pdf", heatmap_2019_perloc, width = 297, height = 210, units = "mm")
ggsave("spatial_heatmap_2019_per_loc_20210505.png", heatmap_2019_perloc, width = 297, height = 210, units = "mm")

Figure 3 - 2020 concs

In [92]:
#recalculate medians for 2020 only across all months
df_cc_2020_long_all_pos_dedup_neg <- cc_long_all_pos_dedup_neg[cc_long_all_pos_dedup_neg$year==2020,] %>% 
                                group_by(Chemical,location) %>% 
                                summarise(Median_Conc_ngL=median(Conc))
head(df_cc_2020_long_all_pos_dedup_neg)
dim(df_cc_2020_long_all_pos_dedup_neg)
A grouped_df: 6 × 3
ChemicallocationMedian_Conc_ngL
<chr><chr><dbl>
3-hydroxycarbamazepineAlzette-Ettelbruck 5.872
3-hydroxycarbamazepineChiers-Rodange 30.890
3-hydroxycarbamazepineErnz-Blanche 1.210
3-hydroxycarbamazepineErnz-Noire 1.086
3-hydroxycarbamazepineGander-Emerange 3.070
3-hydroxycarbamazepineOur-Wallendorf 0.062
  1. 565
  2. 3
In [93]:
#testing
bar <- cc_long_all_pos_dedup_neg[cc_long_all_pos_dedup_neg$Chemical=="3-hydroxycarbamazepine"&
                          cc_long_all_pos_dedup_neg$location=="Alzette-Ettelbruck"&
                          cc_long_all_pos_dedup_neg$year=="2020","Conc"]
bar <- as.numeric(unlist(bar))
median(bar)
5.872
In [94]:
#populate $notzero before taking log
#df_cc_2020_long_all_pos_dedup_neg$notzero <-  TRUE
#df_cc_2020_long_all_pos_dedup_neg$notzero[df_cc_2020_long_all_pos_dedup_neg$Median_Conc_ngL == 0] <- FALSE 
#take log of Median_Conc_ngL
#df_cc_2020_long_all_pos_dedup_neg$log_Median_Conc_ngL <- log10(df_cc_2020_long_all_pos_dedup_neg$Median_Conc_ngL)
In [61]:
#convert -Inf to 0 to find true minimum 
#df_cc_2020_long_all_pos_dedup_neg$log_Median_Conc_ngL[is.infinite(df_cc_2020_long_all_pos_dedup_neg$log_Median_Conc_ngL)] <- 0
#summary(df_cc_2020_long_all_pos_dedup_neg$log_Median_Conc_ngL)
   #  Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
#-2.301030  0.006038  1.973128  1.911851  3.556544  5.763277
In [62]:
#convert -Inf to -99 for plotting (ONLY IF THERE WERE ZERO-VALUES)
#df_cc_2020_long_all_pos_dedup_neg$log_Median_Conc_ngL[is.infinite(df_cc_2020_long_all_pos_dedup_neg$log_Median_Conc_ngL)] <- -99
In [95]:
summary(df_cc_2020_long_all_pos_dedup_neg$log_Median_Conc_ngL)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-2.2673 -0.4523  0.3526  0.3577  1.1449  3.1651 

Figure 3

In [96]:
ggplot(df_cc_2020_long_all_pos_dedup_neg, aes(x=reorder(location,log_Median_Conc_ngL), y=reorder(Chemical,log_Median_Conc_ngL), fill=log_Median_Conc_ngL)) + 
geom_tile() + 
xlab("2020 Sampling Points, N = 5 months") + ylab("Chemical") + theme_classic() +
theme(axis.text.x = element_text(size=18,angle=10,hjust=1), axis.text.y = element_text(size=15)) + #tick text
theme(axis.title.x = element_text(face="bold", size=30), axis.title.y = element_text(face="bold",size=30)) + #axis title
scale_fill_distiller(palette="RdYlBu",limits=c(-3,4),name="log10(MedianConc)") +
theme(legend.title = element_text(size=20, face="bold"), legend.text= element_text(size=15))#legends

Figure 3. Spatial heat map showing median concentration values (original units: ng/L) per compound measured per sampling location over 5 months in 2020, plotted using a base-10 logarithmic scale. Median values were calculated across the concentrations measured over the relevant months of sampling for the respective compound and location. White boxes indicate that there were no concentration values within the quantification range. All compounds were measured in positive mode except for those marked with an asterisks, which were measured in negative mode.

In [97]:
#to save as pdf
heatmap_2020_perloc <- ggplot(df_cc_2020_long_all_pos_dedup_neg, aes(x=reorder(location,log_Median_Conc_ngL), y=reorder(Chemical,log_Median_Conc_ngL), fill=log_Median_Conc_ngL)) + 
geom_tile() + 
xlab("2020 Sampling Points, N = 5 months") + ylab("Chemical") + theme_classic() +
theme(axis.text.x = element_text(size=10,angle=10,hjust=1), axis.text.y = element_text(size=7)) + #tick text
theme(axis.title.x = element_text(face="bold", size=15), axis.title.y = element_text(face="bold",size=15)) + #axis title
scale_fill_distiller(palette="RdYlBu",limits=c(-3,4),name="log10(MedianConc)") +
theme(legend.title = element_text(size=12, face="bold"), legend.text= element_text(size=10))#legends
#ggsave("spatial_heatmap_2020_per_loc_20210422.pdf", heatmap_2020_perloc, width = 297, height = 210, units = "mm")
ggsave("spatial_heatmap_2020_per_loc_20210505.png", heatmap_2020_perloc, width = 297, height = 210, units = "mm")

Boxplots with points

Idea would be to show distribution of conc values, not just median.

Zero-value concentrations could indicate point sources, non-zero-value concentrations could up-prioritise that compound for Suspect Screening.

In [98]:
head(cc_long_all_pos_dedup_neg)
A tibble: 6 × 7
ChemicalyearmonthcodeConclocationTime
<chr><chr><chr><chr><dbl><chr><fct>
3-hydroxycarbamazepine2019AprilLoc026.902Alzette-EttelbruckApr 2019
3-hydroxycarbamazepine2019AprilLoc030.964Sûre-Erpeldange Apr 2019
3-hydroxycarbamazepine2019AprilLoc043.738Syr-Mertert Apr 2019
3-hydroxycarbamazepine2019AprilLoc054.004Mess-Noertzange Apr 2019
3-hydroxycarbamazepine2019AprilLoc065.610Mamer-Mersch Apr 2019
3-hydroxycarbamazepine2019AprilLoc081.326Attert-Colmar-BergApr 2019
In [99]:
#what are the lowest and highest median Conc overall?
overall_median_conc <- cc_long_all_pos_dedup_neg %>% group_by(Chemical) %>% summarise(Median_Conc_ngL=median(Conc))
head(overall_median_conc)
A tibble: 6 × 2
ChemicalMedian_Conc_ngL
<chr><dbl>
3-hydroxycarbamazepine 3.7890000
Acamprosate* 0.9790000
Acetaminophen 22.4990000
Acetazolamide* 0.3584731
Allopurinol* 0.0940000
Amisulpride 5.3310000
In [100]:
summary(overall_median_conc$Median_Conc_ngL)
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
   0.051    1.443    6.954   40.006   22.170 1423.996 
In [101]:
##output median concs
#write.csv(overall_median_conc, "20210505_overall_median_conc_all_locs_all_time.csv")
In [102]:
cc_long_all_pos_dedup_neg$Year <- as.factor(cc_long_all_pos_dedup_neg$year)
#calculate logConc
cc_long_all_pos_dedup_neg$logConc <- log10(cc_long_all_pos_dedup_neg$Conc)
In [103]:
head(cc_long_all_pos_dedup_neg)
A tibble: 6 × 9
ChemicalyearmonthcodeConclocationTimeYearlogConc
<chr><chr><chr><chr><dbl><chr><fct><fct><dbl>
3-hydroxycarbamazepine2019AprilLoc026.902Alzette-EttelbruckApr 20192019 0.83897495
3-hydroxycarbamazepine2019AprilLoc030.964Sûre-Erpeldange Apr 20192019-0.01592297
3-hydroxycarbamazepine2019AprilLoc043.738Syr-Mertert Apr 20192019 0.57263930
3-hydroxycarbamazepine2019AprilLoc054.004Mess-Noertzange Apr 20192019 0.60249407
3-hydroxycarbamazepine2019AprilLoc065.610Mamer-Mersch Apr 20192019 0.74896286
3-hydroxycarbamazepine2019AprilLoc081.326Attert-Colmar-BergApr 20192019 0.12254352
In [106]:
bp <- ggplot(cc_long_all_pos_dedup_neg, aes(x=reorder(Chemical,logConc, FUN=median),y=logConc)) + 
xlab("Chemical") + ylab("log10(Conc)")
In [107]:
#with no jittering
bp + geom_boxplot() + geom_point(alpha=0.3) + #shape="." for very small 
theme(axis.text.x = element_text(size=14, angle=65, hjust=1),axis.text.y = element_text(size=18)) +
theme(axis.title.x = element_text(face="bold", size=30), axis.title.y = element_text(face="bold",size=30))
In [55]:
#to save as pdf
bps_sorted_median_nojit <- bp + geom_boxplot() + geom_point(alpha=0.3) + #shape="." for very small 
theme(axis.text.x = element_text(size=10, angle=65, hjust=1),axis.text.y = element_text(size=13)) +
theme(axis.title.x = element_text(face="bold", size=16), axis.title.y = element_text(face="bold",size=16))  

#ggsave("boxplots_20210116_nojittering.pdf", bps_sorted_median_nojit, width = 297, height = 210, units = "mm")
In [56]:
#with jittering
#bp + geom_boxplot() + geom_jitter(alpha=0.3, position=position_jitter(0.2)) +
#xlab('log Concentration [ng/L]') +
#theme(axis.text.x = element_text(size=14, angle=65, hjust=1),axis.text.y = element_text(size=18)) +
#theme(axis.title.x = element_text(face="bold", size=30), axis.title.y = element_text(face="bold",size=30))
In [57]:
#to save as pdf
#bps_sorted_median <- bp + geom_boxplot() + geom_jitter(alpha=0.3, position=position_jitter(0.2)) +
#theme(axis.text.x = element_text(size=10, angle=65, hjust=1),axis.text.y = element_text(size=13)) +
#theme(axis.title.x = element_text(face="bold", size=16), axis.title.y = element_text(face="bold",size=16))  
#ggsave("boxplots_20210113_jittering.pdf", bps_sorted_median, width = 297, height = 210, units = "mm")
In [108]:
#colour code by year
update_geom_defaults("point", list(colour = NULL))
c <- ggplot(cc_long_all_pos_dedup_neg,aes(x=reorder(Chemical,logConc,FUN=median),y=logConc)) + 
geom_boxplot(outlier.shape=NA) + xlab('Chemical') + ylab('log10(Conc)')
In [109]:
c + geom_point(aes(color=Year),alpha=0.2) + 
theme(axis.text.x = element_text(size=14, angle=65, hjust=1),axis.text.y = element_text(size=18)) +
theme(axis.title.x = element_text(face="bold", size=30), axis.title.y = element_text(face="bold",size=30))  +
theme(legend.position = c(0.95, 0.1)) +
scale_color_manual(values=c("#332288","#009E73")) + #'#6633FF','#66FF33')) + #("#E69F00","#0072B2")) + 
theme(legend.title = element_text(size=20), legend.text=element_text(size=15))
In [60]:
#to save as pdf
c_sorted_median_years <- c + geom_point(aes(color=Year),alpha=0.3,size=0.8) + 
theme(axis.text.x = element_text(size=8, angle=65, hjust=1),axis.text.y = element_text(size=10)) +
theme(axis.title.x = element_text(face="bold", size=15), axis.title.y = element_text(face="bold",size=15))  +
theme(legend.position = c(0.95, 0.1)) +
scale_color_manual(values=c("#332288","#009E73")) + 
theme(legend.title = element_text(size=12), legend.text=element_text(size=10)) +  guides(colour = guide_legend(override.aes = list(size=2)))

#ggsave("boxplots_20210116_nojittering_years.pdf", c_sorted_median_years , width = 297, height = 210, units = "mm")
#ggsave("boxplots_20210116_nojittering_years.png", c_sorted_median_years , width = 297, height = 210, units = "mm")
In [122]:
head(cc_long_all_pos_dedup_neg)
A tibble: 6 × 9
ChemicalyearmonthcodeConclocationTimeYearlogConc
<chr><chr><chr><chr><dbl><chr><fct><fct><dbl>
3-hydroxycarbamazepine2019AprilLoc026.902Alzette-EttelbruckApr 20192019 0.83897495
3-hydroxycarbamazepine2019AprilLoc030.964Sûre-Erpeldange Apr 20192019-0.01592297
3-hydroxycarbamazepine2019AprilLoc043.738Syr-Mertert Apr 20192019 0.57263930
3-hydroxycarbamazepine2019AprilLoc054.004Mess-Noertzange Apr 20192019 0.60249407
3-hydroxycarbamazepine2019AprilLoc065.610Mamer-Mersch Apr 20192019 0.74896286
3-hydroxycarbamazepine2019AprilLoc081.326Attert-Colmar-BergApr 20192019 0.12254352
In [110]:
#with separate boxes, one for 2019, one for 2020
ggplot(cc_long_all_pos_dedup_neg,aes(x=reorder(Chemical,logConc,FUN=median),y=logConc, color=Year)) + 
geom_boxplot(outlier.shape=NA) + xlab('Chemical') + ylab('log10(Conc)') + geom_point(alpha=0.2) + 
theme(axis.text.x = element_text(size=14, angle=65, hjust=1),axis.text.y = element_text(size=18)) +
theme(axis.title.x = element_text(face="bold", size=30), axis.title.y = element_text(face="bold",size=30))  +
theme(legend.position = c(0.95, 0.1)) +
scale_color_manual(values=c("#332288","#009E73")) + #'#6633FF','#66FF33')) + #("#E69F00","#0072B2")) + 
theme(legend.title = element_text(size=20), legend.text=element_text(size=15))

Initiate plot to subset top-x:

In [111]:
twoboxes_peryear <- ggplot(cc_long_all_pos_dedup_neg,aes(x=reorder(Chemical,logConc,FUN=median),y=logConc, color=Year)) + 
geom_boxplot(outlier.shape=NA) + xlab('Chemical') + ylab('log10(Conc)') + geom_point(alpha=0.2) + 
theme(axis.text.x = element_text(size=8, angle=65, hjust=1),axis.text.y = element_text(size=10)) +
theme(axis.title.x = element_text(face="bold", size=15), axis.title.y = element_text(face="bold",size=15))  +
theme(legend.position = c(0.95, 0.1)) +
scale_color_manual(values=c("#332288","#009E73")) +
theme(legend.title = element_text(size=12), legend.text=element_text(size=10)) +  guides(colour = guide_legend(override.aes = list(size=0.8)))

#save as pdf
#ggsave("two_boxplots_20210119_nojittering_years.pdf", twoboxes_peryear , width = 297, height = 210, units = "mm")
#ggsave("two_boxplots_20210119_nojittering_years.png", twoboxes_peryear , width = 297, height = 210, units = "mm")
In [112]:
#plot only top 50 compounds
#found a better solution
ggbld <- ggplot_build(twoboxes_peryear)
compounds_sorted_ascending_medianlogConc <- ggbld$layout$coord$labels(ggbld$layout$panel_params)[[1]]$x.labels
compounds_sorted_ascending_medianlogConc
  1. 'Clomipramine'
  2. 'Amitriptyline'
  3. 'Leflunomide*'
  4. 'Allopurinol*'
  5. 'N-Dodecanoyl-N-methylglycine*'
  6. 'Metoclopramide'
  7. 'Scopolamine'
  8. 'Timolol'
  9. 'Sulfadiazine'
  10. 'Acetazolamide*'
  11. 'Etodolac'
  12. 'Memantine'
  13. 'Warfarin'
  14. 'Bicalutamide*'
  15. 'Thiabendazole'
  16. 'Epoxiconazole'
  17. 'Oxypurinol*'
  18. 'Acamprosate*'
  19. 'Methadone'
  20. 'Ciprofibrate*'
  21. 'Doxylamine'
  22. 'Phenazone'
  23. 'Mirtazapine'
  24. 'Furosemide*'
  25. 'Diltiazem'
  26. 'Lincomycin'
  27. 'Flufenamic Acid*'
  28. 'Theophylline*'
  29. 'Ketoprofen'
  30. 'Atenolol'
  31. 'Benzimidazole'
  32. 'Metronidazole'
  33. 'Quinine'
  34. 'Moclobemide'
  35. '3-hydroxycarbamazepine'
  36. 'Trimethoprim'
  37. 'Propranolol'
  38. 'Ranitidine'
  39. 'Dexpanthenol'
  40. 'Nicotine'
  41. 'Amisulpride'
  42. 'Eprosartan'
  43. 'Metformin'
  44. 'Piperine'
  45. 'Perindopril'
  46. 'Clarithromycin'
  47. 'Sulpiride'
  48. 'Tiapride'
  49. 'Carbendazim'
  50. 'Vildagliptin'
  51. 'Clindamycin'
  52. 'Bisoprolol'
  53. 'Losartan'
  54. 'Salicylic Acid*'
  55. 'Sulfamethoxazole'
  56. 'Cetirizine'
  57. 'Oxazepam'
  58. 'Metoprolol'
  59. 'Celiprolol'
  60. 'Hydrochlorothiazide*'
  61. 'Cotinine'
  62. 'Lidocaine'
  63. 'Nonanedioic Acid*'
  64. 'Valsartan'
  65. 'Acetaminophen'
  66. 'Fluconazole'
  67. 'Venlafaxine'
  68. 'Naproxen'
  69. 'Pantoprozole'
  70. 'Flecainide'
  71. 'Nicotinic Acid'
  72. 'Phenytoin'
  73. 'Sotalol'
  74. 'Phenylalanine'
  75. 'Carbamazepine'
  76. 'Diclofenac'
  77. 'Niacinamide'
  78. 'Lamotrigine'
  79. 'Iohexol'
  80. 'Telmisartan'
  81. 'Tramadol'
  82. 'Sitagliptin'
  83. 'Irbesartan'
  84. 'Succinic Acid*'
  85. 'O-Desmethylvenlafaxine'
  86. 'Caffeine'
In [113]:
length(compounds_sorted_ascending_medianlogConc)
86
In [114]:
#take the top x compounds (sorted by median)
toplot_compounds_sorted_ascending_medianlogConc <- compounds_sorted_ascending_medianlogConc[37:86] 
length(compounds_sorted_ascending_medianlogConc)
tail(toplot_compounds_sorted_ascending_medianlogConc)
86
  1. 'Tramadol'
  2. 'Sitagliptin'
  3. 'Irbesartan'
  4. 'Succinic Acid*'
  5. 'O-Desmethylvenlafaxine'
  6. 'Caffeine'
In [115]:
top50_medians_percompound_peryear <- cc_long_all_pos_dedup_neg[cc_long_all_pos_dedup_neg$Chemical %in% toplot_compounds_sorted_ascending_medianlogConc, ]
dim(top50_medians_percompound_peryear)
#top10_medians_percompound_peryear <- cc_all_pos_dedup_neg[cc_all_pos_dedup_neg$Compound %in% toplot_compounds_sorted_ascending_medianlogConc, ]
#dim(top10_medians_percompound_peryear)
dim(cc_long_all_pos_dedup_neg)
  1. 3898
  2. 9
  1. 6211
  2. 9

Fig 5

In [116]:
#replot boxplot
top50_twoboxes_peryear <- ggplot(top50_medians_percompound_peryear,aes(x=reorder(Chemical,logConc,FUN=median),y=logConc, color=Year)) + 
#top10_twoboxes_peryear <- ggplot(top10_medians_percompound_peryear,aes(x=reorder(Compound,logConc,FUN=median),y=logConc, color=Year)) + 
geom_boxplot(outlier.shape=NA) + xlab('Chemical') + ylab('log10(Conc)') + geom_point(alpha=0.2) + 
theme(axis.text.x = element_text(size=14, angle=65, hjust=1),axis.text.y = element_text(size=18)) +
theme(axis.title.x = element_text(face="bold", size=20), axis.title.y = element_text(face="bold",size=15))  +
theme(legend.position = c(0.95, 0.1)) +
scale_color_manual(values=c("#332288","#009E73")) +
theme(legend.title = element_text(size=20), legend.text=element_text(size=15)) +  guides(colour = guide_legend(override.aes = list(size=0.8)))
top50_twoboxes_peryear

Figure 5. Boxplots showing the range of concentrations (original units: ng/L) measured for the top-50 highest concentration pharmaceutical chemicals across all months and sampling locations in 2019 and 2020, plotted using a base-10 logarithmic scale. Concentration values that were below the respective quantification ranges were excluded. All chemicals were measured in positive mode.

In [117]:
#save as pdf
#ggsave("top50_twoboxes_peryear_20210505_nojittering_years.pdf", top50_twoboxes_peryear , width = 297, height = 210, units = "mm")
ggsave("top50_twoboxes_peryear_20210505_nojittering_years.png", top50_twoboxes_peryear , width = 297, height = 210, units = "mm")
In [118]:
#options(repr.plot.width=20, repr.plot.height=12)
#with jittering
top50_jitter_twoboxes_peryear <- ggplot(top50_medians_percompound_peryear,aes(x=reorder(Chemical,logConc,FUN=median),y=logConc, color=Year)) + 
geom_boxplot() + xlab('Chemical') + ylab('log10(Conc)') + geom_jitter(alpha=0.3, position=position_jitter(0.2)) + 
theme(axis.text.x = element_text(size=14, angle=65, hjust=1),axis.text.y = element_text(size=18)) +
theme(axis.title.x = element_text(face="bold", size=20), axis.title.y = element_text(face="bold",size=15))  +
theme(legend.position = c(0.95, 0.1)) +
scale_color_manual(values=c("#332288","#009E73")) +
theme(legend.title = element_text(size=20), legend.text=element_text(size=15)) +  guides(colour = guide_legend(override.aes = list(size=0.8)))
top50_jitter_twoboxes_peryear
In [119]:
#save as pdf
#ggsave("top50_jitter_twoboxes_peryear_20210505_years.pdf", top50_jitter_twoboxes_peryear , width = 297, height = 210, units = "mm")
ggsave("top50_jitter_twoboxes_peryear_20210505_2_years.png", top50_jitter_twoboxes_peryear , width = 297, height = 210, units = "mm")
In [120]:
#without points top50
#readjust indexing as necessary
top50_nopoints_twoboxes_peryear <- ggplot(top50_medians_percompound_peryear,aes(x=reorder(Chemical,logConc,FUN=median),y=logConc, color=Year)) + 
#top10_twoboxes_peryear <- ggplot(top10_medians_percompound_peryear,aes(x=reorder(Compound,logConc,FUN=median),y=logConc, color=Year)) + 
geom_boxplot(outlier.shape=NA) + xlab('Chemical') + ylab('log10(Conc)') + 
theme(axis.text.x = element_text(size=14, angle=65, hjust=1),axis.text.y = element_text(size=18)) +
theme(axis.title.x = element_text(face="bold", size=20), axis.title.y = element_text(face="bold",size=15))  +
theme(legend.position = c(0.95, 0.1)) +
scale_color_manual(values=c("#332288","#009E73")) +
theme(legend.title = element_text(size=20), legend.text=element_text(size=15)) +  guides(colour = guide_legend(override.aes = list(size=0.8)))
top50_nopoints_twoboxes_peryear
In [121]:
#save as pdf
#ggsave("top50_nopoints_twoboxes_peryear_20210505_years.pdf", top50_nopoints_twoboxes_peryear , width = 297, height = 210, units = "mm")
ggsave("top50_nopoints_twoboxes_peryear_20210505_2_years.png", top50_nopoints_twoboxes_peryear , width = 297, height = 210, units = "mm")
In [88]:
#without points, top30
#readjust indexing as necessary
top30_nopoints_twoboxes_peryear <- ggplot(top50_medians_percompound_peryear,aes(x=reorder(Chemical,logConc,FUN=median),y=logConc, color=Year)) + 
#top10_twoboxes_peryear <- ggplot(top10_medians_percompound_peryear,aes(x=reorder(Compound,logConc,FUN=median),y=logConc, color=Year)) + 
geom_boxplot(outlier.shape=NA) + xlab('Chemical') + ylab('log10(Conc)') + 
theme(axis.text.x = element_text(size=14, angle=65, hjust=1),axis.text.y = element_text(size=18)) +
theme(axis.title.x = element_text(face="bold", size=20), axis.title.y = element_text(face="bold",size=15))  +
theme(legend.position = c(0.95, 0.1)) +
scale_color_manual(values=c("#332288","#009E73")) +
theme(legend.title = element_text(size=20), legend.text=element_text(size=15)) +  guides(colour = guide_legend(override.aes = list(size=0.8)))
top30_nopoints_twoboxes_peryear
In [89]:
#save as pdf
#ggsave("top30_nopoints_twoboxes_peryear_20210504_nojittering_years.pdf", top30_nopoints_twoboxes_peryear , width = 297, height = 210, units = "mm")
#ggsave("top30_nopoints_twoboxes_peryear_20210504_nojittering_years.png", top30_nopoints_twoboxes_peryear , width = 297, height = 210, units = "mm")
In [ ]: