After looking at the summary statistics for each locus in each population, 8 loci deviated from Hardy-Weinberg equilibrium (HWE) in 3 or more populations.
Try removing them and see how the results change.
Load libraries and data
library(stringi)
library(rubias)
library(tidyverse)
[30m── [1mAttaching packages[22m ───────────────────────────────────────────────────── tidyverse 1.2.1 ──[39m
[30m[32m✓[30m [34mggplot2[30m 3.3.2 [32m✓[30m [34mpurrr [30m 0.3.3
[32m✓[30m [34mtibble [30m 3.0.1 [32m✓[30m [34mdplyr [30m 1.0.4
[32m✓[30m [34mtidyr [30m 1.1.2 [32m✓[30m [34mstringr[30m 1.4.0
[32m✓[30m [34mreadr [30m 1.3.1 [32m✓[30m [34mforcats[30m 0.4.0[39m
package ‘ggplot2’ was built under R version 3.6.2
package ‘tibble’ was built under R version 3.6.2
package ‘tidyr’ was built under R version 3.6.2
── Conflicts ──────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31mx[30m [34mtidyr[30m::[32mextract()[30m masks [34mraster[30m::extract()
[31mx[30m [34mdplyr[30m::[32mfilter()[30m masks [34mstats[30m::filter()
[31mx[30m [34mpurrr[30m::[32mis_null()[30m masks [34mtestthat[30m::is_null()
[31mx[30m [34mdplyr[30m::[32mlag()[30m masks [34mstats[30m::lag()
[31mx[30m [34mdplyr[30m::[32mmatches()[30m masks [34mtidyr[30m::matches(), [34mtestthat[30m::matches()
[31mx[30m [34mdplyr[30m::[32mselect()[30m masks [34mraster[30m::select()[39m
library(CKMRsim)
# genotype data written by `01-aggregate-genos-and-gsi-to..`
no_hi_missers_clean <- read_csv(
  file = "csv_outputs/baseline_no_hi_missers_clean.csv"
)
Parsed with column specification:
cols(
gtseq_run = [32mcol_double()[39m,
id = [31mcol_character()[39m,
NMFS_DNA_ID = [31mcol_character()[39m,
locus = [31mcol_character()[39m,
gene_copy = [32mcol_double()[39m,
allele = [31mcol_character()[39m,
depth = [32mcol_double()[39m,
allele.balance = [32mcol_double()[39m,
total_depth = [32mcol_double()[39m,
rank = [32mcol_double()[39m
)
# sample meta data
meta <- readRDS(file = "data/meta-data-tibble.rds")
# loci that deviate from HWE
outliers <- read_csv(file = "csv_outputs/loci_out_hwe.csv")
Parsed with column specification:
cols(
Locus = [31mcol_character()[39m,
n = [32mcol_double()[39m
)
# NMFS_DNA_IDs (with their group) for the samples without duplicates, etc.
nofu_ids <- read_csv(file = "data/nofu_ids_04022019.csv")
Parsed with column specification:
cols(
NMFS_DNA_ID = [31mcol_character()[39m,
group = [31mcol_character()[39m
)
With those we can just filter down the genos to the ones that we want, and then we can get it into the format required for CKMR.
# Long-format genotypes with the column names used for the CKMR marker table.
# kg2 is reused further down.
kg2 <- no_hi_missers_clean %>%
  mutate(
    Chrom = "GTseq",
    # number the loci in order of first appearance
    Pos = as.integer(factor(locus, levels = unique(locus)))
  ) %>%
  select(NMFS_DNA_ID, Chrom, Locus = locus, Pos, Allele = allele) %>%
  ungroup()
# Allele frequencies per locus.
kg_ckmr_markers <- kg2 %>%
  filter(!is.na(Allele)) %>% # NAs must be dropped before the alleles are counted
  group_by(Chrom, Locus, Pos, Allele) %>%
  summarise(counts = n()) %>%
  group_by(Locus, Pos) %>%
  mutate(
    Freq = counts / sum(counts), # frequency within each locus
    AlleIdx = 1, # placeholder index columns handed to reindex_markers()
    LocIdx = 1
  ) %>%
  select(-counts) %>%
  reindex_markers()
`summarise()` has grouped output by 'Chrom', 'Locus', 'Pos'. You can override using the `.groups` argument.
# summary stats: mean number of marker-table rows per locus
# (presumably alleles per locus, if reindex_markers() keeps one row per allele)
kg_ckmr_markers %>%
  group_by(Locus) %>%
  tally() %>%
  summarise(mean(n))
NA
Remove the 8 loci that are out of HWE in three or more of the four populations.
# Strip only the trailing "_1" from the outlier locus names. The pattern is
# anchored with "$" because scaffold coordinates can themselves contain "_1"
# (e.g. "scaffold12724_15413_15713"); an unanchored gsub() would delete those
# characters as well, the name would no longer match the genotype table, and
# the locus would silently survive the anti-join below.
outliers$Locus <- sub("_1$", "", outliers$Locus)
# replace the ":" and "-" separators in the genotype locus names with "_"
# so that they can be matched against the outlier names
no_hi_missers_clean$locus <- gsub("[:-]", "_", no_hi_missers_clean$locus)
# drop every genotype row whose locus is on the outlier list
no_hi_missers_loc <- no_hi_missers_clean %>%
  anti_join(outliers, by = c("locus" = "Locus"))
Check the number of samples
# number of distinct samples per group in the filtered genotypes
no_hi_missers_loc %>%
  distinct(NMFS_DNA_ID) %>%
  left_join(nofu_ids) %>%
  group_by(group) %>%
  count()
Joining, by = "NMFS_DNA_ID"
Save the baseline file for use in the bycatch assignment:
# genotypes with the HWE-outlier loci removed
write_csv(
  no_hi_missers_loc,
  "csv_outputs/baseline_no_hi_missers_clean_hwe.csv"
)
# Recode the alleles at each locus as integers (numbered in order of first
# appearance within the locus), then sort by sample, locus and allele index.
alle_idxs <- no_hi_missers_loc %>%
  dplyr::select(NMFS_DNA_ID, locus, gene_copy, allele) %>%
  group_by(locus) %>%
  mutate(alleidx = as.integer(factor(allele, levels = unique(allele)))) %>%
  ungroup() %>%
  arrange(NMFS_DNA_ID, locus, alleidx)
# drop the original allele strings (the 4th column); keep the integer codes
alle_idx2 <- dplyr::select(alle_idxs, -allele)
# wide format: one row per sample, one "<locus>.<gene_copy>" column per allele
two_col <- spread(
  unite(alle_idx2, loc, locus, gene_copy, sep = "."),
  key = loc,
  value = alleidx
)
# Self-assignment of the baseline colony samples.
# NOTE(review): `rubias_genos2` is not created anywhere in the code shown here
# (only `two_col` is) -- confirm where it is built before running this chunk.
sa_fulmars <- self_assign(
  reference = rubias_genos2,
  gen_start_col = 5
)
Summary Statistics:
517 Individuals in Sample
133 Loci: scaffold10064_72410_72710.1, scaffold1073_6683_6983.1, scaffold11643_6155_6455.1, scaffold11659_1129_1429.1, scaffold11671_31645_31945.1, scaffold12151_88327_88627.1, scaffold12549_7519_7819.1, scaffold12572_95987_96287.1, scaffold12669_859_1159.1, scaffold12702_30730_31030.1, scaffold12724_15413_15713.1, scaffold12724_22667_22967.1, scaffold12724_4243_4543.1, scaffold12850_47111_47411.1, scaffold12907_16926_17226.1, scaffold12968_40534_40834.1, scaffold13158_16102_16402.1, scaffold13635_62902_63202.1, scaffold13913_12506_12806.1, scaffold14169_98394_98694.1, scaffold14302_10932_11232.1, scaffold14413_3901_4201.1, scaffold14476_28059_28359.1, scaffold14624_104543_104843.1, scaffold1506_64767_65067.1, scaffold15072_25695_25995.1, scaffold15458_19677_19977.1, scaffold15799_15980_16280.1, scaffold15856_64760_65060.1, scaffold16535_98016_98316.1, scaffold16631_31251_31551.1, scaffold16928_88219_88519.1, scaffold17092_25910_26210.1, scaffold18623_5641_5941.1, scaffold1915_31691_31991.1, scaffold19203_31036_31336.1, scaffold19438_69367_69667.1, scaffold1960_103029_103329.1, scaffold19776_1877_2177.1, scaffold20701_3279_3579.1, scaffold21348_18338_18638.1, scaffold21570_152862_153162.1, scaffold21894_2131_2431.1, scaffold21958_14622_14922.1, scaffold22707_13937_14237.1, scaffold23574_25017_25317.1, scaffold23733_33119_33419.1, scaffold24280_32394_32694.1, scaffold24335_14436_14736.1, scaffold24517_21781_22081.1, scaffold25430_21993_22293.1, scaffold25460_35667_35967.1, scaffold2649_12219_12519.1, scaffold26984_8747_9047.1, scaffold27163_13781_14081.1, scaffold2844_22566_22866.1, scaffold28505_21263_21563.1, scaffold28953_18028_18328.1, scaffold30583_3628_3928.1, scaffold30801_4180_4480.1, scaffold31213_3358_3658.1, scaffold31218_16437_16737.1, scaffold31581_33016_33316.1, scaffold32217_1745_2045.1, scaffold3263_8980_9280.1, scaffold3336_3819_4119.1, scaffold34234_7230_7530.1, scaffold34665_436_736.1, scaffold3469_2040_2340.1, scaffold34858_40611_40911.1, 
scaffold35021_6298_6598.1, scaffold3580_28673_28973.1, scaffold3748_3065_3365.1, scaffold37901_363_663.1, scaffold38389_5270_5570.1, scaffold38395_29487_29787.1, scaffold38398_6375_6675.1, scaffold3849_18022_18322.1, scaffold38904_5603_5903.1, scaffold38924_25817_26117.1, scaffold38997_7977_8277.1, scaffold391_998_1298.1, scaffold39244_11532_11832.1, scaffold39311_6143_6443.1, scaffold39606_9134_9434.1, scaffold39903_54057_54357.1, scaffold40040_9181_9481.1, scaffold40205_75784_76084.1, scaffold40437_4313_4613.1, scaffold41025_7850_8150.1, scaffold41491_6238_6538.1, scaffold42223_10380_10680.1, scaffold42370_758_1058.1, scaffold42423_2710_3010.1, scaffold42976_9097_9397.1, scaffold4335_28587_28887.1, scaffold43607_111864_112164.1, scaffold4364_33806_34106.1, scaffold44117_5212_5512.1, scaffold44250_17845_18145.1, scaffold44418_8527_8827.1, scaffold44855_55004_55304.1, scaffold44924_15256_15556.1, scaffold4595_103685_103985.1, scaffold46053_2758_3058.1, scaffold46494_5800_6100.1, scaffold46866_4310_4610.1, scaffold47695_45286_45586.1, scaffold478_2849_3149.1, scaffold48136_11747_12047.1, scaffold48982_19639_19939.1, scaffold51601_2552_2852.1, scaffold5538_31526_31826.1, scaffold5692_44_344.1, scaffold5965_24143_24443.1, scaffold6092_12052_12352.1, scaffold62_7494_7794.1, scaffold6266_78615_78915.1, scaffold6871_35099_35399.1, scaffold6960_6239_6539.1, scaffold7374_35025_35325.1, scaffold7567_99171_99471.1, scaffold7925_21844_22144.1, scaffold8143_77151_77451.1, scaffold8667_42523_42823.1, scaffold8861_15537_15837.1, scaffold8861_3177_3477.1, scaffold8910_49471_49771.1, scaffold90_29714_30014.1, scaffold9194_91800_92100.1, scaffold9205_103949_104249.1, scaffold9516_126012_126312.1, scaffold9981_29196_29496.1
4 Reporting Units: Semidi, Chagulak, Pribilof, StMatthew
4 Collections: Semidi, Chagulak, Pribilof, StMatthew
1.60% of allelic data identified as missing
# reporting-unit results: keep only the top assignment (highest scaled
# likelihood) for each sample
sa_to_repu <- sa_fulmars %>%
  group_by(indiv, collection, repunit) %>%
  top_n(n = 1, wt = scaled_likelihood)
# true vs. inferred reporting unit, no likelihood threshold applied
assign_no_thres <- sa_to_repu %>%
  group_by(repunit, inferred_repunit) %>%
  tally()
Summarize assignments with a 50% likelihood threshold
# assignments with a scaled likelihood above 0.5, counted per
# true / inferred reporting unit
thres50 <- sa_fulmars %>%
  filter(scaled_likelihood > 0.5) %>%
  group_by(repunit, inferred_repunit) %>%
  tally(name = "threshold_50")
Summarize assignments with a 90% likelihood threshold
# assignments with a scaled likelihood above 0.9, counted per
# true / inferred reporting unit
thres90 <- sa_fulmars %>%
  filter(scaled_likelihood > 0.9) %>%
  group_by(repunit, inferred_repunit) %>%
  tally(name = "threshold_90")
Combine those
# unthresholded counts side by side with the 50% and 90% counts
assign_no_thres %>%
  left_join(thres50) %>%
  left_join(thres90)
Joining, by = c("repunit", "inferred_repunit")
Joining, by = c("repunit", "inferred_repunit")
I need to remove the samples that I used for RAD-seq from my self-assignment because they were part of the ascertainment panel for the markers.
I remove them after doing the self-assignment.
# list of the samples used for RAD-seq
rads <- read_csv(file = "data/RAD-67-samples.csv")
Parsed with column specification:
cols(
Individual = [31mcol_character()[39m
)
# look up the NMFS_DNA_ID of every RAD-seq sample in the meta data;
# this is the list of NMFS IDs to remove
ids_to_remove <- rads %>%
  left_join(meta, by = c("Individual" = "SAMPLE_ID")) %>%
  as_tibble() %>%
  dplyr::select(NMFS_DNA_ID)
# self-assignment results restricted to the samples that were not used
# in ascertainment
sa_fulmars2 <- anti_join(
  sa_fulmars,
  ids_to_remove,
  by = c("indiv" = "NMFS_DNA_ID")
)
Now look at the assignments without the ascertainment samples
# reporting-unit results: top assignment for each remaining sample
sa_to_repu2 <- sa_fulmars2 %>%
  group_by(indiv, collection, repunit) %>%
  top_n(n = 1, wt = scaled_likelihood)
# among assignments with a scaled likelihood above 0.9: the share of each
# reporting unit that was assigned back to itself
sa_to_repu2 %>%
  filter(scaled_likelihood > 0.9) %>%
  group_by(repunit, inferred_repunit) %>%
  tally() %>%
  # tally() leaves the table grouped by repunit, so `total` is per repunit
  mutate(
    total = sum(n),
    correct = ifelse(repunit == inferred_repunit, n / total, 0)
  ) %>%
  ungroup() %>%
  filter(repunit == inferred_repunit)
Summary: overall (no likelihood threshold), 76.7% of samples were accurately assigned; at the 90% threshold, 91.4% of samples were correctly assigned.
Take a look at z-score outliers:
# density of the z-scores of the top assignments, one panel per collection
ggplot(sa_to_repu2, aes(x = z_score)) +
  geom_density() +
  facet_wrap(~ collection)
Generate the full assignment matrix (without the ascertainment samples) and remove outliers based on z-scores
# Colony assignment matrices built from the top assignments of the
# non-ascertainment samples: one row per true collection, one column per
# inferred collection, cells = number of samples. Collection pairs that never
# occur are written as 0 via `values_fill`; without it pivot_wider() leaves
# those cells as NA in the CSV even though the count is zero.

# no assignment threshold
sa_to_repu2 %>%
  select(-missing_loci) %>%
  #filter(z_score < 3 & z_score > -3) %>% # comment or uncomment to remove outliers
  group_by(collection, inferred_collection) %>%
  tally() %>%
  pivot_wider(
    names_from = inferred_collection,
    values_from = n,
    values_fill = list(n = 0L)
  ) %>%
  write_csv("csv_outputs/colony_assignment_nothreshold.csv")
# 90% assignment
sa_to_repu2 %>%
  select(-missing_loci) %>%
  filter(scaled_likelihood > 0.9) %>% # 90% threshold
  #filter(z_score < 3 & z_score > -3) %>% # remove outliers
  group_by(collection, inferred_collection) %>%
  tally() %>%
  pivot_wider(
    names_from = inferred_collection,
    values_from = n,
    values_fill = list(n = 0L)
  ) %>%
  write_csv("csv_outputs/colony_assignment_90perc_threshold.csv")
**Note:** I opted not to remove samples with z-scores > 3 and < -3 because these samples were collected on the breeding colonies and were not removed prior to GSI. Furthermore, the presence of more of these outliers for the Chagulak population could mean either a) there are birds from unsampled colonies that were collected on Chagulak, or b) with smaller sample sizes, the likelihood of sampling an incomplete distribution increases, and thus, more outliers occur.
Go back to the rubias input
# fix the seed so that the random subsample is reproducible
set.seed(765)
# Chagulak has 36 samples, so draw 36 (without replacement) from every
# collection to equalize the sample sizes
rubias_genos_36 <- rubias_genos2 %>%
  group_by(collection) %>%
  sample_n(size = 36, replace = FALSE) %>%
  ungroup()
Now go ahead with self-assignment using that dataset
# self-assignment on the down-sampled set of colony samples
assign36 <- self_assign(
  reference = rubias_genos_36,
  gen_start_col = 5
)
Summary Statistics:
144 Individuals in Sample
133 Loci: scaffold10064_72410_72710.1, scaffold1073_6683_6983.1, scaffold11643_6155_6455.1, scaffold11659_1129_1429.1, scaffold11671_31645_31945.1, scaffold12151_88327_88627.1, scaffold12549_7519_7819.1, scaffold12572_95987_96287.1, scaffold12669_859_1159.1, scaffold12702_30730_31030.1, scaffold12724_15413_15713.1, scaffold12724_22667_22967.1, scaffold12724_4243_4543.1, scaffold12850_47111_47411.1, scaffold12907_16926_17226.1, scaffold12968_40534_40834.1, scaffold13158_16102_16402.1, scaffold13635_62902_63202.1, scaffold13913_12506_12806.1, scaffold14169_98394_98694.1, scaffold14302_10932_11232.1, scaffold14413_3901_4201.1, scaffold14476_28059_28359.1, scaffold14624_104543_104843.1, scaffold1506_64767_65067.1, scaffold15072_25695_25995.1, scaffold15458_19677_19977.1, scaffold15799_15980_16280.1, scaffold15856_64760_65060.1, scaffold16535_98016_98316.1, scaffold16631_31251_31551.1, scaffold16928_88219_88519.1, scaffold17092_25910_26210.1, scaffold18623_5641_5941.1, scaffold1915_31691_31991.1, scaffold19203_31036_31336.1, scaffold19438_69367_69667.1, scaffold1960_103029_103329.1, scaffold19776_1877_2177.1, scaffold20701_3279_3579.1, scaffold21348_18338_18638.1, scaffold21570_152862_153162.1, scaffold21894_2131_2431.1, scaffold21958_14622_14922.1, scaffold22707_13937_14237.1, scaffold23574_25017_25317.1, scaffold23733_33119_33419.1, scaffold24280_32394_32694.1, scaffold24335_14436_14736.1, scaffold24517_21781_22081.1, scaffold25430_21993_22293.1, scaffold25460_35667_35967.1, scaffold2649_12219_12519.1, scaffold26984_8747_9047.1, scaffold27163_13781_14081.1, scaffold2844_22566_22866.1, scaffold28505_21263_21563.1, scaffold28953_18028_18328.1, scaffold30583_3628_3928.1, scaffold30801_4180_4480.1, scaffold31213_3358_3658.1, scaffold31218_16437_16737.1, scaffold31581_33016_33316.1, scaffold32217_1745_2045.1, scaffold3263_8980_9280.1, scaffold3336_3819_4119.1, scaffold34234_7230_7530.1, scaffold34665_436_736.1, scaffold3469_2040_2340.1, scaffold34858_40611_40911.1, 
scaffold35021_6298_6598.1, scaffold3580_28673_28973.1, scaffold3748_3065_3365.1, scaffold37901_363_663.1, scaffold38389_5270_5570.1, scaffold38395_29487_29787.1, scaffold38398_6375_6675.1, scaffold3849_18022_18322.1, scaffold38904_5603_5903.1, scaffold38924_25817_26117.1, scaffold38997_7977_8277.1, scaffold391_998_1298.1, scaffold39244_11532_11832.1, scaffold39311_6143_6443.1, scaffold39606_9134_9434.1, scaffold39903_54057_54357.1, scaffold40040_9181_9481.1, scaffold40205_75784_76084.1, scaffold40437_4313_4613.1, scaffold41025_7850_8150.1, scaffold41491_6238_6538.1, scaffold42223_10380_10680.1, scaffold42370_758_1058.1, scaffold42423_2710_3010.1, scaffold42976_9097_9397.1, scaffold4335_28587_28887.1, scaffold43607_111864_112164.1, scaffold4364_33806_34106.1, scaffold44117_5212_5512.1, scaffold44250_17845_18145.1, scaffold44418_8527_8827.1, scaffold44855_55004_55304.1, scaffold44924_15256_15556.1, scaffold4595_103685_103985.1, scaffold46053_2758_3058.1, scaffold46494_5800_6100.1, scaffold46866_4310_4610.1, scaffold47695_45286_45586.1, scaffold478_2849_3149.1, scaffold48136_11747_12047.1, scaffold48982_19639_19939.1, scaffold51601_2552_2852.1, scaffold5538_31526_31826.1, scaffold5692_44_344.1, scaffold5965_24143_24443.1, scaffold6092_12052_12352.1, scaffold62_7494_7794.1, scaffold6266_78615_78915.1, scaffold6871_35099_35399.1, scaffold6960_6239_6539.1, scaffold7374_35025_35325.1, scaffold7567_99171_99471.1, scaffold7925_21844_22144.1, scaffold8143_77151_77451.1, scaffold8667_42523_42823.1, scaffold8861_15537_15837.1, scaffold8861_3177_3477.1, scaffold8910_49471_49771.1, scaffold90_29714_30014.1, scaffold9194_91800_92100.1, scaffold9205_103949_104249.1, scaffold9516_126012_126312.1, scaffold9981_29196_29496.1
4 Reporting Units: Chagulak, Pribilof, Semidi, StMatthew
4 Collections: Chagulak, Pribilof, Semidi, StMatthew
1.67% of allelic data identified as missing
Remove ascertainment samples
# self-assignment results without the ascertainment samples
sa_36_no_ascert <- anti_join(
  assign36,
  ids_to_remove,
  by = c("indiv" = "NMFS_DNA_ID")
)
# reporting-unit results: top assignment for each sample
top_assign36 <- sa_36_no_ascert %>%
  group_by(indiv, collection, repunit) %>%
  top_n(n = 1, wt = scaled_likelihood)
# true vs. inferred reporting unit, no likelihood threshold applied
assign36_no_thres <- top_assign36 %>%
  group_by(repunit, inferred_repunit) %>%
  tally()
Summarize assignments with a 50% likelihood threshold
# top assignments with a scaled likelihood above 0.5, counted per
# true / inferred reporting unit
thres50_36samples <- top_assign36 %>%
  filter(scaled_likelihood > 0.5) %>%
  group_by(repunit, inferred_repunit) %>%
  tally(name = "threshold_50")
Summarize assignments with a 90% likelihood threshold
# top assignments with a scaled likelihood above 0.9, counted per
# true / inferred reporting unit
thres90_36samples <- top_assign36 %>%
  filter(scaled_likelihood > 0.9) %>%
  group_by(repunit, inferred_repunit) %>%
  tally(name = "threshold_90")
Combine those
# unthresholded counts side by side with the 50% and 90% counts
assign36_no_thres %>%
  left_join(thres50_36samples) %>%
  left_join(thres90_36samples)
Joining, by = c("repunit", "inferred_repunit")
Joining, by = c("repunit", "inferred_repunit")
# share of each reporting unit assigned back to itself, no likelihood
# threshold (add `filter(scaled_likelihood > 0.9)` as the first step to
# apply the 90% threshold)
top_assign36 %>%
  group_by(repunit, inferred_repunit) %>%
  tally() %>%
  # tally() leaves the table grouped by repunit, so `total` is per repunit
  mutate(
    total = sum(n),
    correct = ifelse(repunit == inferred_repunit, n / total, 0)
  ) %>%
  ungroup() %>%
  filter(repunit == inferred_repunit)
# append `%>% summarise(mean(correct))` to calculate the overall % accuracy
Basically, the question is whether the assignment accuracy improves for Chagulak and St. Matthew when the subsample of 36 samples is used for self-assignment.
And the answer is that at the 90% likelihood threshold, the full data set rather than the downsampled data set actually performs (slightly) better, and considerably better for the Pribs and Semidis.
Thinking more about the z-scores and the long temporal range encompassed by the colony samples, I wonder if there are temporal patterns of assignment that pop out.
# meta data rows whose COLLECTION_DATE contains "93"
# NOTE(review): this matches "93" anywhere in the date text, not only in the
# year -- confirm the date format before relying on the row count.
filter(meta, str_detect(COLLECTION_DATE, pattern = "93"))
NA
30 samples from Chagulak from 1992; 66 samples from the Semidis (Chowiet) from 1993.
Both are potentially interesting cases.
# number of Chowiet samples per collection date
meta %>%
  filter(`Marine::LOCATION_COMMENTS_M` %in% "Chowiet") %>%
  group_by(COLLECTION_DATE) %>%
  tally()
286 samples collected over 10 dates. The one listed as 2017 is certainly an error in the formatting of the Excel file…
But let’s just look at the difference between the 2004 and 1993 samples. That’s an 11-year interval.
What if I do an assignment test with just those two time period groups?
# looks like there is no temporal structure - at least not in the markers
# we're using
# NOTE(review): `top_semidi_assigned` is not created anywhere in the code
# shown here -- confirm where it is built before running this chunk.
ggplot(
  top_semidi_assigned,
  aes(x = collection, fill = inferred_collection)
) +
  geom_bar()
More questions about the genetic stability of the colonies over the 25 year sampling period
# self-assignment with the colony-by-year collections as the reference
temporal_test <- self_assign(
  reference = rubias_temporal_genos,
  gen_start_col = 5
)
Summary Statistics:
483 Individuals in Sample
133 Loci: scaffold10064_72410_72710.1, scaffold1073_6683_6983.1, scaffold11643_6155_6455.1, scaffold11659_1129_1429.1, scaffold11671_31645_31945.1, scaffold12151_88327_88627.1, scaffold12549_7519_7819.1, scaffold12572_95987_96287.1, scaffold12669_859_1159.1, scaffold12702_30730_31030.1, scaffold12724_15413_15713.1, scaffold12724_22667_22967.1, scaffold12724_4243_4543.1, scaffold12850_47111_47411.1, scaffold12907_16926_17226.1, scaffold12968_40534_40834.1, scaffold13158_16102_16402.1, scaffold13635_62902_63202.1, scaffold13913_12506_12806.1, scaffold14169_98394_98694.1, scaffold14302_10932_11232.1, scaffold14413_3901_4201.1, scaffold14476_28059_28359.1, scaffold14624_104543_104843.1, scaffold1506_64767_65067.1, scaffold15072_25695_25995.1, scaffold15458_19677_19977.1, scaffold15799_15980_16280.1, scaffold15856_64760_65060.1, scaffold16535_98016_98316.1, scaffold16631_31251_31551.1, scaffold16928_88219_88519.1, scaffold17092_25910_26210.1, scaffold18623_5641_5941.1, scaffold1915_31691_31991.1, scaffold19203_31036_31336.1, scaffold19438_69367_69667.1, scaffold1960_103029_103329.1, scaffold19776_1877_2177.1, scaffold20701_3279_3579.1, scaffold21348_18338_18638.1, scaffold21570_152862_153162.1, scaffold21894_2131_2431.1, scaffold21958_14622_14922.1, scaffold22707_13937_14237.1, scaffold23574_25017_25317.1, scaffold23733_33119_33419.1, scaffold24280_32394_32694.1, scaffold24335_14436_14736.1, scaffold24517_21781_22081.1, scaffold25430_21993_22293.1, scaffold25460_35667_35967.1, scaffold2649_12219_12519.1, scaffold26984_8747_9047.1, scaffold27163_13781_14081.1, scaffold2844_22566_22866.1, scaffold28505_21263_21563.1, scaffold28953_18028_18328.1, scaffold30583_3628_3928.1, scaffold30801_4180_4480.1, scaffold31213_3358_3658.1, scaffold31218_16437_16737.1, scaffold31581_33016_33316.1, scaffold32217_1745_2045.1, scaffold3263_8980_9280.1, scaffold3336_3819_4119.1, scaffold34234_7230_7530.1, scaffold34665_436_736.1, scaffold3469_2040_2340.1, scaffold34858_40611_40911.1, 
scaffold35021_6298_6598.1, scaffold3580_28673_28973.1, scaffold3748_3065_3365.1, scaffold37901_363_663.1, scaffold38389_5270_5570.1, scaffold38395_29487_29787.1, scaffold38398_6375_6675.1, scaffold3849_18022_18322.1, scaffold38904_5603_5903.1, scaffold38924_25817_26117.1, scaffold38997_7977_8277.1, scaffold391_998_1298.1, scaffold39244_11532_11832.1, scaffold39311_6143_6443.1, scaffold39606_9134_9434.1, scaffold39903_54057_54357.1, scaffold40040_9181_9481.1, scaffold40205_75784_76084.1, scaffold40437_4313_4613.1, scaffold41025_7850_8150.1, scaffold41491_6238_6538.1, scaffold42223_10380_10680.1, scaffold42370_758_1058.1, scaffold42423_2710_3010.1, scaffold42976_9097_9397.1, scaffold4335_28587_28887.1, scaffold43607_111864_112164.1, scaffold4364_33806_34106.1, scaffold44117_5212_5512.1, scaffold44250_17845_18145.1, scaffold44418_8527_8827.1, scaffold44855_55004_55304.1, scaffold44924_15256_15556.1, scaffold4595_103685_103985.1, scaffold46053_2758_3058.1, scaffold46494_5800_6100.1, scaffold46866_4310_4610.1, scaffold47695_45286_45586.1, scaffold478_2849_3149.1, scaffold48136_11747_12047.1, scaffold48982_19639_19939.1, scaffold51601_2552_2852.1, scaffold5538_31526_31826.1, scaffold5692_44_344.1, scaffold5965_24143_24443.1, scaffold6092_12052_12352.1, scaffold62_7494_7794.1, scaffold6266_78615_78915.1, scaffold6871_35099_35399.1, scaffold6960_6239_6539.1, scaffold7374_35025_35325.1, scaffold7567_99171_99471.1, scaffold7925_21844_22144.1, scaffold8143_77151_77451.1, scaffold8667_42523_42823.1, scaffold8861_15537_15837.1, scaffold8861_3177_3477.1, scaffold8910_49471_49771.1, scaffold90_29714_30014.1, scaffold9194_91800_92100.1, scaffold9205_103949_104249.1, scaffold9516_126012_126312.1, scaffold9981_29196_29496.1
11 Reporting Units: Semidi_2004, Semidi_2001, Chagulak_2002, Chagulak_2004, Pribilof_2004, Pribilof_2002, StMatthew_2002, Semidi_2003, Semidi_2017, Semidi_1993, Pribilof_2003
11 Collections: Semidi_2004, Semidi_2001, Chagulak_2002, Chagulak_2004, Pribilof_2004, Pribilof_2002, StMatthew_2002, Semidi_2003, Semidi_2017, Semidi_1993, Pribilof_2003
1.54% of allelic data identified as missing
Now just look at the >90% assignment threshold…