#!/bin/bash
#keep the high coverage species that also have FCM data available:
#a row is copied to the output unless its 13th comma-separated field matches the regex NA
#(an unanchored match, so any value merely containing "NA" is dropped as well)
awk 'BEGIN { FS = "," } $13 !~ /NA/' high_coverage_assemblies.csv \
  > high_coverage_assemblies.with_FCM.csv

#make a list of unique genera with high coverage assemblies that also have FCM data available
#list_unique_genera CSV_FILE
#  prints, sorted and de-duplicated, the first space-delimited word of field 4
#  (double quotes removed) for every row whose field 4 does not contain "Species"
#  (presumably that filter is what drops the header row)
#  tr does the quote stripping, so GNU sed under the name gsed is no longer required
#  and no intermediate genera.txt is written
list_unique_genera() {
  cut -d ',' -f 4 "$1" \
    | grep -v Species \
    | cut -d ' ' -f 1 \
    | tr -d '"' \
    | sort -u
}
list_unique_genera high_coverage_assemblies.with_FCM.csv > unique_genera_FCM.txt

#get one representative species for each genus (the first listed for each genus in the spreadsheet)
#first_row_per_genus GENERA_FILE CSV_FILE
#  GENERA_FILE: one genus name per line; blank lines are ignored
#  CSV_FILE:    comma-separated rows whose 4th field starts with the genus name
#  prints, in GENERA_FILE order, the first CSV_FILE row whose genus is exactly that name
#  the genus of a row is field 4 up to the first space with double quotes removed,
#  so "Apis" no longer matches "Apisa ..." or text sitting in another column
#  an empty genus can no longer turn the file name into the search pattern
first_row_per_genus() {
  awk -F ',' '
    # first operand: remember the requested genera in their listed order
    FILENAME == ARGV[1] {
      if ($0 != "") wanted[++n] = $0
      next
    }
    # second operand: keep only the first row seen for every genus
    {
      genus = $4
      sub(/ .*/, "", genus)
      gsub(/"/, "", genus)
      if (!(genus in first_row)) first_row[genus] = $0
    }
    END {
      for (i = 1; i <= n; i++)
        if (wanted[i] in first_row) print first_row[wanted[i]]
    }
  ' "$1" "$2"
}
#write with > (not >>) so rows left over from an interrupted earlier run are not duplicated
first_row_per_genus unique_genera_FCM.txt high_coverage_assemblies.with_FCM.csv \
  > Insecta_genomes_with_FCM_data.unique_genera.temp

#put the first line of the original spreadsheet back on top of the selected rows
#prepend_header HEADER_SOURCE ROWS_FILE
#  prints the first line of HEADER_SOURCE followed by the whole of ROWS_FILE
#  both go straight to stdout, so no scratch file named "header" is created
#  (the scratch file overwrote and then deleted any file of that name in the working directory)
prepend_header() {
  head -n 1 "$1"
  cat "$2"
}
prepend_header high_coverage_assemblies.csv Insecta_genomes_with_FCM_data.unique_genera.temp \
  > Insecta_genomes_with_FCM_data.unique_genera.csv
rm Insecta_genomes_with_FCM_data.unique_genera.temp

