#!/bin/bash
#
# Pick representative entries from a CSV file of non-redundant sequence groups.
#
# Usage: <this script> <n_r_file>
#
# Input lines are comma-separated. The fields this script relies on:
#   1  - record type: "cluster" or "unique"
#   2  - group number (1..N; N is read from the last line of the file)
#   3  - sub-group number inside a cluster (1..M; M is read from the last
#        "cluster" line of the group)
#   4  - entry identifier
#   20 - score; the lowest value is treated as the best one
# The file is assumed to be ordered so that the last line carries the highest
# group number, and likewise for sub-groups inside a cluster.
#
# For every cluster sub-group and for every "unique" group, the best line
# containing "naked" and the best line matching "prot[DR]NA" are printed to
# stdout. The identifier of each printed line is appended to
# "<n_r_file>_selected" (truncated at start-up). When a later group is
# processed, every line containing an already recorded identifier anywhere
# in its text is ignored.
#
# Exit status: 0 on success, 1 if the input file is missing or malformed.
#
# set -e / pipefail are deliberately not enabled: grep exiting with status 1
# ("no match") is a normal condition throughout this script.

#######################################
# Print the lines of the input file that contain none of the recorded ids.
# Arguments:
#   $1 - input CSV file
#   $2 - file with the recorded identifiers, one per line
# Outputs:
#   the surviving lines on stdout
#######################################
unselected_lines() {
  local n_r_file=$1
  local selected_file=$2

  if [[ -s "${selected_file}" ]]; then
    # -F: identifiers are literal text, not regular expressions.
    grep -v -F -f "${selected_file}" -- "${n_r_file}"
  else
    # Nothing recorded yet: every line is still a candidate.
    cat -- "${n_r_file}"
  fi
}

#######################################
# From the candidate lines on stdin, print the one with the lowest score
# among those matching a pattern, and record its identifier.
# Arguments:
#   $1 - extended regular expression a candidate line must match
#   $2 - file to which the identifier (field 4) is appended
# Outputs:
#   the chosen line on stdout; nothing if no candidate matches
#######################################
pick_best() {
  local kind_re=$1
  local selected_file=$2
  local best id

  # The key is limited to field 20 alone (-k20,20) so that later fields can
  # never be parsed as part of the number.
  best=$(grep -E -- "${kind_re}" | sort -t',' -k20,20n | head -n 1)
  [[ -n "${best}" ]] || return 0

  printf '%s\n' "${best}"

  IFS=',' read -r _ _ _ id _ <<<"${best}"
  # An empty pattern would match every line and wipe out all later groups.
  if [[ -n "${id}" ]]; then
    printf '%s\n' "${id}" >>"${selected_file}"
  fi
  return 0
}

#######################################
# Entry point: walk all groups of the input file and select the best entries.
# Arguments:
#   $1 - input CSV file with the non-redundant sequences
# Returns:
#   0 on success, 1 on a missing or malformed input file
#######################################
main() {
  local n_r_file=${1:-""}
  local selected_file num_groups num_in_cluster nr grp
  local group_lines cluster_lines unique_lines members

  if [[ ! -f "${n_r_file}" ]]; then
    echo "File with non_redundant sequences not found" >&2
    return 1
  fi

  selected_file="${n_r_file}_selected"
  : >"${selected_file}"

  num_groups=$(tail -n 1 -- "${n_r_file}" | awk -F',' '{print $2}')
  if [[ ! "${num_groups}" =~ ^[0-9]+$ ]]; then
    echo "Cannot read the number of groups from the last line of ${n_r_file}" >&2
    return 1
  fi
  # Force base 10 so that a value such as "08" is not rejected as octal.
  num_groups=$((10#${num_groups}))

  for ((nr = 1; nr <= num_groups; nr++)); do
    # Lines of this group whose entries were not picked for an earlier group.
    group_lines=$(unselected_lines "${n_r_file}" "${selected_file}" \
      | grep -E "^(cluster|unique),${nr},")
    cluster_lines=$(grep '^cluster,' <<<"${group_lines}")
    unique_lines=$(grep '^unique,' <<<"${group_lines}")

    # Number of sub-groups, taken from the last cluster line of the group;
    # empty when the group has no cluster lines.
    num_in_cluster=$(tail -n 1 <<<"${cluster_lines}" | awk -F',' '{print $3}')

    if [[ "${num_in_cluster}" =~ ^[0-9]+$ ]]; then
      num_in_cluster=$((10#${num_in_cluster}))
      for ((grp = 1; grp <= num_in_cluster; grp++)); do
        members=$(grep -E "^cluster,${nr},${grp}," <<<"${cluster_lines}")
        # Best (lowest) score of each kind within the sub-group.
        pick_best 'naked' "${selected_file}" <<<"${members}"
        pick_best 'prot[DR]NA' "${selected_file}" <<<"${members}"
      done
    elif [[ -n "${num_in_cluster}" ]]; then
      echo "Skipping cluster ${nr}: bad sub-group number '${num_in_cluster}'" >&2
    fi

    # pick_best prints nothing when the group has no "unique" lines.
    pick_best 'naked' "${selected_file}" <<<"${unique_lines}"
    pick_best 'prot[DR]NA' "${selected_file}" <<<"${unique_lines}"
  done

  return 0
}

main "$@"
