#!/bin/bash

# Summarise, for every strain, the defense-system types and subtypes found in
# a tab-separated systems table.
#
# Usage: <this script> [strains_file [systems_file [out_file [type_filter_file]]]]
#
#   strains_file      one strain identifier per line
#                     (default: ../strains_nored100.ab)
#   systems_file      tab-separated table; column 2 is read as the type and
#                     column 3 as the subtype
#                     (default: ./defense_finder_systems_wored100.tsv)
#   out_file          results are APPENDED here as
#                     strain<TAB>type1,type2<TAB>subtype1,subtype2
#                     (default: st_sys_subsys.tsv)
#   type_filter_file  a type is reported only if it occurs in this file
#                     (default: systems_file)
#
# NOTE(review): with the default type_filter_file every type passes, because
# the types were extracted from that very file. An earlier draft (previously
# kept here as commented-out code) tested types against
# ./defense_systems_freq.id instead - confirm which file is intended.
#
# TODO: the same earlier draft sketched mapping the column-6 protein IDs to
# genes via ../clustered_proteins.tsv; that step is not implemented.

# Join the lines read from stdin into one comma-separated line.
_join_commas() {
  paste -s -d ',' -
}

# Read candidate types (one per line) from stdin and print those that occur
# in file $1. Empty lines are dropped.
_filter_types() {
  local filter_file=$1
  local type
  while IFS= read -r type; do
    [[ -n "$type" ]] || continue
    # grep is given a file operand, so it never touches this loop's stdin.
    if grep -Fq -- "$type" "$filter_file"; then
      printf '%s\n' "$type"
    fi
  done
}

# Entry point: see the usage block at the top of the file.
# Returns 0 on success, 1 if an input file cannot be read.
main() {
  local strains_file=${1:-../strains_nored100.ab}
  local systems_file=${2:-./defense_finder_systems_wored100.tsv}
  local out_file=${3:-st_sys_subsys.tsv}
  local filter_file=${4:-$systems_file}
  local input strain rows types subtypes

  for input in "$strains_file" "$systems_file" "$filter_file"; do
    if [[ ! -r "$input" ]]; then
      printf 'error: cannot read input file: %s\n' "$input" >&2
      return 1
    fi
  done

  # The strain list is read on fd 3 so that no command inside the loop can
  # consume it through stdin. $'\t' is a real tab; a plain '\t' would make
  # backslash and the letter t the separators. The "|| [[ -n ... ]]" part
  # keeps a final line that lacks a trailing newline.
  while IFS=$'\t' read -r -u 3 strain || [[ -n "$strain" ]]; do
    # An empty pattern would match every row of the table.
    [[ -n "$strain" ]] || continue

    # One pass over the table per strain; both columns are cut from the result.
    # NOTE(review): -F matches the strain as a substring anywhere in the row,
    # so a strain whose name is contained in another's (S1 / S10) also picks
    # up the other strain's rows.
    rows=$(grep -F -- "$strain" "$systems_file")
    [[ -n "$rows" ]] || continue

    types=$(cut -f2 <<< "$rows" | sort -u | _filter_types "$filter_file" | _join_commas)
    # Strains without any reported type produce no output line.
    [[ -n "$types" ]] || continue

    subtypes=$(cut -f3 <<< "$rows" | sort -u | _join_commas)

    # printf '%s' writes the data verbatim (echo -e would expand backslashes).
    printf '%s\t%s\t%s\n' "$strain" "$types" "$subtypes" >> "$out_file"
  done 3< "$strains_file"
}

main "$@"
