### Script that prepares a genome matrix for figure generation or machine learning (ML) by annotating each genome with its defense system types and, optionally, its frequent MLST
import sys
import pandas as pd
import re
import numpy as np

def build_lookup(keys, values):
    """Return a dict mapping each element of *keys* to the matching *values* element.

    Both arguments are iterated in lockstep. When a key occurs more than
    once, the last occurrence wins.
    """
    return dict(zip(keys, values))


def annotate_matrix(matrix, types, mlst=None, require_mlst=False, add_mlst=False):
    """Add a ``types`` column (and optionally an ``mlst`` column) to *matrix*.

    The first column of *matrix* is taken as the genome identifier and is
    looked up, unchanged, in the given dictionaries. The same key is used
    for the membership test and for the lookup, so numeric identifiers
    work as well as string ones.

    Args:
        matrix: DataFrame whose first column holds the genome identifiers.
            It is modified in place; an existing ``types`` (or ``mlst``)
            column is replaced.
        types: dict mapping genome identifier -> defense system types.
        mlst: dict mapping genome identifier -> MLST. Only needed when
            ``require_mlst`` or ``add_mlst`` is set.
        require_mlst: when True, annotate only genomes that are present in
            both *types* and *mlst*.
        add_mlst: when True, also write the MLST of every annotated genome
            to an ``mlst`` column.

    Returns:
        The same *matrix* object. Rows that are not annotated hold NaN in
        the added column(s); the columns are created even if nothing matches.

    Raises:
        ValueError: if ``require_mlst`` or ``add_mlst`` is set without *mlst*.
    """
    if mlst is None:
        if require_mlst or add_mlst:
            raise ValueError("an mlst lookup is needed when require_mlst or add_mlst is set")
        mlst = {}

    # Select the identifier column by position, independently of the index
    # labels, so the frame does not need a default RangeIndex.
    genome_ids = matrix.iloc[:, 0]

    annotated = genome_ids.isin(list(types))
    if require_mlst:
        annotated &= genome_ids.isin(list(mlst))

    matrix["types"] = genome_ids.map(types).where(annotated)
    if add_mlst:
        matrix["mlst"] = genome_ids.map(mlst).where(annotated)
    return matrix


if __name__ == "__main__":
    # Loading data
    if len(sys.argv) < 2:
        sys.exit("Usage: {} <matrix.tsv>".format(sys.argv[0]))
    mx_file = sys.argv[1]   #"/home/user/matrix_90_ab_1prcst_cl.tsv" ||  "/home/user/matrix_90_ab_freqmlst8_cl.tsv"
    defsys_strains_file = "/home/user/st_sys_subsys.tsv"  # Defense system types
    mlst_file = "/home/user/mlst_ab_freq_wored100.tsv"  # Freq MLST file

    matrix = pd.read_csv(mx_file, sep='\t')
    defsys_strains = pd.read_csv(defsys_strains_file, sep='\t')
    mlst_strains = pd.read_csv(mlst_file, sep='\t', names=["strain", "mlst"])

    # Build the defense system and frequent MLST dictionaries.
    # types: first column of the defense system file -> second column.
    types = build_lookup(defsys_strains.iloc[:, 0], defsys_strains.iloc[:, 1])
    # mlst: strain -> MLST, stored as a string.
    mlst = build_lookup(mlst_strains["strain"], [str(value) for value in mlst_strains["mlst"]])

    # Set require_mlst to True to annotate only genomes that have a frequent
    # MLST; set add_mlst to True to add the MLST of each genome as a column.
    require_mlst = False
    add_mlst = False
    matrix = annotate_matrix(matrix, types, mlst=mlst,
                             require_mlst=require_mlst, add_mlst=add_mlst)

    # The row index is written as the first (unnamed) column, as before.
    matrix.to_csv('/home/user/matrix_90_ab_ml_1prcst_nored100_cl.tsv', sep='\t')

