#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Mar  1 15:48:51 2024

@author: brown
"""

# CO-OCCURRENCE MATRIX

import pandas as pd
import numpy as np

# Loading files
path = "/home/user/st_sys_subsys.tsv"  # Input table: strains, types, subtypes

with open('/home/user/defense_systems_freq.id') as defense_file:
    # One system ID per line (systems frequent in at least 1% of genomes).
    # IDs are stripped and blank lines skipped so that stray whitespace in
    # the file cannot prevent a match further down.
    def_freq = [
        line.strip()
        for line in defense_file.read().splitlines()
        if line.strip()
    ]

# The file has no header row, so the column names are supplied here.
# (The original bound this frame to `input`, shadowing the builtin.)
raw_table = pd.read_csv(path, sep='\t', names=['strains', 'types', 'subtypes'])

# Drop every row that has a missing value in any of the three columns
input2 = raw_table.dropna()

# Collect every system name found in the comma-separated 'types' column.
# Tokens are stripped and empty tokens skipped, i.e. normalised exactly the
# same way as when the co-occurrences are counted; otherwise a name written
# as "A, B" would be stored as " B" and never match the frequent-system IDs.
df_list = []
for types_field in input2['types'].tolist():
    for token in types_field.split(","):
        token = token.strip()
        if token:
            df_list.append(token)

# Sorted so that the order is reproducible from one run to the next
df_uniq = sorted(set(df_list))

# Filter frequent defense systems (set lookup instead of scanning a list)
frequent_ids = set(def_freq)
df_def = [name for name in df_uniq if name in frequent_ids]

# Nested counter: cooccurrences[a][b] is the number of rows whose 'types'
# field lists both a and b; the diagonal cooccurrences[a][a] is therefore
# the number of rows listing a at all. Only systems in df_def are tracked.
cooccurrences = {outer: dict.fromkeys(df_def, 0) for outer in df_def}

# Tally every ordered pair of tracked systems that share a row
for types_field in input2['types']:
    # Unique, whitespace-stripped names on this row; empty tokens ignored
    present = {name.strip() for name in types_field.split(',')}
    present.discard('')
    tracked = [name for name in present if name in df_def]
    for first in tracked:
        counts_for_first = cooccurrences[first]
        for second in tracked:
            counts_for_first[second] += 1

# Outer keys become the columns, inner keys the index
matrix_coor = pd.DataFrame(cooccurrences)

# Ratios of cooccurrences (optional step, currently disabled)
# for i,r in matrix_coor.iterrows():
#     for c in matrix_coor.columns:
#         if i == c:
#             pass
#         else:
#             matrix_coor[i][c] = r[c] / r[i]


# Fixed order of the defense systems, used for both rows and columns
order_defsys = [
    'SspBCDE', 'RM', 'Dnd', 'BREX', 'Cas', 'AbiH',
    'RosmerTA', 'SanaTA', 'CapRel', 'Rst_PARIS', 'Retron', 'PrrC',
    'DarTG', 'CBASS', 'Gao_Qat', 'Gabija', 'Septu', 'Shedu',
    'Lamassu-Fam', 'Menshen', 'Mokosh', 'Druantia', 'PD-T7-5', 'PD-T4-5',
]

# Reindexing keeps only the systems listed above, in that order; a listed
# system that is missing from the matrix appears as an all-NaN row/column.
matrix_coor = matrix_coor.reindex(index=order_defsys, columns=order_defsys)

# Cap every value at 1 (optional step, currently disabled)
#matrix_coor = matrix_coor.where(matrix_coor < 1, 1)

# Save the ordered matrix as a tab-separated file
output_path = "/home/user/matrix_coocurrences_fig2.tsv"
matrix_coor.to_csv(output_path, sep="\t")

