#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Nov  5 21:52:48 2024

@author: brown

Script to build the presence/absence matrix of defense systems and groups
"""
import pandas as pd
import numpy as np

# Input locations and the defense systems reported in the matrix
defsys_file = "/home/user/defsys_strains_subtypes.tsv" # Defense systems (first and third column of 'defense_finder_systems_wored100.tsv')

defsys_list = ['R-M_Type_I','R-M_Type_II','R-M_Type_III','R-M_Type_IV','Cas','SspBCDE','Gao_Qat','Gabija','CBASS','RosmerTA','PD-T4-5','PD-T7-5','PD-Lambda-2']


def read_strain_list(path):
    """Return the non-blank lines of the text file *path* as a list.

    Blank lines are skipped so they do not end up as rows labelled with an
    empty string in the groups table.
    """
    with open(path, encoding="utf-8") as handle:
        return [line for line in handle.read().splitlines() if line.strip()]


def match_defense_systems(subtype, names):
    """Return the entries of *names* that occur in the string *subtype*.

    Matching is by substring, so a short name such as 'Cas' or 'CBASS' also
    matches longer subtype labels that contain it. When several names match
    and one of them is contained in another (e.g. 'R-M_Type_I' inside
    'R-M_Type_III'), only the longer, more specific name is kept; otherwise
    every 'R-M_Type_III' row would also be counted as 'R-M_Type_I' and
    'R-M_Type_II'.

    NOTE(review): this assumes each subtype cell describes a single system --
    confirm against the input table.
    """
    hits = [name for name in names if name in subtype]
    return [
        name for name in hits
        if not any(name != other and name in other for other in hits)
    ]


def build_group_table(group1, group2):
    """Return a one-column ('Groups') DataFrame indexed by strain name.

    Strains from *group1* are labelled "Group 1" and strains from *group2*
    "Group 2". A strain listed in both keeps its first position and ends up
    labelled "Group 2", because the second assignment overwrites the first.
    """
    labels = {}
    for st in group1:
        labels[st] = "Group 1"
    for st in group2:
        labels[st] = "Group 2"
    return pd.DataFrame(
        {'Groups': list(labels.values())},
        index=list(labels),
        dtype=object,
    )


def build_presence_matrix(defsys, names):
    """Build the presence/absence matrix of defense systems per strain.

    Parameters
    ----------
    defsys : pandas.DataFrame
        Table whose first column is used as the row label (the strain) and
        whose second column is the subtype text searched for *names*.
        Rows with missing values are expected to be removed beforehand.
    names : sequence of str
        Defense system names; they become the columns of the result.

    Returns
    -------
    pandas.DataFrame
        One row per strain with at least one matching system, in order of
        first appearance. A cell holds the system name when present and NaN
        otherwise.
    """
    names = list(names)
    presence = {}
    # Columns are taken by position: integer keys on a label-indexed row
    # (row[0], row[1]) are deprecated in pandas and no longer positional.
    strains = defsys.iloc[:, 0]
    subtypes = defsys.iloc[:, 1]
    for strain, subtype in zip(strains, subtypes):
        found = match_defense_systems(str(subtype), names)
        if found:
            presence.setdefault(strain, set()).update(found)

    # Allocate the full (all-NaN) frame once instead of enlarging it per cell.
    matrix = pd.DataFrame(index=list(presence), columns=names, dtype=object)
    for strain, found in presence.items():
        for name in found:
            matrix.at[strain, name] = name
    return matrix


if __name__ == "__main__":
    # Loading data
    group1 = read_strain_list('/home/user/ggtree_g1.ab')       # Group 1 from the phylogeny
    group2 = read_strain_list('/home/user/ggtree_g2.ab')       # Group 2 from the phylogeny

    defsys = pd.read_csv(defsys_file, sep="\t")
    defsys_nan = defsys.dropna()

    # Building output dataframes
    matrix = build_presence_matrix(defsys_nan, defsys_list)
    matrix_groups = build_group_table(group1, group2)

    # Saving output files
    matrix_groups.to_csv('/home/user/groups.tsv',sep="\t")
    matrix.to_csv('/home/user/defsys_presaus_ann.tsv', sep="\t")
