#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 11 13:53:28 2025

@author: brown

This script reads an upper triangular distance matrix in EMBOSS format and converts it into a square matrix
"""

import re
import csv
import os
import sys


# Read an upper triangle distance matrix in EMBOSS format
def read_upper_triangle_matrix(file_name):
    """Parse an upper triangular matrix file into rows and row names.

    The first line of the file is always skipped. Every following line is
    split on whitespace and kept as a data row only when all of these hold:

    * it has at least three tokens;
    * the last token parses as an integer;
    * the first token parses as a float equal to 0 (the diagonal entry);
    * every token except the last two parses as a float.

    Lines failing any of the first three checks are skipped silently. A line
    that passes them but contains a non-numeric value is reported on stdout
    and skipped.

    Args:
        file_name: Path of the text file to read.

    Returns:
        A ``(matrix, names)`` tuple. ``matrix`` is a list of rows, each a
        list of floats built from all tokens but the last two; ``names``
        holds the second-to-last token of each kept line, in the same order.
        Both lists are empty when the file has no usable rows, including
        when the file itself is empty.
    """
    matrix = []
    names = []

    with open(file_name, 'r') as f:
        # Skip the first line. The default keeps an empty file from raising
        # StopIteration here; the loop below then simply does nothing.
        next(f, None)
        for line in f:
            tokens = line.strip().split()
            if len(tokens) < 3:
                continue  # Ignore lines without the expected layout

            name = tokens[-2]
            try:
                int(tokens[-1])  # The last element must be an integer
            except ValueError:
                continue
            try:
                # The first element (the diagonal entry) must be 0
                diagonal = float(tokens[0])
            except ValueError:
                continue  # Skip if not numeric
            if diagonal != 0.0:
                continue
            try:
                row = [float(x) for x in tokens[:-2]]
            except ValueError:
                print(f"Error when converting numbers in line: {line}")
                continue

            matrix.append(row)
            names.append(name)

    return matrix, names

# Convert upper triangle distance matrix to a square distance matrix
def upper_triangle_to_square(triangle):
    """Mirror an upper triangular matrix into a full symmetric square one.

    Row ``i`` of ``triangle`` is read as starting at the diagonal, so its
    ``j``-th value lands in column ``i + j``. Each value is written both at
    ``[i][i + j]`` and at the mirrored position ``[i + j][i]``. Cells that
    no input value reaches stay at 0.0.

    Args:
        triangle: Sequence of rows; its length defines the square size.

    Returns:
        A list of ``len(triangle)`` lists, each of ``len(triangle)`` values.

    Raises:
        ValueError: If a row holds values past the last column.
    """
    size = len(triangle)
    square = [[0.0] * size for _ in range(size)]

    for row_idx, values in enumerate(triangle):
        for offset, distance in enumerate(values):
            col_idx = row_idx + offset
            if col_idx >= size:
                raise ValueError(f"Row {row_idx} has more elements than expected")
            # On the diagonal both assignments hit the same cell, so no
            # special case is needed.
            square[row_idx][col_idx] = distance
            square[col_idx][row_idx] = distance

    return square

# Save square distance matrix in a tsv format
def save_matrix(matrix, names, output):
    """Write a labelled matrix to a tab-separated file.

    The first line is a header made of ``ID`` followed by every name. Each
    following line holds one name and its matrix row, with every value
    formatted to two decimal places. Names and rows are paired by position.

    Args:
        matrix: Rows of numeric values.
        names: List of labels, used for both the header and the row labels.
        output: Path of the file to create or overwrite.
    """
    header = ['ID'] + names
    with open(output, 'w', newline='') as handle:
        tsv = csv.writer(handle, delimiter='\t')
        tsv.writerow(header)
        tsv.writerows(
            [label] + [f"{value:.2f}" for value in values]
            for label, values in zip(names, matrix)
        )



def main(argv=None):
    """Convert an upper triangular matrix file into a square TSV file.

    Args:
        argv: Argument list laid out like ``sys.argv`` (program name, input
            path, output path). Defaults to ``sys.argv`` when omitted.
            Arguments beyond the first two paths are ignored.

    Raises:
        SystemExit: With a usage message when fewer than two paths are given.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        # Fail with a readable message instead of a bare IndexError.
        prog = argv[0] if argv else "script"
        sys.exit(f"Usage: {prog} <triangle_input> <square_output>")

    triangle_input = argv[1]
    square_out = argv[2]

    triangle, names = read_upper_triangle_matrix(triangle_input)
    square = upper_triangle_to_square(triangle)

    save_matrix(square, names, square_out)


# Run the conversion only when executed as a script, so that importing this
# module does not read sys.argv or touch any files.
if __name__ == "__main__":
    main()


