###### CONDA ENVIRONMENT for Slim Tree: conda install -c conda-forge -c bioconda biopython r-bio3d snakemake
###### The `rate4site` executable must also be available on PATH (it is called by rule Calcualte_Rates_Hominids).


import os
import os.path
from os import listdir
from os.path import isfile, join, isdir
from Bio import SeqIO



################################################################################################################ SET UP ################################################################################################################################################################################################################################

##### How many repetitions: one integer sample ID (0..999) per run; used as the {sample} wildcard
SAMPLES = list(range(1000))



##### Check if folders exist
# if os.path.isdir('Entropies_Hominids')==True:    
    # shutil.rmtree('Entropies_Hominids', ignore_errors=True, onerror=None)
# os.mkdir('Entropies_Hominids')




############################################################################################################################################################################################################################################################



###### One Rule to Rule them ALL

rule all:
    # Final targets of the workflow: the per-sample completion markers of the
    # rate and entropy steps, followed by the two combined result files.
    input:
        expand("Complete_Runs/{sample}/{sample}_Rates", sample=SAMPLES),
        expand("Complete_Runs/{sample}/{sample}_Entropy", sample=SAMPLES),
        "Entropies_Hominids/Combined_Entropies",
        "Entropies_Hominids/Combined_Rates"





######################################################################################################################
#### Step 1 - Sample 1 individual per species for every protein, generate an entropy for every protein


#### Step 1
rule Sample_Dataset_Hominids:
    # Step 1: build the sampled dataset for one {sample} ID by running
    # Scripts/Sort_Data_For_Entropy_Calc.py into Complete_Runs/{sample}/,
    # then touch the {sample}_Run marker file that downstream rules wait for.
    input:
        'Scripts/Calc_Entropy_of_Fasta.r',
        'Scripts/Sort_Data_For_Entropy_Calc.py'
    output:
        temp('Complete_Runs/{sample}/{sample}_Run')
    threads:1
    run:
        SAMPLE_DIR=F'Complete_Runs/{wildcards.sample}'

        ##### Make sure the sampling folder exists.
        # makedirs(exist_ok=True) creates Complete_Runs/ and the per-sample
        # folder in one call and does not fail if another job created
        # Complete_Runs/ in the meantime (a separate isdir check followed by
        # `mkdir` can lose that race when sample jobs run in parallel).
        os.makedirs(SAMPLE_DIR, exist_ok=True)

        ### Clean up the sampling folder so files from an earlier attempt are not reused
        shell(F'rm -rf {SAMPLE_DIR}/*')

        ### Run once to create a 5-sample dataset ( Orang-Gor-Chimp-Hum-ArchHum)
        # The trailing slash is part of the argument handed to the script.
        shell(F'python3 Scripts/Sort_Data_For_Entropy_Calc.py {SAMPLE_DIR}/')

        ### Mark as complete
        shell(F'touch {SAMPLE_DIR}/{wildcards.sample}_Run')
        

rule Entropy_reset:
    # Runs once all per-sample _Run markers exist: replaces Entropies_Hominids
    # with an empty folder and touches the Entropy_Cleanup marker.
    input:
        expand('Complete_Runs/{sample}/{sample}_Run', sample=SAMPLES)
    output:
        temp('Complete_Runs/Entropy_Cleanup')
    threads: 1
    run:
        #### For Hominids: wipe the results folder, recreate it, then mark the reset as done
        reset_commands = (
            'rm -rf Entropies_Hominids',
            'mkdir Entropies_Hominids',
            'touch Complete_Runs/Entropy_Cleanup',
        )
        for command in reset_commands:
            shell(command)




#### Step 2
#### Calculate Entropy

rule Calcualte_Entropy_Hominids:
    # Step 2: for one {sample}, call Scripts/Calc_Entropy_of_Fasta.r on every
    # file in Complete_Runs/{sample}/ whose name contains '.fa', then touch
    # the {sample}_Entropy marker.
    input:
        'Complete_Runs/{sample}/{sample}_Run',
        'Complete_Runs/Entropy_Cleanup'
    output:
        temp('Complete_Runs/{sample}/{sample}_Entropy')  # make temp
    threads: 1
    run:
        sample_dir = f'Complete_Runs/{wildcards.sample}/'

        #### Run the entropy calculation for each protein file of this sample
        for fasta_name in os.listdir(sample_dir):
            if '.fa' not in fasta_name:
                continue
            #### Change the directory (3rd argument) if changing set
            shell(f'Rscript Scripts/Calc_Entropy_of_Fasta.r {fasta_name} {sample_dir} Entropies_Hominids')

        ### Checkmark that it was run
        shell(f'touch {sample_dir}{wildcards.sample}_Entropy')



################# Step 2.5 (preparation)
#### Create one output folder per protein for the rate results

rule Create_Rate_Folders:
    # Runs once after all samples exist and Entropies_Hominids has been reset.
    # Creates Entropies_Hominids/<protein>/ for every '.fa' file found in one
    # reference sample folder, then touches the Folders_for_Rates marker.
    input:
        expand('Complete_Runs/{sample}/{sample}_Run',sample=SAMPLES),
        'Complete_Runs/Entropy_Cleanup'
    output:
        'Complete_Runs/Folders_for_Rates' 
    threads:1
    run:
        #### Pick the sample folder used as the list of proteins.
        # Sample 1 is kept as the reference whenever it is part of SAMPLES;
        # otherwise fall back to the first sample so the rule still works
        # when SAMPLES does not contain 1.
        # NOTE(review): assumes every sample folder holds the same set of
        # protein files - confirm against Sort_Data_For_Entropy_Calc.py.
        REFERENCE_SAMPLE=1 if 1 in SAMPLES else SAMPLES[0]
        FILES=[x for x in os.listdir(F'Complete_Runs/{REFERENCE_SAMPLE}/') if '.fa' in x]

        for X in FILES:
            # Protein name = file name up to the first '.'
            PRTN_NAME=X.split('.')[0]

            # exist_ok=True makes this a no-op for folders that already exist
            # and avoids starting a shell process per protein.
            os.makedirs(F"Entropies_Hominids/{PRTN_NAME}", exist_ok=True)

        ### Checkmark that it was run
        shell(F'touch Complete_Runs/Folders_for_Rates')



################# Step 2.5
#### Calculate Rates

rule Calcualte_Rates_Hominids:
    # For one {sample}, run rate4site on every file in Complete_Runs/{sample}/
    # whose name contains '.fa'; each result is written to
    # Entropies_Hominids/<protein>/{sample}.output. Finishes by touching the
    # {sample}_Rates marker.
    input:
        'Complete_Runs/{sample}/{sample}_Run',
        'Complete_Runs/Entropy_Cleanup',
        'Complete_Runs/Folders_for_Rates'
    output:
        temp('Complete_Runs/{sample}/{sample}_Rates')  ### Make temp
    threads: 1
    run:
        sample_dir = f'Complete_Runs/{wildcards.sample}/'

        #### Run rate4site for each protein file of this sample
        for fasta_name in (name for name in os.listdir(sample_dir) if '.fa' in name):
            # Protein name = file name up to the first '.'
            protein = fasta_name.split('.')[0]
            shell(f"rate4site -s {sample_dir}{fasta_name} -o Entropies_Hominids/{protein}/{wildcards.sample}.output")

        ### Checkmark that it was run
        shell(f'touch {sample_dir}{wildcards.sample}_Rates')





#######################################################################################################################
##### Step 3 - Combine Entropy for every run for each protein



##### Step 3
rule Combine_Entropies_Per_Protein_Hominids:
    # Step 3: once every sample has its _Run, _Entropy and _Rates markers,
    # call the two combine scripts on Entropies_Hominids and then remove the
    # whole Complete_Runs working folder.
    input:
        expand('Complete_Runs/{sample}/{sample}_Run', sample=SAMPLES),
        expand('Complete_Runs/{sample}/{sample}_Entropy', sample=SAMPLES),
        expand('Complete_Runs/{sample}/{sample}_Rates', sample=SAMPLES)
    output:
        'Entropies_Hominids/Combined_Entropies',
        'Entropies_Hominids/Combined_Rates'
    threads: 1
    run:
        # Entropies first, then Rates: Scripts/Combine_<kind>.py writes
        # Entropies_Hominids/Combined_<kind>.
        for kind in ('Entropies', 'Rates'):
            shell(f'python3 Scripts/Combine_{kind}.py Entropies_Hominids Entropies_Hominids/Combined_{kind}')

        # Drop all per-sample working data (this also removes the marker files).
        shell('rm -rf Complete_Runs')
        # shell('rm -rf Entropies_Hominids/Protein_Lengths.txt')
        





