# laura maria engist, 2025
# script to run mmseqs and create rocx files

from evotuner import constants
import subprocess
import resource
import sys

class CreateAlignmentsAndROCX:
    """Run an MMseqs2 search of a FASTA file against itself and build a .rocx file.

    The search result (.m8 file) is post-processed with an awk script and a
    SCOP lookup file, both taken from ``evotuner.constants``, to produce the
    .rocx output used for the SCOP40 benchmark.
    """

    # Address-space cap (RLIMIT_AS) applied to the MMseqs2 child process: 50 GB.
    MEM_LIMIT_BYTES = 50 * 1024 * 1024 * 1024

    def __init__(self, mmseqs_file, output_file, gap_open, gap_extend, mat_file, sequences_fasta):
        """Store the paths and alignment parameters used by the other methods.

        Args:
            mmseqs_file: Path of the .m8 file MMseqs2 writes its hits to.
            output_file: Path of the .rocx file to create.
            gap_open: Value passed to MMseqs2 as ``--gap-open``.
            gap_extend: Value passed to MMseqs2 as ``--gap-extend``.
            mat_file: Value passed as ``--sub-mat``; ``None`` omits the option.
            sequences_fasta: FASTA file used as both query and target.
        """
        self.mmseqs_file = mmseqs_file
        self.output_file = output_file
        self.gap_open = gap_open
        self.gap_extend = gap_extend
        self.mat_file = mat_file
        self.sequences_fasta = sequences_fasta

    def create(self):
        """Create alignments with MMseqs2 and a rocx file for the benchmark with SCOP40.

        The rocx step is skipped when the MMseqs2 run fails.
        """
        alignments_existing = self.run_mmseqs()
        if alignments_existing:
            self.benchmark_scop40(
                self.mmseqs_file, constants.AWK_FILE, constants.SCOP_LOOKUP_FILE, self.output_file
            )

    def run_mmseqs(self):
        """Run MMseqs2 ``easy-search`` once, with a memory limit of 50 GB.

        Returns:
            The path to the .m8 file, or an empty string if MMseqs2 exits
            with a non-zero status.
        """
        command = [
            constants.MMSEQS, "easy-search",
            self.sequences_fasta, self.sequences_fasta, self.mmseqs_file, "tmp",
            "--comp-bias-corr", "0",
            "--mask", "0",
            "-v", "2",
            "--gap-open", str(self.gap_open),
            "--gap-extend", str(self.gap_extend),
            "-s", str(5.7),
            "-e", str(10000),
        ]
        if self.mat_file is not None:
            command += ["--sub-mat", self.mat_file]

        if not self._run_with_memory_limit(command):
            return ""
        return self.mmseqs_file

    def _run_with_memory_limit(self, command):
        """Run *command* with RLIMIT_AS capped; return True if it exits with status 0."""
        limit = self.MEM_LIMIT_BYTES

        def _cap_address_space():
            # Runs in the child between fork and exec, so the limits of this
            # (parent) process are left untouched.
            _, hard = resource.getrlimit(resource.RLIMIT_AS)
            # A process may not raise its limit above an existing hard limit,
            # so clamp instead of failing when a stricter cap is already set.
            capped = limit if hard == resource.RLIM_INFINITY else min(limit, hard)
            resource.setrlimit(resource.RLIMIT_AS, (capped, capped))

        # Flush our own buffered output first so it is not interleaved with
        # what the child writes to the shared stdout.
        sys.stdout.flush()
        try:
            subprocess.run(command, preexec_fn=_cap_address_space, check=True)
        except subprocess.CalledProcessError as err:
            print(f"{command[0]} failed with exit status {err.returncode}", file=sys.stderr)
            return False
        return True

    def benchmark_scop40(self, mmseqs_file, awk_file, scop_lookup_file, output_file):
        """Create a rocx file for the benchmark with SCOP40.

        Runs ``awk -f awk_file scop_lookup_file mmseqs_file`` and writes its
        standard output to *output_file*.

        Raises:
            subprocess.CalledProcessError: If awk exits with a non-zero status.
        """
        # The context manager closes the output file even when awk fails.
        with open(output_file, "w") as rocx_out:
            subprocess.check_call(
                ["awk", "-f", awk_file, scop_lookup_file, mmseqs_file],
                stdout=rocx_out,
            )

# Positional command-line arguments, forwarded in constructor order:
#   argv[1]  path to the .m8 file
#   argv[2]  output file to store the .rocx output
#   argv[3]  gap open penalty
#   argv[4]  gap extend penalty
#   argv[5]  matrix file as .out file
#   argv[6]  embedded sequences as .fasta file
create_alignments_and_rocx = CreateAlignmentsAndROCX(
    sys.argv[1],
    sys.argv[2],
    sys.argv[3],
    sys.argv[4],
    sys.argv[5],
    sys.argv[6],
)
create_alignments_and_rocx.create()