#!/usr/bin/env python3
import sys
import re
import os

# Matches either an XML comment (group 1) or a <pb ...> tag (group 2).
# The comment alternative comes first so that a <pb> written inside a comment
# is consumed as part of the comment and never renumbered; DOTALL lets a
# comment span several lines.
_COMMENT_OR_PB_RE = re.compile(r'(<!--.*?-->)|(<pb\b[^>]*>)', re.DOTALL)

# Matches a complete facs="..." / facs='...' attribute.  The lookbehind keeps
# look-alike attribute names (data-facs=, xml:facs=) from matching, and the
# backreference requires the closing quote to be the same character as the
# opening one.
_FACS_ATTR_RE = re.compile(
    r'''(?<![\w:.-])(facs\s*=\s*)(["'])(?:(?!\2).)*\2''', re.DOTALL
)


def _set_facs(pb_tag, facs_value):
    """Return *pb_tag* with its ``facs`` attribute set to *facs_value*.

    An existing ``facs`` attribute has its value replaced (its quote style is
    kept); otherwise ``facs="<value>"`` is inserted just before the closing
    ``>`` or ``/>``.
    """
    updated, replaced = _FACS_ATTR_RE.subn(
        lambda m: f"{m.group(1)}{m.group(2)}{facs_value}{m.group(2)}",
        pb_tag,
        count=1,
    )
    if replaced:
        return updated

    closer = '/>' if pb_tag.endswith('/>') else '>'
    # Strip trailing whitespace so '<pb n="1" />' does not end up with a
    # double space before the inserted attribute.
    head = pb_tag[:-len(closer)].rstrip()
    return f'{head} facs="{facs_value}"{closer}'


def _renumber_pb_tags(content, start_num):
    """Renumber every ``<pb>`` tag in *content* that is not inside a comment.

    Tags receive the sequence ``start_num, start_num, start_num + 1,
    start_num + 1, ...`` (each number is used for two consecutive tags).

    Returns:
        A ``(new_content, tag_count)`` tuple, where ``tag_count`` is the
        number of ``<pb>`` tags that were rewritten.
    """
    seen = 0

    def replace(match):
        nonlocal seen
        pb_tag = match.group(2)
        if pb_tag is None:
            # The comment alternative matched: hand it back unchanged.
            return match.group(0)
        facs_value = start_num + seen // 2
        seen += 1
        return _set_facs(pb_tag, facs_value)

    return _COMMENT_OR_PB_RE.sub(replace, content), seen


def main():
    """Command-line entry point: renumber ``facs`` on the ``<pb>`` tags of a file.

    Expects ``sys.argv`` to be ``[prog, <starting_number>, <filename>]``.
    The result is written next to the input as ``modified_<basename>``; the
    input file itself is not changed.  On a usage, parsing, or I/O error a
    message is printed to stderr and the process exits with status 1.
    """
    # 1. Check for the correct number of command-line arguments
    if len(sys.argv) != 3:
        print(
            "Usage: python pb_incrementer.py <starting_number> <filename>",
            file=sys.stderr,
        )
        sys.exit(1)

    # 2. Parse the starting number
    try:
        start_num = int(sys.argv[1])
    except ValueError:
        print(f"Error: '{sys.argv[1]}' is not a valid number.", file=sys.stderr)
        sys.exit(1)

    # 3. Read the input file
    input_filename = sys.argv[2]
    try:
        # newline='' disables newline translation so the document's original
        # line endings (e.g. CRLF) survive the read/write round trip.
        with open(input_filename, 'r', encoding='utf-8', newline='') as f:
            content = f.read()
    except FileNotFoundError:
        print(
            f"Error: The file '{input_filename}' does not exist.",
            file=sys.stderr,
        )
        sys.exit(1)
    except (OSError, UnicodeDecodeError) as err:
        # Covers directories, permission problems and non-UTF-8 content.
        print(f"Error: Could not read '{input_filename}': {err}", file=sys.stderr)
        sys.exit(1)

    # 4. Rewrite the <pb> tags, leaving comments untouched
    modified_content, counter = _renumber_pb_tags(content, start_num)

    # 5. Save the output to a new file prefixed with "modified_"
    base_name = os.path.basename(input_filename)
    dir_name = os.path.dirname(input_filename)
    output_filename = os.path.join(dir_name, f"modified_{base_name}")

    try:
        with open(output_filename, 'w', encoding='utf-8', newline='') as f:
            f.write(modified_content)
    except OSError as err:
        print(f"Error: Could not write '{output_filename}': {err}", file=sys.stderr)
        sys.exit(1)

    print(f"Success! Processed {counter} <pb> elements.")
    print(f"Saved to: {output_filename}")

# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
