import re
import sys
import os
import glob

# Matches an SRT/VTT cue timing line, e.g. "00:00:01,000 --> 00:00:02,500" or
# "00:01.000 --> 00:02.000 align:start". Hours are optional (VTT allows
# omitting them) and anything after the end time (VTT cue settings) is ignored
# because the pattern is only anchored at the start of the line.
_CUE_TIMING_RE = re.compile(
    r'(?:\d+:)?\d+:\d+[.,]\d+\s*-->\s*(?:\d+:)?\d+:\d+[.,]\d+'
)


def _extract_subtitle_text(content):
    """Return the text lines of SRT/VTT *content*, one per line.

    Dropped lines: blank lines, cue timing lines, numeric cue counters (a
    digits-only line directly followed by a timing line) and a leading
    ``WEBVTT`` header. Every kept line is trimmed of surrounding whitespace.
    """
    lines = [line.strip() for line in content.split('\n')]
    kept = []
    seen_first_line = False

    for index, line in enumerate(lines):
        if not line:
            continue

        is_first_line = not seen_first_line
        seen_first_line = True

        # The WebVTT signature is only meaningful as the very first line.
        if is_first_line and (
            line == 'WEBVTT' or line.startswith(('WEBVTT ', 'WEBVTT\t'))
        ):
            continue

        if _CUE_TIMING_RE.match(line):
            continue

        # Only treat a number as a cue counter when a timing line follows it;
        # a digits-only line anywhere else is real subtitle text (e.g. "2024").
        following = lines[index + 1] if index + 1 < len(lines) else ''
        if line.isdecimal() and _CUE_TIMING_RE.match(following):
            continue

        kept.append(line)

    return '\n'.join(kept)


def srt_vtt_to_text(file_path):
    """Convert one ``.srt``/``.vtt`` subtitle file to a plain-text file.

    The output is written next to the input, with the extension replaced by
    ``.txt``. Cue counters, timing lines, blank lines and the ``WEBVTT``
    header are removed; the remaining subtitle text is kept line by line.

    Args:
        file_path: Path of the subtitle file to convert.

    Returns:
        None. Success or failure is reported on standard output; I/O and
        decoding errors are caught so a batch run can continue with the
        next file.
    """
    txt_file = os.path.splitext(file_path)[0] + '.txt'

    try:
        # utf-8-sig transparently drops a leading BOM, which many subtitle
        # editors emit, and reads BOM-less UTF-8 unchanged.
        with open(file_path, 'r', encoding='utf-8-sig') as file:
            content = file.read()

        text = _extract_subtitle_text(content)

        with open(txt_file, 'w', encoding='utf-8') as file:
            file.write(text)
    except (OSError, ValueError) as e:
        # ValueError covers UnicodeDecodeError for files that are not UTF-8.
        print(f"An error occurred with {file_path}: {e}")
    else:
        print(f"Successfully converted {file_path} to {txt_file}")

def main(directory):
    """Convert every ``.srt`` and ``.vtt`` file found under *directory*.

    The directory is searched recursively. All ``.srt`` matches are handled
    first, then all ``.vtt`` matches. If *directory* is not an existing
    directory, a message is printed and nothing else happens.
    """
    if os.path.isdir(directory):
        # Collect every match up front so the search is finished before any
        # conversion starts writing new files into the tree.
        subtitle_paths = []
        for extension in ('srt', 'vtt'):
            pattern = os.path.join(directory, '**', f'*.{extension}')
            subtitle_paths.extend(glob.glob(pattern, recursive=True))

        for subtitle_path in subtitle_paths:
            srt_vtt_to_text(subtitle_path)
    else:
        print(f"Directory {directory} does not exist.")

if __name__ == "__main__":
    # Exactly one positional argument is expected: the directory to scan.
    cli_args = sys.argv[1:]
    if len(cli_args) == 1:
        directory = cli_args[0]
        main(directory)
    else:
        print("Usage: python srt_to_text.py <directory>")

