import sys
from lxml import etree

# Convert a MediaWiki XML export into a plain-text file.
#
# Usage: python <this script> <export.xml>
#
# Every <page> element with non-empty text is written to "<export.xml>.txt"
# as "== <title> ==" followed by the text and a blank line. The input is
# parsed incrementally so large exports are not loaded into memory at once.

# Fail with a readable usage message instead of a bare IndexError when the
# input path is missing.
if len(sys.argv) < 2:
    sys.exit(f"usage: {sys.argv[0]} <mediawiki-export.xml>")

# Get the input XML file path from the command line argument
input_file = sys.argv[1]

# Set the output file path based on the input file name
output_file = f"{input_file}.txt"

# "{*}" is lxml's namespace wildcard. The MediaWiki export namespace embeds
# the schema version (".../export-0.10/", ".../export-0.11/", ...), so
# matching on the local name only keeps the script working across versions
# instead of silently producing an empty output file.
page_tag = '{*}page'
title_path = '{*}title'
text_path = './/{*}text'

# Open the input XML file and output text file
with open(input_file, 'rb') as f, open(output_file, 'w', encoding='utf-8') as out:
    # Only 'end' events for <page> elements are reported, so each element is
    # fully populated by the time the loop body sees it.
    context = etree.iterparse(f, events=('end',), tag=page_tag)

    for _event, elem in context:
        title = elem.findtext(title_path)
        # First <text> descendant of the page, at any depth.
        text = elem.findtext(text_path)

        # Pages with missing or empty text are skipped.
        if text:
            out.write(f'== {title} ==\n{text}\n\n')

        # Clear the element to free up memory, then drop the already
        # processed preceding siblings that the parent still references.
        elem.clear()
        while elem.getprevious() is not None:
            del elem.getparent()[0]
