# Read the MediaWiki XML dump file named on the command line (sys.argv[1]).

import sys
import xml.etree.ElementTree as ET
import mwxml

# Characters that are path separators or are rejected in file names on
# common platforms; each one is replaced with "_" before the file is opened.
_UNSAFE_FILENAME_CHARS = str.maketrans({ch: "_" for ch in '<>:"/\\|?*\0'})


def save_to_txt(title: str, author: str, date: str, text: str) -> None:
    """Write *text* to "<title> <author> <date>.txt" in the current directory.

    The file name is built from the three metadata strings joined by single
    spaces. Characters that would act as path separators or are not allowed
    in file names (``< > : " / \\ | ? *`` and NUL) are replaced with ``_`` so
    that titles such as "Main/Sub" or timestamps containing ":" do not make
    ``open()`` fail or write outside the current directory.

    Args:
        title: Page title used as the first part of the file name.
        author: Author name used as the second part of the file name.
        date: Date string used as the third part of the file name.
        text: Content written to the file.

    Raises:
        OSError: If the file cannot be created or written.
    """
    filename = f"{title} {author} {date}".translate(_UNSAFE_FILENAME_CHARS) + ".txt"
    # Explicit UTF-8: wiki text is Unicode and the platform default encoding
    # may be unable to represent it.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(text)

def main() -> None:
    """Print the title and a short text preview of every revision in a dump.

    Expects exactly one command-line argument: the path to a MediaWiki XML
    dump. For each revision of each page, prints the page title and the
    first 200 characters of the revision text (followed by "..." when the
    text is longer). Finally prints the number of pages seen.

    Exits with status 1 after printing a usage message when the argument
    count is wrong.
    """
    if len(sys.argv) != 2:
        print("Usage: python wikidump2txt.py <xml file>")
        sys.exit(1)

    xml_file = sys.argv[1]

    total_pages = 0
    # Keep the file open for the whole loop (the dump is read while
    # iterating) and make sure it is closed afterwards.
    with open(xml_file, 'rb') as xml_stream:
        dump = mwxml.Dump.from_file(xml_stream)

        for page in dump.pages:
            # Count while iterating: dump.pages is a lazy iterator, so it
            # has no len() and is exhausted once the loop finishes.
            total_pages += 1
            title = page.title
            for revision in page:
                # A revision's text may be missing; treat it as empty.
                text = revision.text or ""
                print("Title:", title)
                # Show only a 200-character preview of the text.
                print("Text:", text[:200] + ("..." if len(text) > 200 else ""))

    print("Total pages: ", total_pages)

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()