# Bikarhêner:Balyozxane/ziman dub.py

import re  
import pywikibot 

# Read the list of page titles from pages.txt, one title per line.
# Lines that are empty after stripping are dropped so that an empty title
# is never passed on to pywikibot.
with open('pages.txt', 'r', encoding='utf-8') as file:
    page_list = [title for title in (line.strip() for line in file) if title]

def get_lang_codes(page):
    """Collect the argument of every ``{{ziman|...}}`` template on a page.

    Args:
        page: Title of the page to load from the ``ku`` Wiktionary site.

    Returns:
        A list with one string per ``{{ziman|...}}`` occurrence, in the
        order they appear in the page text. Each string is whatever sits
        between ``{{ziman|`` and the closing ``}}``. The list is empty
        when the page text contains no such template.
    """
    wikitext = pywikibot.Page(pywikibot.Site("ku", "wiktionary"), page).text
    return re.findall(r'{{ziman\|([^\}]+)}}', wikitext)
    
def log_page(page_title, dublicate):
    """Append a page title to one of the two result files.

    Args:
        page_title: Title to record; written followed by a newline.
        dublicate: When truthy the title goes to ``pages_dub.txt``,
            otherwise to ``pages_skipped.txt``.
    """
    target = 'pages_dub.txt' if dublicate else 'pages_skipped.txt'
    # Append mode: entries from earlier calls (and earlier runs) are kept.
    with open(target, 'a', encoding='utf-8') as out:
        out.write(page_title + '\n')
            
def _first_repeated(items):
    """Return the first item that occurs a second time in *items*, or None."""
    seen = set()
    for item in items:
        if item in seen:
            return item
        seen.add(item)
    return None


def check_and_write_duplicates(page_list):
    """Classify each page by whether it repeats a ``{{ziman|...}}`` code.

    For every title in *page_list* the codes are fetched with
    ``get_lang_codes``. A page on which some code appears more than once
    is recorded via ``log_page(page, True)``; a page that has codes but no
    repeat is recorded via ``log_page(page, False)``. Each page is logged
    at most once per occurrence in *page_list*. Pages for which no codes
    are found are not logged at all.

    Args:
        page_list: Iterable of page titles to check.

    Returns:
        None. Progress and a final count of distinct pages with repeated
        codes are printed to stdout.
    """
    duplicates = set()
    for page in page_list:
        print(f"\n<<{page}>>\n")
        lang_codes = get_lang_codes(page)
        print(f"lang_codes: {lang_codes}")
        if not lang_codes:
            continue

        # Decide once per page, after looking at all of its codes, so the
        # page ends up in exactly one of the two log files and only once.
        repeated = _first_repeated(lang_codes)
        if repeated is not None:
            duplicates.add(page)
            log_page(page, True)
            print(f"{repeated} logged")
        else:
            log_page(page, False)
            print(f"{page} skipped")

    # Report how many distinct pages had a repeated code.
    print(f"All pages processed. Found {len(duplicates)} pages.")
    
# Script entry point: check every title in page_list for repeated codes.
check_and_write_duplicates(page_list)