# Bikarhêner:Balyozxane/qertafandin.py

import pywikibot
import re
from pywikibot import pagegenerators
from pywikibot.bot import AutomaticTWSummaryBot, ConfigParserBot, SingleSiteBot

# Matches an "==== Etîmolojî ====" heading followed by an optional "Ji",
# "*" or "#" lead-in and then "[[word1]] + [[word2]]" (each link optionally
# wrapped in '' italics), an optional full stop, and whatever else is left
# on that line (group 3).  Groups 1 and 2 are the raw link contents.
_ETYMOLOGY_PATTERN = re.compile(
    r"====\s*Etîmolojî\s*====\s*J?i?\*?#?\s*"
    r"'?'?\[\[([^\]]+)\]\]'?'?\s*\+\s*"
    r"'?'?\[\[([^\]]+)\]\]'?'?\.?([^\n]+)?$",
    re.MULTILINE,
)


def process_section(page_title: str, lang_code: str, section: str) -> str:
    """Rewrite plain "[[a]] + [[b]]" etymologies in *section* as a template.

    Every etymology of the form ``==== Etîmolojî ====`` followed by
    ``[[word1]] + [[word2]]`` with nothing else on the line is replaced by
    ``==== Etîmolojî ====\\nJi {{qertaf|<lang>|word1|word2}}.``.  When
    *page_title* contains a space the ``heve`` template is used instead of
    ``qertaf``.  The language code ``kmr`` is written as ``ku`` in the
    template.  A piped link ``[[target|label]]`` becomes
    ``target|cuda1=label`` (``cuda2`` for the second word).

    Etymologies that carry extra text after the second link are left
    untouched.

    Args:
        page_title: Title of the page the section belongs to.
        lang_code: Language code of the section.
        section: Wikitext of the language section.

    Returns:
        The section text with all eligible etymologies rewritten; the input
        unchanged when nothing qualifies.
    """
    template = 'heve' if ' ' in page_title else 'qertaf'
    # Replace "kmr" with "ku" only inside the generated template.
    qertaf_lang_code = 'ku' if lang_code == 'kmr' else lang_code

    def _rewrite(match):
        """Return the replacement for one match, or the match unchanged."""
        word1, word2, end_text = match.groups()
        print("Match found:")
        print(match.group(0))
        print(word1)
        print(word2)
        print(f"end_text: {end_text}")

        # Trailing text means the etymology says more than "a + b";
        # converting it would lose information, so keep it as written.
        if end_text is not None and end_text.strip():
            return match.group(0)

        if '|' in word1:
            target, label = word1.split('|', 1)
            word1 = f'{target}|cuda1={label}'
        if '|' in word2:
            target, label = word2.split('|', 1)
            word2 = f'{target}|cuda2={label}'

        return (
            f'==== Etîmolojî ====\n'
            f'Ji {{{{{template}|{qertaf_lang_code}|{word1}|{word2}}}}}.'
        )

    # sub() rewrites exactly the matched spans, so a look-alike etymology
    # with trailing text elsewhere in the section is not touched.
    return _ETYMOLOGY_PATTERN.sub(_rewrite, section)

def extract_lang_sections(page_text):
    """Map each language code on the page to the text of its section.

    The page is split on ``== {{ziman|<code>}} ==`` headers.  The text
    between one header and the next (or the end of the page) is stored under
    that header's language code.  Text before the first header is ignored.
    If a code occurs more than once, the last section wins.
    """
    header_re = re.compile(r'==[ ]*?{{ziman\|([^}]+)}}[ ]*?==')
    # The capture group keeps the headers in the result, so the list reads
    # [preamble, header, body, header, body, ...].
    pieces = re.split(r'(==[ ]*?{{ziman\|[^}]+}}[ ]*?==)', page_text)
    headers = pieces[1::2]
    bodies = pieces[2::2]
    return {
        re.search(header_re, header).group(1): body
        for header, body in zip(headers, bodies)
    }

class AppendTextBot(
    SingleSiteBot,
    ConfigParserBot,
    AutomaticTWSummaryBot,
):
    """Bot that converts "[[a]] + [[b]]" etymologies into templates.

    For every page it splits the wikitext into language sections, passes
    each section through ``process_section`` and saves the page when at
    least one section was changed.
    """

    summary_key = 'basic-changing'
    use_redirects = False
    # 'text' and 'top' are not read by treat_page; they are kept so that
    # options passed in by main() remain accepted.
    update_options = {
        'summary': None,
        'text': 'tiştek',
        'top': False,
    }

    # Language header such as "== {{ziman|ku}} ==" (spaces optional).
    # Group 1 is the whole header, group 2 the language code.  This is the
    # same header syntax that extract_lang_sections splits on.
    _LANG_HEADER = re.compile(r'(==[ ]*?{{ziman\|([^}]+)}}[ ]*?==)')

    def treat_page(self) -> None:
        """Rewrite eligible etymologies on the current page and save it."""
        page = self.current_page
        pywikibot.output(f"Processing page: {page.title()}")

        # With two capture groups, split() returns
        # [preamble, header, lang_code, body, header, lang_code, body, ...].
        # Editing the bodies in this list and joining it back leaves every
        # untouched byte of the page exactly as it was, regardless of header
        # spacing or blank lines around the sections.
        parts = self._LANG_HEADER.split(page.text)
        changed = False
        for body_index in range(3, len(parts), 3):
            lang_code = parts[body_index - 1]
            body = parts[body_index]
            new_body = process_section(page.title(), lang_code, body)
            if new_body != body:
                parts[body_index] = new_body
                changed = True

        if not changed:
            pywikibot.output("No meaningful changes detected. Skipping save.")
            return

        # The language codes (every index with remainder 2) are already
        # contained in their headers, so leave them out when reassembling.
        text = ''.join(
            part for index, part in enumerate(parts) if index % 3 != 2
        )
        summary = (
            "+{{[[Şablon:qertaf|qertaf]]}} "
            "(bi [[User:Balyozxane/qertafandin.py|qertafandin.py]])"
        )
        self.put_current(text, summary=summary)


def main(*args: str) -> None:
    """Parse command-line arguments and run the bot.

    Global and page-generator arguments are consumed by pywikibot; the
    remaining ones are turned into bot options.  ``-summary:<value>`` and
    ``-text:<value>`` take a value (the user is prompted when it is
    missing); any other argument is treated as a boolean flag.

    Args:
        *args: Command-line arguments, passed on to
            ``pywikibot.handle_args``.
    """
    local_args = pywikibot.handle_args(args)
    gen_factory = pagegenerators.GeneratorFactory()
    local_args = gen_factory.handle_args(local_args)

    options = {'text': ''}

    for arg in local_args:
        option, _, value = arg.partition(':')
        # Arguments arrive as "-name[:value]"; the option key must not
        # carry the dash, otherwise it never equals 'summary' / 'text'.
        if option.startswith('-'):
            option = option[1:]
        if option in ('summary', 'text'):
            if not value:
                # Keep the answer; it is the value the user was asked for.
                value = pywikibot.input(f'Please enter a value for {option}')
            options[option] = value
        else:
            options[option] = True

    gen = gen_factory.getCombinedGenerator(preload=True)

    if not pywikibot.bot.suggest_help(missing_generator=not gen):
        bot = AppendTextBot(generator=gen, **options)
        bot.run()

# Run the bot only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()