# Bikarhêner:Balyozxane/katceke.py

# Bot for ku.wiktionary that creates missing category pages with {{katbike}},
# creates their missing parent categories, and writes QuickStatements batches
# (newquick.txt / createquick.txt) for linking the categories on Wikidata.

import re  # NOTE(review): unused in the visible code; kept for compatibility
import time
from queue import Queue  # NOTE(review): unused in the visible code; kept

import pywikibot
from pywikibot import pagegenerators
from pywikibot.bot import AutomaticTWSummaryBot, ConfigParserBot, SingleSiteBot

# Local name of the category namespace on ku.wiktionary.
CATEGORY_PREFIX = "Kategorî:"

# Texts that the code treats as messages rather than category names when they
# appear in the output of Module:getParents.
MODULE_MESSAGES = ("Xeletî çêbû", "Parents not found for")


class AppendTextBot(
    SingleSiteBot,
    ConfigParserBot,
    AutomaticTWSummaryBot,
):
    """Create category pages containing ``{{katbike}}`` and prepare Wikidata links.

    For every page of the generator that does not exist yet, the bot saves
    ``{{katbike}}`` as page text, records a QuickStatements line for it, and
    then does the same for every parent category reported by the local Lua
    module ``getParents``.
    """

    summary_key = 'basic-changing'
    use_redirects = False
    update_options = {
        'summary': None,
        'text': '{{katbike}}',
        'top': False,
    }

    def fetch_interwiki_link_content(self, page_title):
        """Ask Module:înterwîkî-çêke for the en.wiktionary name of a category.

        :param page_title: ku.wiktionary category title, with or without the
            ``Kategorî:`` prefix.
        :return: dict with the keys ``en_wiktionary_page_title``,
            ``ku_wiktionary_page_title`` and ``enPageExists``, or ``None``
            when anything goes wrong while querying either wiki.
        """
        site = pywikibot.Site("ku", "wiktionary")  # Local wiki (ku.wiktionary.org)

        # The Lua module is invoked with the bare category name.
        cleaned_title = page_title.replace(CATEGORY_PREFIX, "").strip()
        pywikibot.output(f"cleaned_title: {cleaned_title}")

        try:
            # Expand {{#invoke:înterwîkî-çêke|biwesine|<title>}} on the local wiki.
            params = {
                "action": "expandtemplates",
                "format": "json",
                "text": f"{{{{#invoke:înterwîkî-çêke|biwesine|{cleaned_title}}}}}",
                "prop": "wikitext"
            }
            request = pywikibot.data.api.Request(site=site, **params)
            response = request.submit()

            interwiki_link_content = response['expandtemplates']['wikitext']
            category_result = "Category:" + interwiki_link_content
            pywikibot.output(f"Rûpela en.wiktê: {category_result}")

            # Check whether the counterpart exists on en.wiktionary.org.
            en_site = pywikibot.Site("en", "wiktionary")
            page_exists = pywikibot.Page(en_site, category_result).exists()
            pywikibot.output(f"page_exists: {page_exists}")

            return {
                "en_wiktionary_page_title": category_result,
                "ku_wiktionary_page_title": CATEGORY_PREFIX + cleaned_title,
                "enPageExists": page_exists
            }
        except Exception as e:
            # Deliberately best-effort: a problematic page is reported and
            # skipped instead of aborting the whole run.
            print(f"Error fetching interwiki link content for page {page_title}: {e}")
            return None

    def is_connected_to_wikidata(self, page_title):
        """Return True when the ku.wiktionary page has a Wikidata item.

        :param page_title: full page title on ku.wiktionary.
        """
        return self.get_wid("ku", page_title) is not None

    def get_wid(self, lang, rupel):
        """Return the Wikidata item ID linked to a Wiktionary page.

        :param lang: language code of the Wiktionary site.
        :param rupel: page title on that site.
        :return: the item title (e.g. ``Q123``), or ``None`` when no item is
            linked or the item title is ``-1``.
        """
        site = pywikibot.Site(lang, "wiktionary")
        page = pywikibot.Page(site, rupel)
        try:
            item = pywikibot.ItemPage.fromPage(page)
            if item and item.title(with_ns=False) != '-1':
                return item.title(with_ns=False)
        except pywikibot.exceptions.NoPageError:
            pass  # No Wikidata item for this page
        return None

    def _get_wid_with_retry(self, title, retries=5, delay=5):
        """Look up the Wikidata ID of an en.wiktionary page, retrying on maxlag.

        :param title: page title on en.wiktionary.
        :param retries: number of attempts before giving up.
        :param delay: seconds to wait between attempts.
        :return: the item ID, or ``None`` when not linked or all attempts
            ended in ``MaxlagTimeoutError``.
        """
        while retries > 0:
            try:
                wikidata_id = self.get_wid("en", title)
            except pywikibot.exceptions.MaxlagTimeoutError:
                print("Maxlag encountered. Retrying in 5 seconds...")
                time.sleep(delay)
                retries -= 1
            else:
                print(f"Linked to Wikidata with ID: {wikidata_id}")
                return wikidata_id
        return None

    def update_interwiki_links(self, page_titles):
        """Append QuickStatements commands for the given categories to disk.

        Pages already connected to Wikidata are skipped. When the
        en.wiktionary counterpart has an item, a sitelink command is appended
        to ``newquick.txt``; when the counterpart exists without an item, a
        ``CREATE`` batch is appended to ``createquick.txt``.

        :param page_titles: iterable of ku.wiktionary category titles, with or
            without the ``Kategorî:`` prefix.
        :return: list of the data dicts gathered for the processed pages.
        """
        interwiki_data = []

        # Both files are opened in append mode so earlier runs are preserved.
        with open("newquick.txt", "a", encoding="utf-8") as output_file, \
                open("createquick.txt", "a", encoding="utf-8") as createquick_file:
            for page_title in page_titles:
                print(f"Fetching interwiki link content for page: {page_title}")

                # Normalise first: callers pass titles that already carry the
                # prefix, and prepending it again would look up a page that
                # does not exist, so the check could never succeed.
                bare_title = page_title.replace(CATEGORY_PREFIX, "").strip()
                if self.is_connected_to_wikidata(CATEGORY_PREFIX + bare_title):
                    print(f"Page '{page_title}' is connected to Wikidata. Skipping.")
                    continue

                data = self.fetch_interwiki_link_content(page_title)

                if data is not None and data.get("enPageExists"):
                    # The module reports its own failures via an error category.
                    if "Category:[[Kategorî:Xeletiyên Modul:înterwîkî-çêke]]" in data["en_wiktionary_page_title"]:
                        print("Skipping page due to interwiki link content.")
                        continue

                    wikidata_id = self._get_wid_with_retry(
                        data["en_wiktionary_page_title"])
                    data["wikidata_id"] = wikidata_id
                    if wikidata_id is None:
                        print("Not linked to Wikidata.")
                else:
                    print("Rûpela en.wikt tine ye.")

                if data is not None:
                    interwiki_data.append(data)
                    ku_title = data["ku_wiktionary_page_title"]
                    en_title = data["en_wiktionary_page_title"]

                    if data.get("wikidata_id"):
                        # Existing item: only add the ku.wiktionary sitelink.
                        output_file.write(
                            f'{data["wikidata_id"]}\tSkuwiktionary\t"{ku_title}"\n')
                    elif data.get("enPageExists"):
                        # en page exists without an item: create a new item.
                        createquick_file.write("".join((
                            "CREATE\n",
                            f'LAST\tSkuwiktionary\t"{ku_title}"\n',
                            f'LAST\tLku\t"{ku_title}"\n',
                            'LAST\tDku\t"Wîkîmediya:Kategorî"\n',
                            f'LAST\tSenwiktionary\t"{en_title}"\n',
                            f'LAST\tLen\t"{en_title}"\n',
                            'LAST\tDen\t"Wikimedia category"\n',
                            'LAST\tP31\tQ4167836\n',
                        )))

                print()

        return interwiki_data

    def fetch_parent_categories_recursive(self, page_title, all_parents=None, processed_categories=None):
        """Collect all ancestor categories reported by Module:getParents.

        :param page_title: category title, with or without ``Kategorî:``.
        :param all_parents: set the results are accumulated in (created when
            omitted).
        :param processed_categories: set of names already visited, used to
            stop cycles (created when omitted).
        :return: the ``all_parents`` set.
        """
        if all_parents is None:
            all_parents = set()
        if processed_categories is None:
            processed_categories = set()

        site = pywikibot.Site("ku", "wiktionary")

        cleaned_title = page_title.replace(CATEGORY_PREFIX, "").strip()
        pywikibot.output(f"Fetching parent categories for page: {cleaned_title}")

        params = {
            "action": "expandtemplates",
            "format": "json",
            "text": f"{{{{#invoke:getParents|biwesine|{cleaned_title}}}}}",
            "prop": "wikitext"
        }
        request = pywikibot.data.api.Request(site=site, **params)
        response = request.submit()

        # Check if there is an error in the Lua module's response.
        if 'error' in response:
            error_message = response['error']['info']
            pywikibot.output(f"Error from Lua module: {error_message}")
            return all_parents

        lua_result = response['expandtemplates']['wikitext']

        # The module answers with a comma-separated list of parent names.
        for parent_category in lua_result.split(','):
            parent_category = parent_category.strip()
            if not parent_category:
                continue

            # Module messages are not category names: do not record them and
            # do not recurse into them.
            if any(message in parent_category for message in MODULE_MESSAGES):
                pywikibot.output(f"Skipping invalid parent category: '{parent_category}'")
                continue

            if parent_category not in processed_categories:
                processed_categories.add(parent_category)
                all_parents.add(parent_category)
                self.fetch_parent_categories_recursive(
                    parent_category, all_parents, processed_categories)

        return all_parents

    def create_parent_categories_from_list(self, all_parents):
        """Create every missing category in ``all_parents`` with ``{{katbike}}``.

        :param all_parents: iterable of category names, or a single name as a
            ``str``. Names may carry the ``Kategorî:`` prefix or not.
        """
        # A bare string must be treated as one title; iterating it would
        # otherwise yield single characters.
        if isinstance(all_parents, str):
            all_parents = [all_parents]
        else:
            all_parents = list(all_parents)

        processed_parents = set()

        pywikibot.output(f"Items in all_parents: {', '.join(all_parents)}")

        for parent_name in all_parents:
            newcategory_name = parent_name.replace(CATEGORY_PREFIX, "").strip()
            if not newcategory_name:
                continue
            category_name = CATEGORY_PREFIX + newcategory_name

            # Avoid handling the same category twice within one call.
            if category_name in processed_parents:
                pywikibot.output(f"Category '{parent_name}' already processed. Skipping.")
                continue
            processed_parents.add(category_name)

            page = pywikibot.Page(self.site, category_name)
            if page.exists():
                pywikibot.output(f"Category '{category_name}' already exists. Skipping.")
                continue

            summary = f"Rûpel bi {{{{[[Şablon:katbike|katbike]]}}}} hat çêkirin"
            pywikibot.output(f"'{category_name}' tê çêkirin.")
            page.text = "{{katbike}}"
            page.save(summary=summary)

            # Record the freshly created category for Wikidata linking.
            pywikibot.output(f"Processed parent categories for '{category_name}'.")
            self.update_interwiki_links([category_name])

    def treat_page(self) -> None:
        """Create the current page with ``{{katbike}}`` plus its parent categories."""
        page = self.current_page
        pywikibot.output(f"Processing page: {page.title()}")

        if page.exists():
            pywikibot.output(f"Page '{page.title()}' already exists. Skipping.")
            return

        try:
            summary = f"Rûpel bi {{{{[[Şablon:katbike|katbike]]}}}} hat çêkirin"
            text = "{{katbike}}"
            pywikibot.output(f"Final text to put: {text}")
            self.put_current(text, summary=summary)

            # After processing the current page, update interwiki links.
            self.update_interwiki_links([page.title()])

            # Fetch and create parent categories recursively.
            parent_categories = self.fetch_parent_categories_recursive(
                page.title().replace(CATEGORY_PREFIX, ""))

            for parent_category in parent_categories:
                # Skip module messages individually; the set has no defined
                # order, so stopping at the first one would drop a random
                # subset of the valid parents.
                if any(message in parent_category for message in MODULE_MESSAGES):
                    pywikibot.output(f"Skipping creation of parent category '{parent_category}' due to message.")
                    continue

                newparent_category = parent_category.replace(CATEGORY_PREFIX, "").strip()
                parent_category = CATEGORY_PREFIX + newparent_category

                try:
                    self.create_parent_categories_from_list([parent_category])
                except Exception as e:
                    # Best-effort: one failing parent must not stop the rest.
                    pywikibot.error(f"Error creating parent category '{parent_category}': {e}")
                    pywikibot.output(f"Skipping creation of parent category '{parent_category}'.")

        except Exception as e:
            pywikibot.error(f"Error processing page '{page.title()}': {e}")
            pywikibot.output(f"Skipping page '{page.title()}'.")


def main(*args: str) -> None:
    """Parse command line arguments and run the bot.

    :param args: command line arguments; ``-summary:`` and ``-text:`` take a
        value, every other local argument is passed on as a boolean option.
    """
    local_args = pywikibot.handle_args(args)
    gen_factory = pagegenerators.GeneratorFactory()
    local_args = gen_factory.handle_args(local_args)

    options = {'text': '{{katbike}}'}

    for arg in local_args:
        option, _, value = arg.partition(':')
        # Arguments arrive as "-name[:value]"; option names carry no dash.
        option = option.lstrip('-')
        if option in ('summary', 'text'):
            if not value:
                value = pywikibot.input(f'Please enter a value for {option}')
            options[option] = value
        else:
            options[option] = True

    gen = gen_factory.getCombinedGenerator(preload=True)

    if not pywikibot.bot.suggest_help(missing_generator=not gen):
        bot = AppendTextBot(generator=gen, **options)
        bot.run()


if __name__ == '__main__':
    main()