
Pull request #117 · Merged · 2 commits · Mar 28, 2024
1 change: 1 addition & 0 deletions md_core/mdpy/bots/user_account_new.py
@@ -9,6 +9,7 @@
# ---
'''

import sys
import os
import configparser

35 changes: 35 additions & 0 deletions ncc_core/nc_import/README.md
@@ -0,0 +1,35 @@
# NC Commons Import Bot

This bot imports files from NC Commons into Wikipedia. It fetches files from NC Commons, uploads them to the target Wikipedia, and updates the {{NC}} templates that reference them. The workflow is outlined in the steps below; a short extraction sketch follows the list.

1. **Get Languages List from User Page**:
- The bot retrieves the list of target languages from the page "User:Mr. Ibrahem/import bot" on NC Commons.
- It parses the page content and extracts the language codes, one per bullet item.

2. **Retrieve Pages with Template:NC for Each Language**:
- For each language obtained from the user page, the bot proceeds to fetch pages that contain the template "Template:NC".
- This is done by making API calls to Wikipedia using the language code to retrieve relevant pages.

3. **Extract Templates with Name "Template:NC"**:
- Upon accessing each page containing the "Template:NC" template, the bot identifies and extracts all instances of this template.
- It utilizes a parsing library to parse the wikitext of the page and identify occurrences of the specified template.

4. **Process Each Template**:
- For each extracted template, the bot retrieves relevant information such as file name and caption.
- It may also fetch additional details related to the file, such as its content or metadata, from NC Commons.

5. **Upload File to Wikipedia**:
- Once the necessary information is gathered, the bot proceeds to upload the file to Wikipedia.
- This involves using the Wikimedia API to perform the upload operation.
- If the file already exists on Wikipedia, the bot handles the duplicate warning returned by the upload API.
- If the file is fetched from NC Commons, it is uploaded to Wikipedia with appropriate attribution and metadata.

6. **Update Template in Page**:
- After successfully uploading the file, the bot updates the relevant template on the Wikipedia page.
- It replaces the existing template with the newly uploaded file's information.
- This ensures that the Wikipedia page reflects the latest changes made by the bot.

7. **Repeat for All Pages and Languages**:
- The bot iterates through all pages containing "Template:NC" for each language obtained.
- It repeats the process of extracting templates, uploading files, and updating templates for each page.
- This ensures comprehensive coverage of files across multiple languages on Wikipedia.
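For illustration, here is a minimal sketch of steps 3–4 (finding `{{NC}}` templates and reading the file name and caption). It assumes the `mwparserfromhell` library and a parameter layout in which the file name is the first positional parameter and the caption is a named `caption` parameter; the bot's actual parser and parameter names may differ.

```python
# Hypothetical sketch only -- not the bot's actual extraction code.
import mwparserfromhell

def extract_nc_templates(wikitext):
    """Return (file_name, caption) pairs for every {{NC}} template in the text."""
    code = mwparserfromhell.parse(wikitext)
    results = []
    for tpl in code.filter_templates():
        # Match the template name case-insensitively ("NC", "nc", ...).
        if str(tpl.name).strip().lower() != "nc":
            continue
        # Assumed layout: first positional parameter = file name, "caption" = caption.
        file_name = str(tpl.get("1").value).strip() if tpl.has("1") else ""
        caption = str(tpl.get("caption").value).strip() if tpl.has("caption") else ""
        results.append((file_name, caption))
    return results

print(extract_nc_templates("{{NC|Example_scan.jpg|caption=An example image}}"))
# [('Example_scan.jpg', 'An example image')]
```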
13 changes: 7 additions & 6 deletions ncc_core/nc_import/bot.py
@@ -4,15 +4,16 @@

"""
from nc_import.bots.gt_pages import get_pages
from nc_import.bots.wrk_pages import work_on_pages

langs = [
# "af",
"test"
]
from nc_import.bots.wdrk_pages import work_on_pages
from nc_import.bots.get_langs import get_langs_codes


def start():
"""
A function that starts the process by iterating over languages, getting pages for each language, and then working on those pages.
"""
langs = get_langs_codes()
# ---
for code in langs:
pages = get_pages(code)
work_on_pages(code, pages)
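A minimal way to invoke this entry point might look like the following; the invocation is illustrative and assumes the `ncc_core` directory is on `PYTHONPATH`.

```python
# Illustrative runner only; assumes ncc_core/ is on PYTHONPATH.
from nc_import.bot import start

if __name__ == "__main__":
    start()
```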
34 changes: 34 additions & 0 deletions ncc_core/nc_import/bots/get_langs.py
@@ -0,0 +1,34 @@
"""
This bot gets the language list from the NC Commons page:
https://nccommons.org/wiki/User:Mr._Ibrahem/import_bot

"""
import re
from newapi.ncc_page import MainPage as ncc_MainPage
from newapi import printe



def get_text():
"""
Retrieves text content from a specific page.
"""
title = "User:Mr. Ibrahem/import bot"
page = ncc_MainPage(title, "www", family="nccommons")
text = page.get_text()
# match all langs like: * ar\n* fr
# ---
return text

def get_langs_codes():
"""
Extracts language codes from the text content of a page.
"""
text = get_text()
langs = []
fi = re.findall(r"\* (.*)\n", text)
for i in fi:
langs.append(i.strip())
# ---
printe.output(f"langs: {langs}")
return langs
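To make the extraction concrete, the snippet below feeds made-up page content through the same regex used by `get_langs_codes()`; the real content of "User:Mr. Ibrahem/import bot" may differ.

```python
# Illustrative only: the regex r"\* (.*)\n" captures one code per "* xx" bullet.
import re

sample_text = "Languages to import:\n* ar\n* fr\n* test\n"
codes = [code.strip() for code in re.findall(r"\* (.*)\n", sample_text)]
print(codes)  # ['ar', 'fr', 'test']
```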
22 changes: 4 additions & 18 deletions ncc_core/nc_import/bots/gt_pages.py
@@ -1,24 +1,10 @@
from newapi.wiki_page import MainPage, NEW_API

# api_new = NEW_API('en', family='wikipedia')
# login = api_new.Login_to_wiki()
# pages = api_new.Get_template_pages(title, namespace="*", Max=10000)
"""
page = MainPage(title, 'ar', family='wikipedia')
exists = page.exists()
text = page.get_text()
timestamp = page.get_timestamp()
user = page.get_user()
links = page.page_links()
words = page.get_words()
purge = page.purge()
templates = page.get_templates()
save_page = page.save(newtext='', summary='', nocreate=1, minor='')
create = page.Create(text='', summary='')
"""
from newapi.wiki_page import NEW_API


def get_pages(code):
"""
Retrieves template pages related to a given language code.
"""
api_new = NEW_API(code, family="wikipedia")

api_new.Login_to_wiki()
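The hunk is cut off here, but the comment block removed above suggests how `get_pages()` likely continues. A hedged sketch of that continuation, with the template title and limit taken as assumptions:

```python
# Hypothetical continuation of get_pages(); the title and Max value are
# assumptions based on the example comment removed in this diff.
from newapi.wiki_page import NEW_API

def get_pages(code):
    """Retrieves template pages related to a given language code."""
    api_new = NEW_API(code, family="wikipedia")
    api_new.Login_to_wiki()
    pages = api_new.Get_template_pages("Template:NC", namespace="*", Max=10000)
    return pages
```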
13 changes: 8 additions & 5 deletions ncc_core/nc_import/bots/import_files.py
@@ -4,16 +4,16 @@

"""
import sys
# ---
# from newapi.wiki_page import MainPage, NEW_API
from newapi.ncc_page import MainPage as ncc_MainPage, NEW_API as ncc_NEW_API
from newapi import printe
# ---
from nc_import.bots import upload_file
# upload = upload_file.upload_by_url(file_name, text, url, comment='', code="en", family="wikipedia")


def get_file_text(title):
"""
Retrieves the text content of a file from NC Commons.
"""
title = f"File:{title}" if not title.startswith("File:") else title
printe.output(f"<<yellow>>get_file_text: {title} from nccommons:")

@@ -24,14 +24,17 @@ def get_file_text(title):


def import_file(title, code):
"""
Imports a file from NC Commons to Wikipedia.
"""
printe.output(f"<<yellow>>import_file: File:{title} to {code}wiki:")
# ---
file_text = get_file_text(title)
# ---
api_new = ncc_NEW_API('www', family='nccommons')
api_new = ncc_NEW_API("www", family="nccommons")
# api_new.Login_to_wiki()
img_url = api_new.Get_image_url(title)
# ---
upload = upload_file.upload_by_url(title, file_text, img_url, comment='Bot: import from nccommons.org', code=code, family="wikipedia")
upload = upload_file.upload_by_url(title, file_text, img_url, comment="Bot: import from nccommons.org", code=code, family="wikipedia")
# ---
return upload
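A hedged usage sketch for this module; the file title and language code are placeholders, not taken from the repository.

```python
# Illustrative only: copy a single file from NC Commons to the test wiki.
from nc_import.bots.import_files import import_file

result = import_file("Example_scan.jpg", "test")  # placeholder title and language code
print(result)
```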
54 changes: 32 additions & 22 deletions ncc_core/nc_import/bots/upload_file.py
@@ -8,27 +8,25 @@
# ---

"""
#
# (C) Ibrahem Qasim, 2023
#
# ---
import requests
import urllib.request
import tempfile
import os
import sys

# ---
from newapi import printe

# ---
sys.argv.append("botuser")
# ---
from newapi.wiki_page import NEW_API
# api_new = NEW_API('www', family='nccommons')
# api_new = NEW_API('ar', family='wikipedia')
# api_new.Login_to_wiki()
# json1 = api_new.post_params(params, addtoken=False)


def download_file(url):
"""
Downloads a file from a given URL to a temporary location.
"""
try:
# Download the file to a temporary location
temp_file_path, _ = urllib.request.urlretrieve(url)
@@ -39,7 +37,25 @@ def download_file(url):
return None


def do_post(code, family, params, files=None):
"""
Makes a POST request to the Wikipedia API with specified parameters.
"""
api_new = NEW_API(code, family=family)
api_new.Login_to_wiki()
# ---
if files:
result = api_new.post_params(params, addtoken=True, files=files)
else:
result = api_new.post_params(params, addtoken=True)
# ---
return result


def upload_by_file(file_name, text, url, comment="", code="en", family="wikipedia"):
"""
Uploads a file to Wikipedia using a local file.
"""
# ---
if file_name.startswith("File:"):
file_name = file_name.replace("File:", "")
@@ -49,17 +65,14 @@ def upload_by_file(file_name, text, url, comment="", code="en", family="wikipedi
# ---
params = {"action": "upload", "format": "json", "filename": file_name, "comment": comment, "text": text, "utf8": 1}
# ---
api_new = NEW_API(code, family=family)
# api_new.Login_to_wiki()
# ---
result = api_new.post_params(params, addtoken=True, files={"file": open(file_path, "rb")})
result = do_post(code, family, params, files={"file": open(file_path, "rb")})
# ---
upload_result = result.get("upload", {})
# ---
success = upload_result.get("result") == "Success"
error = result.get("error", {})
error_code = result.get("error", {}).get("code", "")
error_info = result.get("error", {}).get("info", '')
error_info = result.get("error", {}).get("info", "")
# ---
# {'upload': {'result': 'Warning', 'warnings': {'duplicate': ['Buckle_fracture_of_distal_radius_(Radiopaedia_46707).jpg']}, 'filekey': '1amgwircbots.rdrfjg.13.', 'sessionkey': '1amgwircbots.rdrfjg.13.'}}
# ---
@@ -81,23 +94,23 @@ def upload_by_file(file_name, text, url, comment="", code="en", family="wikipedi


def upload_by_url(file_name, text, url, comment="", code="en", family="wikipedia"):
"""
Uploads a file to Wikipedia using a URL.
"""
# ---
if file_name.startswith("File:"):
file_name = file_name.replace("File:", "")
# ---
params = {"action": "upload", "format": "json", "filename": file_name, "url": url, "comment": comment, "text": text, "utf8": 1}
# ---
api_new = NEW_API(code, family=family)
api_new.Login_to_wiki()
# ---
result = api_new.post_params(params, addtoken=True)
result = do_post(code, family, params)
# ---
upload_result = result.get("upload", {})
# ---
success = upload_result.get("result") == "Success"
error = result.get("error", {})
error_code = result.get("error", {}).get("code", "")
error_info = result.get("error", {}).get("info", '')
error_info = result.get("error", {}).get("info", "")
# ---
# {'upload': {'result': 'Warning', 'warnings': {'duplicate': ['Buckle_fracture_of_distal_radius_(Radiopaedia_46707).jpg']}, 'filekey': '1amgwircbots.rdrfjg.13.', 'sessionkey': '1amgwircbots.rdrfjg.13.'}}
# ---
@@ -113,10 +126,7 @@ def upload_by_url(file_name, text, url, comment="", code="en", family="wikipedia
if error:
printe.output(f"<<lightred>> error when upload_by_url, error_code:{error_code}")
printe.output(error_info)
errors = [
"copyuploadbaddomain",
"copyuploaddisabled"
]
errors = ["copyuploadbaddomain", "copyuploaddisabled"]
if error_code in errors or " url " in error_info.lower():
return upload_by_file(file_name, text, url, comment=comment, code=code, family=family)
# ----
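For context, a hedged sketch of how this module's fallback chain is meant to be used: `upload_by_url()` asks the wiki to fetch the file server-side, and if that is rejected (for example with `copyuploaddisabled` or `copyuploadbaddomain`), it downloads the file to a temporary path and retries through `upload_by_file()`. The file name, wikitext, and URL below are placeholders.

```python
# Illustrative only; arguments are placeholders, not real files or URLs.
from nc_import.bots import upload_file

result = upload_file.upload_by_url(
    "Example_scan.jpg",                                # placeholder file name
    "== Summary ==\nImported from NC Commons.",        # placeholder page text
    "https://nccommons.org/example/Example_scan.jpg",  # placeholder source URL
    comment="Bot: import from nccommons.org",
    code="test",
    family="wikipedia",
)
print(result)
```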