up

Mdwiki-TD · Mar 10, 2024 · 6b61c26 · 6b61c26
1 parent b2a18f1
commit 6b61c26
Show file tree

Hide file tree

Showing 6 changed files with 187 additions and 32 deletions.
diff --git a/ncc_core/nc_import/bot.py b/ncc_core/nc_import/bot.py
@@ -11,10 +11,12 @@
     "test"
 ]
 
+
 def start():
     for code in langs:
         pages = get_pages(code)
         work_on_pages(code, pages)
 
-if __name__ == '__main__':
-    start()
+
+if __name__ == "__main__":
+    start()
diff --git a/ncc_core/nc_import/bots/gt_pages.py b/ncc_core/nc_import/bots/gt_pages.py
@@ -1,9 +1,9 @@
-
 from newapi.wiki_page import MainPage, NEW_API
+
 # api_new = NEW_API('en', family='wikipedia')
 # login    = api_new.Login_to_wiki()
 # pages  = api_new.Get_template_pages(title, namespace="*", Max=10000)
-'''
+"""
 page      = MainPage(title, 'ar', family='wikipedia')
 exists    = page.exists()
 text      = page.get_text()
@@ -15,14 +15,14 @@
 templates = page.get_templates()
 save_page = page.save(newtext='', summary='', nocreate=1, minor='')
 create    = page.Create(text='', summary='')
-'''
+"""
+
 
 def get_pages(code):
-    api_new = NEW_API(code, family='wikipedia')
+    api_new = NEW_API(code, family="wikipedia")
 
     api_new.Login_to_wiki()
-    
+
     pages = api_new.Get_template_pages("Template:NC", namespace="*", Max=10000)
 
     return pages
-
diff --git a/ncc_core/nc_import/bots/import_files.py b/ncc_core/nc_import/bots/import_files.py
@@ -1,14 +1,36 @@
+"""
 
+bot for importing files from nccommons to wikipedia
 
-from newapi.wiki_page import MainPage
+"""
+import sys
+# ---
+# from newapi.wiki_page import MainPage, NEW_API
+from newapi.ncc_page import MainPage as ncc_MainPage, NEW_API as ncc_NEW_API
 from newapi import printe
-'''
-page      = MainPage(title, 'ar', family='wikipedia')
-exists    = page.exists()
-text      = page.get_text()
-save_page = page.save(newtext='', summary='', nocreate=1, minor='')
-'''
+# ---
+from nc_import.bots import upload_file
+# upload = upload_file.upload_by_url(file_name, text, url, comment='', code="en", family="wikipedia")
+
+def get_file_text(title):
+    """
+    Return the text of a file description page on nccommons.
+
+    The "File:" prefix is prepended to ``title`` when it is missing; the
+    page is then read through ``ncc_MainPage`` ("www", family "nccommons").
+    """
+    if not title.startswith("File:"):
+        title = f"File:{title}"
+    # ---
+    printe.output(f"<<yellow>>get_file_text: {title} from nccommons:")
+    # ---
+    return ncc_MainPage(title, "www", family="nccommons").get_text()
+
 
 def import_file(title, code):
     printe.output(f"<<yellow>>import_file: File:{title} to {code}wiki:")
-
+    # ---
+    file_text = get_file_text(title)
+    # ---
+    api_new  = ncc_NEW_API('www', family='nccommons')
+    # api_new.Login_to_wiki()
+    img_url = api_new.Get_image_url(title)
+    # ---
+    upload = upload_file.upload_by_url(title, file_text, img_url, comment='Bot: import from nccommons.org', code=code, family="wikipedia")
+    # ---
+    return upload
diff --git a/ncc_core/nc_import/bots/template_nc.py b/ncc_core/nc_import/bots/template_nc.py
@@ -0,0 +1,3 @@
+"""
+[[File:NCCommonsLogo.svg|thumb|120px|[https://nccommons.org{{localurl:File:{{{1|NCCommonsLogo.svg}}}}} Importing] from NC Commons pending]]
+"""
diff --git a/ncc_core/nc_import/bots/upload_file.py b/ncc_core/nc_import/bots/upload_file.py
@@ -0,0 +1,116 @@
+#!/usr/bin/python3
+"""
+
+Usage:
+# ---
+from nc_import.bots import upload_file
+# upload = upload_file.upload_by_url(file_name, text, url, comment='', code="en", family="wikipedia")
+# ---
+
+"""
+#
+# (C) Ibrahem Qasim, 2023
+#
+# ---
+import requests
+import urllib.request
+import tempfile
+import os
+# ---
+
+from newapi import printe
+from newapi.ncc_page import NEW_API
+# api_new  = NEW_API('www', family='nccommons')
+# api_new.Login_to_wiki()
+# json1    = api_new.post_params(params, addtoken=False)
+
+
+def download_file(url):
+    """
+    Fetch ``url`` with ``urllib.request.urlretrieve`` and return the local path.
+
+    Any exception is reported with ``print`` and turned into a ``None``
+    return value, so callers must check the result before using it.
+    """
+    try:
+        # urlretrieve returns (local_path, headers); only the path is needed
+        local_path = urllib.request.urlretrieve(url)[0]
+        print(f"File downloaded to: {local_path}")
+    except Exception as e:
+        print(f"An error occurred while downloading the file: {e}")
+        local_path = None
+    return local_path
+
+
+def upload_by_file(file_name, text, url, comment="", code="en", family="wikipedia"):
+    """
+    Download ``url`` to a local file and upload its content to the target wiki.
+
+    ``upload_by_url`` falls back to this function when the wiki rejects
+    uploads by URL for the source domain.
+
+    Args:
+        file_name: target file name, with or without the "File:" prefix.
+        text: text sent as the ``text`` parameter of the upload request.
+        url: source URL the file is downloaded from.
+        comment: upload comment.
+        code: site code passed to ``NEW_API``.
+        family: site family passed to ``NEW_API``.
+
+    Returns:
+        True when the API answers with ``result == "Success"``; False
+        otherwise, including when the download itself failed.
+    """
+    # strip only the leading prefix, not every "File:" inside the name
+    if file_name.startswith("File:"):
+        file_name = file_name[len("File:"):]
+    # ---
+    # get the file from url
+    file_path = download_file(url)
+    # ---
+    if not file_path:
+        # download_file already reported the failure; there is nothing to upload
+        return False
+    # ---
+    params = {"action": "upload", "format": "json", "filename": file_name, "comment": comment, "text": text, "utf8": 1}
+    # ---
+    api_new = NEW_API(code, family=family)
+    # NOTE(review): upload_by_url calls Login_to_wiki() before posting; it was left disabled here - confirm that is intended.
+    # api_new.Login_to_wiki()
+    # ---
+    try:
+        # the context manager closes the handle even when the request raises
+        with open(file_path, "rb") as file_obj:
+            result = api_new.post_params(params, addtoken=True, files={"file": file_obj})
+    finally:
+        # drop the temporary file(s) that urlretrieve may have left behind
+        urllib.request.urlcleanup()
+    # ---
+    upload_result = result.get("upload", {})
+    # ---
+    success = upload_result.get("result") == "Success"
+    error = result.get("error", {})
+    error_code = error.get("code", "")
+    # ---
+    # {'upload': {'result': 'Warning', 'warnings': {'duplicate': ['Buckle_fracture_of_distal_radius_(Radiopaedia_46707).jpg']}, 'filekey': '1amgwircbots.rdrfjg.13.', 'sessionkey': '1amgwircbots.rdrfjg.13.'}}
+    # ---
+    # "or" also covers an empty duplicate list, which would break [0]
+    duplicates = upload_result.get("warnings", {}).get("duplicate") or [""]
+    duplicate = duplicates[0].replace("_", " ")
+    # ---
+    if success:
+        printe.output(f"<<lightgreen>> ** upload true .. [[File:{file_name}]] ")
+        return True
+
+    if duplicate:
+        printe.output(f"<<lightred>> ** duplicate file:  {duplicate}.")
+
+    if error:
+        printe.output(f"<<lightred>> error when upload_by_file, error_code:{error_code}")
+        printe.output(error)
+
+    # ----
+    return False
+
+def upload_by_url(file_name, text, url, comment="", code="en", family="wikipedia"):
+    """
+    Ask the target wiki to upload a file directly from ``url``.
+
+    When the API refuses with "Uploads by URL are not allowed from this
+    domain.", the upload is retried through ``upload_by_file``, which
+    downloads the file first.
+
+    Args:
+        file_name: target file name, with or without the "File:" prefix.
+        text: text sent as the ``text`` parameter of the upload request.
+        url: source URL of the file.
+        comment: upload comment.
+        code: site code passed to ``NEW_API``.
+        family: site family passed to ``NEW_API``.
+
+    Returns:
+        True when the API answers with ``result == "Success"`` (or the
+        fallback upload succeeds), otherwise False.
+    """
+    # strip only the leading prefix, not every "File:" inside the name
+    if file_name.startswith("File:"):
+        file_name = file_name[len("File:"):]
+    # ---
+    params = {"action": "upload", "format": "json", "filename": file_name, "url": url, "comment": comment, "text": text, "utf8": 1}
+    # ---
+    api_new = NEW_API(code, family=family)
+    api_new.Login_to_wiki()
+    # ---
+    result = api_new.post_params(params, addtoken=True)
+    # ---
+    upload_result = result.get("upload", {})
+    # ---
+    success = upload_result.get("result") == "Success"
+    error = result.get("error", {})
+    error_code = error.get("code", "")
+    error_info = error.get("info", "")
+    # ---
+    # {'upload': {'result': 'Warning', 'warnings': {'duplicate': ['Buckle_fracture_of_distal_radius_(Radiopaedia_46707).jpg']}, 'filekey': '1amgwircbots.rdrfjg.13.', 'sessionkey': '1amgwircbots.rdrfjg.13.'}}
+    # ---
+    # "or" also covers an empty duplicate list, which would break [0]
+    duplicates = upload_result.get("warnings", {}).get("duplicate") or [""]
+    duplicate = duplicates[0].replace("_", " ")
+    # ---
+    if success:
+        printe.output(f"<<lightgreen>> ** true .. [[File:{file_name}]] ")
+        return True
+
+    if duplicate:
+        printe.output(f"<<lightred>> ** duplicate file:  {duplicate}.")
+
+    if error:
+        printe.output(f"<<lightred>> error when upload_by_url, error_code:{error_code}")
+        # printe.output(error)
+
+    # the wiki blocks URL uploads from this domain: download and upload the file instead
+    if error_info == "Uploads by URL are not allowed from this domain.":
+        return upload_by_file(file_name, text, url, comment=comment, code=code, family=family)
+    # ----
+    return False
diff --git a/ncc_core/nc_import/bots/wrk_pages.py b/ncc_core/nc_import/bots/wrk_pages.py
@@ -1,23 +1,25 @@
-
 import wikitextparser as wtp
+from newapi import printe
 from newapi.wiki_page import MainPage
 from nc_import.bots.import_files import import_file
-'''
+
+"""
 page      = MainPage(title, 'ar', family='wikipedia')
 exists    = page.exists()
 text      = page.get_text()
 save_page = page.save(newtext='', summary='', nocreate=1, minor='')
-'''
+"""
+
 
 class PageWork:
     def __init__(self, code, title):
         self.code = code
         self.title = title
         self.temps = []
-        self.page = MainPage(self.title, self.code, family='wikipedia')
+        self.page = MainPage(self.title, self.code, family="wikipedia")
         self.text = self.page.get_text()
         self.new_text = self.text
-    
+
     def start(self):
         # ---
         if not self.page.exists():
@@ -28,26 +30,35 @@ def start(self):
         self.work_on_temps()
         self.save()
 
-
     def get_temps(self):
         # ---
         parsed = wtp.parse(self.text)
         # ---
         for temp in parsed.templates:
             # ---
-            name = str(temp.normal_name()).strip().lower().replace('_', ' ')
-            if name == 'NC':
+            name = str(temp.normal_name()).strip().lower().replace("_", " ")
+            # ---
+            if name == "nc":
                 self.temps.append(temp)
+        # ---
+        printe.output(f"{len(self.temps)} temps")
 
     def work_one_temp(self, temp):
-        args = temp.arguments
-        # ---
-        print(args)
+        # args = temp.arguments
         # ---
         text = temp.string
         # ---
-        file_name = args[0].strip()
-        caption   = args[1].strip()
+        file_name = ""
+        caption = ""
+        # ---
+        if temp.get_arg("1"):
+            file_name = temp.get_arg("1").value
+        # ---
+        if temp.get_arg("2"):
+            caption = temp.get_arg("2").value
+        # ---
+        printe.output(f"<<purple>> File:<<default>> {file_name}")
+        printe.output(f"<<purple>> caption:<<default>> {caption}")
         # ---
         done = import_file(file_name, self.code)
         # ---
@@ -67,13 +78,14 @@ def work_on_temps(self):
             # ---
             if temp_new_text != string:
                 self.new_text = self.new_text.replace(string, temp_new_text)
-    
+
     def save(self):
         if self.new_text != self.text:
             self.page.save(newtext=self.new_text, summary="bot: fix NC")
 
+
 def work_on_pages(code, pages):
     for numb, page_title in enumerate(pages, 1):
         print(f"{numb=}: {page_title=}:")
-        bot = PageWork(code,  page_title)
-        bot.start()
+        bot = PageWork(code, page_title)
+        bot.start()