
Pull request #117 · Merged · 2 commits · Mar 28, 2024
1 change: 1 addition & 0 deletions md_core/mdpy/bots/user_account_new.py
@@ -9,6 +9,7 @@
# ---
'''

import sys
import os
import configparser

35 changes: 35 additions & 0 deletions ncc_core/nc_import/README.md
@@ -0,0 +1,35 @@
# NC Commons Import Bot

This bot imports files from NC Commons into Wikipedia. It fetches files from NC Commons, uploads them to the target Wikipedia, and updates the {{NC}} templates that reference them. The workflow is outlined in the steps below; a short extraction sketch follows the list.

1. **Get Languages List from User Page**:
- The bot retrieves the list of target languages from the page "User:Mr. Ibrahem/import bot" on NC Commons.
- It parses the page content and extracts the language codes, one per bullet item.

2. **Retrieve Pages with Template:NC for Each Language**:
- For each language obtained from the user page, the bot proceeds to fetch pages that contain the template "Template:NC".
- This is done by making API calls to Wikipedia using the language code to retrieve relevant pages.

3. **Extract Templates with Name "Template:NC"**:
- Upon accessing each page containing the "Template:NC" template, the bot identifies and extracts all instances of this template.
- It utilizes a parsing library to parse the wikitext of the page and identify occurrences of the specified template.

4. **Process Each Template**:
- For each extracted template, the bot retrieves relevant information such as file name and caption.
- It may also fetch additional details related to the file, such as its content or metadata, from NC Commons.

5. **Upload File to Wikipedia**:
- Once the necessary information is gathered, the bot proceeds to upload the file to Wikipedia.
- This involves using the Wikimedia API to perform the upload operation.
- If the file already exists on Wikipedia, the bot handles the duplicate warning returned by the upload API.
- If the file is fetched from NC Commons, it is uploaded to Wikipedia with appropriate attribution and metadata.

6. **Update Template in Page**:
- After successfully uploading the file, the bot updates the relevant template on the Wikipedia page.
- It replaces the existing template with the newly uploaded file's information.
- This ensures that the Wikipedia page reflects the latest changes made by the bot.

7. **Repeat for All Pages and Languages**:
- The bot iterates through all pages containing "Template:NC" for each language obtained.
- It repeats the process of extracting templates, uploading files, and updating templates for each page.
- This ensures comprehensive coverage of files across multiple languages on Wikipedia.
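For illustration, here is a minimal sketch of steps 3–4 (finding `{{NC}}` templates and reading the file name and caption). It assumes the `mwparserfromhell` library and a parameter layout in which the file name is the first positional parameter and the caption is a named `caption` parameter; the bot's actual parser and parameter names may differ.

```python
# Hypothetical sketch only -- not the bot's actual extraction code.
import mwparserfromhell

def extract_nc_templates(wikitext):
    """Return (file_name, caption) pairs for every {{NC}} template in the text."""
    code = mwparserfromhell.parse(wikitext)
    results = []
    for tpl in code.filter_templates():
        # Match the template name case-insensitively ("NC", "nc", ...).
        if str(tpl.name).strip().lower() != "nc":
            continue
        # Assumed layout: first positional parameter = file name, "caption" = caption.
        file_name = str(tpl.get("1").value).strip() if tpl.has("1") else ""
        caption = str(tpl.get("caption").value).strip() if tpl.has("caption") else ""
        results.append((file_name, caption))
    return results

print(extract_nc_templates("{{NC|Example_scan.jpg|caption=An example image}}"))
# [('Example_scan.jpg', 'An example image')]
```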
13 changes: 7 additions & 6 deletions ncc_core/nc_import/bot.py
@@ -4,15 +4,16 @@

"""
from nc_import.bots.gt_pages import get_pages
from nc_import.bots.wrk_pages import work_on_pages

langs = [
# "af",
"test"
]
from nc_import.bots.wdrk_pages import work_on_pages
from nc_import.bots.get_langs import get_langs_codes


def start():
"""
A function that starts the process by iterating over languages, getting pages for each language, and then working on those pages.
"""
langs = get_langs_codes()
# ---
for code in langs:
pages = get_pages(code)
work_on_pages(code, pages)
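A minimal way to invoke this entry point might look like the following; the invocation is illustrative and assumes the `ncc_core` directory is on `PYTHONPATH`.

```python
# Illustrative runner only; assumes ncc_core/ is on PYTHONPATH.
from nc_import.bot import start

if __name__ == "__main__":
    start()
```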
34 changes: 34 additions & 0 deletions ncc_core/nc_import/bots/get_langs.py
@@ -0,0 +1,34 @@
"""
This bot gets the language list from the NC Commons page:
https://nccommons.org/wiki/User:Mr._Ibrahem/import_bot

"""
import re
from newapi.ncc_page import MainPage as ncc_MainPage
from newapi import printe



def get_text():
"""
Retrieves text content from a specific page.
"""
title = "User:Mr. Ibrahem/import bot"
page = ncc_MainPage(title, "www", family="nccommons")
text = page.get_text()
# match all langs like: * ar\n* fr
# ---
return text

def get_langs_codes():
"""
Extracts language codes from the text content of a page.
"""
text = get_text()
langs = []
fi = re.findall(r"\* (.*)\n", text)
for i in fi:
langs.append(i.strip())
# ---
printe.output(f"langs: {langs}")
return langs
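To make the extraction concrete, the snippet below feeds made-up page content through the same regex used by `get_langs_codes()`; the real content of "User:Mr. Ibrahem/import bot" may differ.

```python
# Illustrative only: the regex r"\* (.*)\n" captures one code per "* xx" bullet.
import re

sample_text = "Languages to import:\n* ar\n* fr\n* test\n"
codes = [code.strip() for code in re.findall(r"\* (.*)\n", sample_text)]
print(codes)  # ['ar', 'fr', 'test']
```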
22 changes: 4 additions & 18 deletions ncc_core/nc_import/bots/gt_pages.py
@@ -1,24 +1,10 @@
from newapi.wiki_page import MainPage, NEW_API

# api_new = NEW_API('en', family='wikipedia')
# login = api_new.Login_to_wiki()
# pages = api_new.Get_template_pages(title, namespace="*", Max=10000)
"""
page = MainPage(title, 'ar', family='wikipedia')
exists = page.exists()
text = page.get_text()
timestamp = page.get_timestamp()
user = page.get_user()
links = page.page_links()
words = page.get_words()
purge = page.purge()
templates = page.get_templates()
save_page = page.save(newtext='', summary='', nocreate=1, minor='')
create = page.Create(text='', summary='')
"""
from newapi.wiki_page import NEW_API


def get_pages(code):
"""
Retrieves template pages related to a given language code.
"""
api_new = NEW_API(code, family="wikipedia")

api_new.Login_to_wiki()
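The hunk is cut off here, but the comment block removed above suggests how `get_pages()` likely continues. A hedged sketch of that continuation, with the template title and limit taken as assumptions:

```python
# Hypothetical continuation of get_pages(); the title and Max value are
# assumptions based on the example comment removed in this diff.
from newapi.wiki_page import NEW_API

def get_pages(code):
    """Retrieves template pages related to a given language code."""
    api_new = NEW_API(code, family="wikipedia")
    api_new.Login_to_wiki()
    pages = api_new.Get_template_pages("Template:NC", namespace="*", Max=10000)
    return pages
```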
13 changes: 8 additions & 5 deletions ncc_core/nc_import/bots/import_files.py
@@ -4,16 +4,16 @@

"""
import sys
# ---
# from newapi.wiki_page import MainPage, NEW_API
from newapi.ncc_page import MainPage as ncc_MainPage, NEW_API as ncc_NEW_API
from newapi import printe
# ---
from nc_import.bots import upload_file
# upload = upload_file.upload_by_url(file_name, text, url, comment='', code="en", family="wikipedia")


def get_file_text(title):
"""
Retrieves the text content of a file from NC Commons.
"""
title = f"File:{title}" if not title.startswith("File:") else title
printe.output(f"<<yellow>>get_file_text: {title} from nccommons:")

@@ -24,14 +24,17 @@ def get_file_text(title):


def import_file(title, code):
"""
Imports a file from NC Commons to Wikipedia.
"""
printe.output(f"<<yellow>>import_file: File:{title} to {code}wiki:")
# ---
file_text = get_file_text(title)
# ---
api_new = ncc_NEW_API('www', family='nccommons')
api_new = ncc_NEW_API("www", family="nccommons")
# api_new.Login_to_wiki()
img_url = api_new.Get_image_url(title)
# ---
upload = upload_file.upload_by_url(title, file_text, img_url, comment='Bot: import from nccommons.org', code=code, family="wikipedia")
upload = upload_file.upload_by_url(title, file_text, img_url, comment="Bot: import from nccommons.org", code=code, family="wikipedia")
# ---
return upload
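A hedged usage sketch for this module; the file title and language code are placeholders, not taken from the repository.

```python
# Illustrative only: copy a single file from NC Commons to the test wiki.
from nc_import.bots.import_files import import_file

result = import_file("Example_scan.jpg", "test")  # placeholder title and language code
print(result)
```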
54 changes: 32 additions & 22 deletions ncc_core/nc_import/bots/upload_file.py
@@ -8,27 +8,25 @@
# ---

"""
#
# (C) Ibrahem Qasim, 2023
#
# ---
import requests
import urllib.request
import tempfile
import os
import sys

# ---
from newapi import printe

# ---
sys.argv.append("botuser")
# ---
from newapi.wiki_page import NEW_API
# api_new = NEW_API('www', family='nccommons')
# api_new = NEW_API('ar', family='wikipedia')
# api_new.Login_to_wiki()
# json1 = api_new.post_params(params, addtoken=False)


def download_file(url):
"""
Downloads a file from a given URL to a temporary location.
"""
try:
# Download the file to a temporary location
temp_file_path, _ = urllib.request.urlretrieve(url)
@@ -39,7 +37,25 @@ def download_file(url):
return None


def do_post(code, family, params, files=None):
"""
Makes a POST request to the Wikipedia API with specified parameters.
"""
api_new = NEW_API(code, family=family)
api_new.Login_to_wiki()
# ---
if files:
result = api_new.post_params(params, addtoken=True, files=files)
else:
result = api_new.post_params(params, addtoken=True)
# ---
return result


def upload_by_file(file_name, text, url, comment="", code="en", family="wikipedia"):
"""
Uploads a file to Wikipedia using a local file.
"""
# ---
if file_name.startswith("File:"):
file_name = file_name.replace("File:", "")
@@ -49,17 +65,14 @@ def upload_by_file(file_name, text, url, comment="", code="en", family="wikipedi
# ---
params = {"action": "upload", "format": "json", "filename": file_name, "comment": comment, "text": text, "utf8": 1}
# ---
api_new = NEW_API(code, family=family)
# api_new.Login_to_wiki()
# ---
result = api_new.post_params(params, addtoken=True, files={"file": open(file_path, "rb")})
result = do_post(code, family, params, files={"file": open(file_path, "rb")})
# ---
upload_result = result.get("upload", {})
# ---
success = upload_result.get("result") == "Success"
error = result.get("error", {})
error_code = result.get("error", {}).get("code", "")
error_info = result.get("error", {}).get("info", '')
error_info = result.get("error", {}).get("info", "")
# ---
# {'upload': {'result': 'Warning', 'warnings': {'duplicate': ['Buckle_fracture_of_distal_radius_(Radiopaedia_46707).jpg']}, 'filekey': '1amgwircbots.rdrfjg.13.', 'sessionkey': '1amgwircbots.rdrfjg.13.'}}
# ---
@@ -81,23 +94,23 @@ def upload_by_file(file_name, text, url, comment="", code="en", family="wikipedi


def upload_by_url(file_name, text, url, comment="", code="en", family="wikipedia"):
"""
Uploads a file to Wikipedia using a URL.
"""
# ---
if file_name.startswith("File:"):
file_name = file_name.replace("File:", "")
# ---
params = {"action": "upload", "format": "json", "filename": file_name, "url": url, "comment": comment, "text": text, "utf8": 1}
# ---
api_new = NEW_API(code, family=family)
api_new.Login_to_wiki()
# ---
result = api_new.post_params(params, addtoken=True)
result = do_post(code, family, params)
# ---
upload_result = result.get("upload", {})
# ---
success = upload_result.get("result") == "Success"
error = result.get("error", {})
error_code = result.get("error", {}).get("code", "")
error_info = result.get("error", {}).get("info", '')
error_info = result.get("error", {}).get("info", "")
# ---
# {'upload': {'result': 'Warning', 'warnings': {'duplicate': ['Buckle_fracture_of_distal_radius_(Radiopaedia_46707).jpg']}, 'filekey': '1amgwircbots.rdrfjg.13.', 'sessionkey': '1amgwircbots.rdrfjg.13.'}}
# ---
@@ -113,10 +126,7 @@ def upload_by_url(file_name, text, url, comment="", code="en", family="wikipedia
if error:
printe.output(f"<<lightred>> error when upload_by_url, error_code:{error_code}")
printe.output(error_info)
errors = [
"copyuploadbaddomain",
"copyuploaddisabled"
]
errors = ["copyuploadbaddomain", "copyuploaddisabled"]
if error_code in errors or " url " in error_info.lower():
return upload_by_file(file_name, text, url, comment=comment, code=code, family=family)
# ----
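For context, a hedged sketch of how this module's fallback chain is meant to be used: `upload_by_url()` asks the wiki to fetch the file server-side, and if that is rejected (for example with `copyuploaddisabled` or `copyuploadbaddomain`), it downloads the file to a temporary path and retries through `upload_by_file()`. The file name, wikitext, and URL below are placeholders.

```python
# Illustrative only; arguments are placeholders, not real files or URLs.
from nc_import.bots import upload_file

result = upload_file.upload_by_url(
    "Example_scan.jpg",                                # placeholder file name
    "== Summary ==\nImported from NC Commons.",        # placeholder page text
    "https://nccommons.org/example/Example_scan.jpg",  # placeholder source URL
    comment="Bot: import from nccommons.org",
    code="test",
    family="wikipedia",
)
print(result)
```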