Skip to content

Commit

Permalink
Merge pull request #176 from MrIbrahem/update
Browse files Browse the repository at this point in the history
Enhance Medwiki Processing and API Integration
  • Loading branch information
MrIbrahem authored Sep 10, 2024
2 parents 0f582f7 + b359060 commit d98526e
Show file tree
Hide file tree
Showing 7 changed files with 204 additions and 12 deletions.
17 changes: 14 additions & 3 deletions copy_to_en/medwiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
MainPage = super_page.MainPage

Dir = Path(__file__).parent
revids = {}


def medwiki_cat_members(cat="Category:Mdwiki Translation Dashboard articles"):
Expand Down Expand Up @@ -70,7 +71,9 @@ def Create(title, text, summary):


def get_text(x):
alltext = mdwiki_api.GetPageText(x)
alltext, revid = mdwiki_api.GetPageText(x, get_revid=True)
# ---
revids[x] = revid
# ---
if not alltext:
print("no text: " + x)
Expand Down Expand Up @@ -101,7 +104,9 @@ def get_text(x):
elif newtext.lower().find("{{drugbox") != -1:
newtext = newtext[newtext.lower().find("{{drugbox") :]
# ---
newtext = f"{unlinkedwikibase}\n\n{newtext}"
revid_temp = f"{{{{mdwiki revid|{revid}}}}}"
# ---
newtext = f"{unlinkedwikibase}\n{revid_temp}\n{newtext}"
# ---
return newtext

Expand Down Expand Up @@ -169,6 +174,12 @@ def main():
all_pages = [x for x in all_pages if x not in done]
# ---
start(all_pages)
# ---
# Persist the revision ids that get_text() recorded in `revids` next to this script.
file = Dir / "all_pages_revids.json"
# ---
with open(file, "w", encoding="utf-8") as f:
    # `ensure_ascii` is an option of the JSON encoder, not of file.write();
    # passing it to write() raised TypeError, so let json.dump do the writing.
    json.dump(revids, f, ensure_ascii=False)
# ---


def main2():
Expand All @@ -185,7 +196,7 @@ def main2():
if __name__ == "__main__":
if "test" in sys.argv:
# one_page("Posaconazole")
one_page("COVID-19")
one_page("Tropicamide")
# one_page("Chronic lymphocytic leukemia")
elif "main2" in sys.argv:
main2()
Expand Down
7 changes: 5 additions & 2 deletions md_core_helps/apis/mdwiki_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,13 +92,13 @@ def wordcount(title, srlimit="30"):
return words


def GetPageText(title, redirects=False):
def GetPageText(title, redirects=False, get_revid=False):
# printe.output( '**GetarPageText: ')
# ---
params = {
"action": "parse",
# "prop": "wikitext|sections",
"prop": "wikitext",
"prop": "wikitext|revid",
"page": title,
# "redirects": 1,
# "normalize": 1,
Expand All @@ -119,6 +119,9 @@ def GetPageText(title, redirects=False):
if not text:
printe.output(f'page {title} text == "".')
# ---
if get_revid:
return text, json1.get("parse", {}).get("revid", 0)
# ---
return text


Expand Down
3 changes: 2 additions & 1 deletion td_core/after_translate/bots/add_to_wd.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def work_with_2_qids(oldq, new_q):


def add_wd(qid, enlink, lang, target):
print("add_wd:")
params = {
"action": "wbsetsitelink",
"linktitle": target,
Expand All @@ -114,7 +115,7 @@ def add_wd(qid, enlink, lang, target):
# ---
ss = wikidataapi.post(params, token=True)
# ---
printe.output(ss)
printe.output(str(ss))
# ---
if ss and "success" in ss:
return True
Expand Down
122 changes: 122 additions & 0 deletions td_core/after_translate/mdwikicx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#!/usr/bin/python3
"""
https://hashtags.wmcloud.org/json/?query=mdwikicx
Database bot
python3 core8/pwb.py after_translate/mdwikicx
python3 core8/pwb.py after_translate/mdwikicx pages_users
python3 core8/pwb.py after_translate/mdwikicx justsql
python3 core8/pwb.py after_translate/mdwikicx -lang:ur
"""

import sys
import requests
import json
import re
from pathlib import Path
from newapi import printe
from newapi.page import MainPage
from mdpy.bots import en_to_md
from after_translate.bots import add_to_wd


def get_result():
    """Fetch the rows of the ``mdwikicx`` query from the hashtags tool.

    Returns:
        list: the value stored under the ``Rows`` key of the JSON response.
        An empty list is returned when the request fails, the server answers
        with an HTTP error status, the body is not a JSON object, or ``Rows``
        is missing/empty.
    """
    url = "https://hashtags.wmcloud.org/json/?query=mdwikicx"
    # ---
    try:
        # Without a timeout a stalled connection would block the bot forever.
        r = requests.get(url, timeout=60)
        # Fail on 4xx/5xx instead of trying to parse an error page as data.
        r.raise_for_status()
        result = r.json()
    except Exception as e:
        # Best-effort fetch: report the problem and let the caller see "no rows".
        print(f"Exception: {e}")
        return []
    # ---
    if not isinstance(result, dict):
        print(f"Exception: unexpected JSON payload of type {type(result).__name__}")
        return []
    # ---
    # `or []` also covers a JSON null stored under "Rows".
    return result.get("Rows", []) or []


def work_one_page(x):
    """Check one translated page and hand it to ``add_to_wd.add_wd`` if it has no QID.

    ``x`` is a mapping that provides the keys ``mdtitle``, ``page_title`` and
    ``lang``. Pages that do not exist or that are outside namespace 0 are
    skipped. When the page already has a QID, both QIDs are only logged.
    """
    # QID registered for the mdwiki title ("" when the title is unknown).
    qid = en_to_md.mdtitle_to_qid.get(x["mdtitle"], "")
    page_title, lang_code = x["page_title"], x["lang"]
    # ---
    wiki_page = MainPage(page_title, lang_code, family="wikipedia")
    if not wiki_page.exists():
        return
    # ---
    ns = wiki_page.namespace()
    if ns != 0:
        printe.output(f"not article.{page_title=}\t{ns=}")
        return
    # ---
    qid_in = wiki_page.get_qid()
    if qid_in:
        # Already connected to an item: just report both ids.
        printe.output(f"<<blue>> {qid_in=}, {qid=}")
    else:
        add_to_wd.add_wd(qid, "", lang_code, page_title)


def _row_to_tab(row):
    """Convert one row of the hashtags tool into the dict used by work_one_page.

    A row looks like:
        {
            "Domain": "ar.wikipedia.org",
            "Timestamp": "2024-09-04T01:33:31Z",
            "Username": "Mr. Ibrahem",
            "Page_title": "مستخدم:Mr. Ibrahem/Tropicamide",
            "Edit_summary": 'Created by translating the page "[[:mdwiki:Special:Redirect/revision/5210|Tropicamide]] to:ar #mdwikicx"',
            "Revision_ID": 67801114,
        }
    """
    tab = {
        "lang": row.get("Domain", "").replace(".wikipedia.org", ""),
        # Keep only the date part of the ISO timestamp.
        "timestamp": row.get("Timestamp", "").split("T")[0],
        "username": row.get("Username", ""),
        "page_title": row.get("Page_title", ""),
        "mdtitle": "",
    }
    # ---
    # The mdwiki title is the label of the first piped wikilink in the summary.
    md_title_find = re.search(r"\|(.*?)\]\]", row.get("Edit_summary", ""))
    # ---
    if md_title_find:
        tab["mdtitle"] = md_title_find.group(1)
    # ---
    return tab


def main():
    """Fetch the #mdwikicx edits, store them in titles.json and process each page.

    Command line:
        -lang:xx (or lang:xx)  only handle rows whose wiki language is ``xx``.

    Rows whose username contains "Mr. Ibrahem" or "Doc James" are skipped.
    The remaining rows are written to ``titles.json`` beside this script and
    then passed one by one to ``work_one_page``.
    """
    # ---
    lang_o = ""
    # ---
    for arg in sys.argv:
        arg, _, value = arg.partition(":")
        if arg in ["lang", "-lang"]:
            lang_o = value.strip()
    # ---
    # Use the live query result; previously a hard-coded sample row was used,
    # which the username filter below always discarded.
    result_list = get_result()
    # ---
    skipped_users = ("Mr. Ibrahem", "Doc James")
    # ---
    titles = []
    # ---
    for x in result_list:
        # ---
        tab = _row_to_tab(x)
        # ---
        # Honour the -lang filter when one was given.
        if lang_o and tab["lang"] != lang_o:
            continue
        # ---
        if any(name in tab["username"] for name in skipped_users):
            continue
        # ---
        titles.append(tab)
    # ---
    with open(Path(__file__).parent / "titles.json", "w", encoding="utf-8") as f:
        json.dump(titles, f, ensure_ascii=False)
    # ---
    for x in titles:
        # ---
        work_one_page(x)


# Run the bot only when this file is executed as a script.
if __name__ == "__main__":
    main()
2 changes: 2 additions & 0 deletions td_core/after_translate/sql_new.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#!/usr/bin/python3
"""
https://hashtags.wmcloud.org/json/?query=mdwikicx
بوت قواعد البيانات
python3 core8/pwb.py after_translate/sql_new
Expand Down
60 changes: 55 additions & 5 deletions wprefs/bot1.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"""
# import os
import random
import sys
from pathlib import Path

Expand All @@ -24,7 +25,7 @@
# ---
# print(Dir)
# ---
from wprefs.api import GetPageText, page_put
from wprefs.api import GetPageText#, page_put
from wprefs.helps import ec_de_code
from wprefs.files import setting, save_wprefcash
from wprefs.wpref_text import fix_page
Expand Down Expand Up @@ -88,10 +89,50 @@ def one_page(page, lang):
return ""


def one_file(file, lang):
    """Fix the references of a text stored in a file and cache the result.

    ``file`` is a path; a value starting with ``texts/`` is resolved below
    ``public_html/fixwikirefs`` in the parent of ``Dir``. The text is passed to
    ``fix_page_here`` under a random ``t_<number>`` title and, when it changed
    and is not empty, saved with ``save_wprefcash``; the value returned by that
    call is printed. The function always returns "".
    """
    # Random pseudo-title, used both for fixing and as the cache name.
    rand_title = f"t_{random.randint(1000000, 9999999)}"
    # ---
    source = Dir.parent / "public_html/fixwikirefs" / file if file.startswith("texts/") else file
    # ---
    try:
        with open(source, "r", encoding="utf-8") as handle:
            original = handle.read()
    except Exception as err:
        print(err)
        return ""
    # ---
    if original == "":
        print("notext")
        return ""
    # ---
    fixed = fix_page_here(original, rand_title, lang)
    # ---
    # Exactly one message is printed: the reason for skipping, or the cache result.
    if original == fixed:
        report = "no changes"
    elif not fixed:
        report = "notext"
    else:
        report = save_wprefcash(rand_title, fixed)
    # ---
    print(report)
    # ---
    return ""


def maine():
# ---
page = ""
lange = ""
file = ""
# ---
for arg in sys.argv:
arg, _, value = arg.partition(":")
Expand All @@ -103,15 +144,24 @@ def maine():
move_dot[1] = True
# ---
if arg == "lang":
lange = value
lange = value.strip()
if arg == "page":
page = value.replace("_", " ")
if arg == "file":
file = value.replace("_", " ")
# ---
if page == "" and file == "":
print("no page or file")
return ""
# ---
if page == "" or lange == "":
print("no page or lang")
if lange == "":
print("no lang")
return ""
# ---
one_page(page, lange)
if file:
one_file(file, lange)
else:
one_page(page, lange)
# ---
return ""

Expand Down
5 changes: 4 additions & 1 deletion wprefs/infobox.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,10 @@ def Expend_Infobox(text, title, section_0):
section_0 = newtext
print_s('section_0 = newtext')
# ---
title2 = re.escape(title)
try:
title2 = re.escape(title)
except Exception as e:
title2 = title
# ---
newtext = re.sub(r"\}\s*(\'\'\'%s\'\'\')" % title2, r"}\n\n\g<1>", newtext)
section_0 = re.sub(r"\}\s*(\'\'\'%s\'\'\')" % title2, r"}\n\n\g<1>", section_0)
Expand Down

0 comments on commit d98526e

Please sign in to comment.