
Update #129

Merged
merged 6 commits on Apr 14, 2024
1 change: 1 addition & 0 deletions .gitignore
@@ -38,3 +38,4 @@ mass/radiox/**
ncc_core/nc_import/long_bots.py
/md_core/newapix
/mass/eyerounds/htmls
/fix_mass/fix_sets/studies_done
22 changes: 22 additions & 0 deletions fix_mass/fix_sets/bots/done.py
@@ -0,0 +1,22 @@
"""

from fix_mass.fix_sets.bots.done import studies_done_append, find_done #find_done(study_id)

"""
from pathlib import Path

Dir = Path(__file__).parent.parent

studies_done_dir = Dir / "studies_done"

def find_done(study_id):
file = studies_done_dir / f"{study_id}.done"
if file.exists():
return True
return False


def studies_done_append(study_id):
file = studies_done_dir / f"{study_id}.done"
if not file.exists():
file.touch()
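A minimal usage sketch of the new done.py helpers, assuming the fix_mass/fix_sets/studies_done marker directory (now ignored via the .gitignore change above) already exists; the study id below is hypothetical.

from fix_mass.fix_sets.bots.done import studies_done_append, find_done

study_id = "120594"  # hypothetical Radiopaedia study id

if not find_done(study_id):        # no "<study_id>.done" marker file yet
    # ... process the study here ...
    studies_done_append(study_id)  # touch the marker so the study is skipped next time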
128 changes: 87 additions & 41 deletions fix_mass/fix_sets/bots/get_img_info.py
@@ -21,31 +21,22 @@

st_dic_infos = Dir / "jsons/studies_files_infos"

def dump_st(data, s_id):
file = st_dic_infos / f"{s_id}_s_id.json"

def dump_st(data, file):

with open(file, "w", encoding="utf-8") as f:
json.dump(data, f, ensure_ascii=False, indent=2)
printe.output(f"<<green>> write {len(data)} to file: {file}")

def gt_img_info(title):

def gt_img_info(titles, id_to_url={}):
# ---
title = [title] if not isinstance(title, list) else title
titles = [titles] if not isinstance(titles, list) else titles
# ---
info = {}
printe.output(f"one_img_info: {len(title)=}")
titles = [x for x in titles if x]
# ---
params = {
"action": "query",
"titles": "|".join(title),
# "prop": "revisions|categories|info|extlinks",
"prop": "extlinks",
# "clprop": "sortkey|hidden", # categories
# "rvprop": "timestamp|content|user|ids", # revisions
# "cllimit": "max", # categories
"ellimit": "max", # extlinks
"formatversion": "2",
}
data = api_new.post_params(params)
info = {}
printe.output(f"one_img_info: {len(titles)=}")
# ---
_x = {
"pages": [
@@ -65,47 +56,102 @@ def gt_img_info(title):
]
}
# ---
pages = data.get("query", {}).get("pages", [])
params = {
"action": "query",
# "titles": "|".join(titles),
# "prop": "revisions|categories|info|extlinks",
"prop": "revisions|extlinks",
# "clprop": "sortkey|hidden", # categories
"rvprop": "content", # revisions
# "cllimit": "max", # categories
"ellimit": "max", # extlinks
"formatversion": "2",
}
# ---
for page in pages:
extlinks = page.get("extlinks", [])
title = page.get("title")
# work with 40 titles at once
for i in range(0, len(titles), 40):
group = titles[i : i + 40]
params["titles"] = "|".join(group)
# ---
info[title] = {"img_url": "", "case_url": "", "study_url": "", "caseId": "", "studyId": ""}
# print("|".join(group))
# ---
for extlink in extlinks:
url = extlink.get("url")
ma = re.match("https://radiopaedia.org/cases/(\d+)/studies/(\d+)", url)
if url.find("/images/") != -1:
info[title]["img_url"] = url

elif re.match(r"^https://radiopaedia.org/cases/[^\d\/]+$", url):
info[title]["case_url"] = url

elif ma:
info[title]["study_url"] = url
info[title]["caseId"] = ma.group(1)
info[title]["studyId"] = ma.group(2)
data = api_new.post_params(params)
# ---
error = data.get("error", {})
if error:
printe.output(json.dumps(error, indent=2))
# ---
pages = data.get("query", {}).get("pages", [])
# ---
for page in pages:
extlinks = page.get("extlinks", [])
title = page.get("title")
# ---
# info[title] = {"img_url": "", "case_url": "", "study_url": "", "caseId": "", "studyId": "", "img_id": ""}
info[title] = {"img_url": "", "img_id": ""}
# ---
for extlink in extlinks:
url = extlink.get("url")
# ma = re.match("https://radiopaedia.org/cases/(\d+)/studies/(\d+)", url)
if url.find("/images/") != -1:
info[title]["img_url"] = url

# elif re.match(r"^https://radiopaedia.org/cases/[^\d\/]+$", url):
# info[title]["case_url"] = url

# elif ma:
# info[title]["study_url"] = url
# info[title]["caseId"] = ma.group(1)
# info[title]["studyId"] = ma.group(2)
# ---
revisions = page.get("revisions")
if info[title]["img_url"]:
continue
# ---
if not revisions:
continue
# ---
revisions = revisions[0]["content"]
# match a line like "* Image ID: 58331091" in the revision content
ma = re.search(r"Image ID: (\d+)", revisions)
if ma:
info[title]["img_id"] = ma.group(1)
info[title]["img_url"] = id_to_url.get(str(ma.group(1)), "")
else:
print(revisions)
# ---
# printe.output(json.dumps(pages, indent=2))
# ---
return info


def one_img_info(title, study_id):
def one_img_info(title, study_id, json_data):
# ---
info = gt_img_info(title)
file = st_dic_infos / f"{study_id}_s_id.json"
# ---
if file.exists():
printe.output(f"<<green>> one_img_info: {file} exists")
with open(file, encoding="utf-8") as f:
return json.load(f)
# ---
id_to_url = {}
# ---
for x in json_data:
for n, image in enumerate(x["images"], start=1):
id_to_url[str(image["id"])] = image["public_filename"]
# ---
info = gt_img_info(title, id_to_url)
# ---
# printe.output(json.dumps(pages, indent=2))
# ---
dump_st(info, study_id)
dump_st(info, file)
# ---
return info


def test():
title = ["File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 4).jpg", "File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 2).jpg"]
info = one_img_info(title)
title = ["File:1st metatarsal head fracture (Radiopaedia 99187-120594 Frontal 1).png", "File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 02).jpg"]
info = gt_img_info(title)
# ---
print(json.dumps(info, indent=2))
# ---
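A hypothetical call site for the revised one_img_info(); the titles, study id and json_data payload below are invented and only illustrate the shapes the function expects.

from fix_mass.fix_sets.bots.get_img_info import one_img_info

titles = [
    "File:1st metatarsal head fracture (Radiopaedia 99187-120594 Frontal 1).png",
]

# json_data mirrors what the function iterates over: a list of studies, each
# carrying an "images" list of {"id", "public_filename"} entries
json_data = [
    {"images": [{"id": 58331091, "public_filename": "https://example.org/images/58331091/full.png"}]},
]

info = one_img_info(titles, "120594", json_data)
# info maps each title to {"img_url": ..., "img_id": ...}; the result is also
# cached as jsons/studies_files_infos/120594_s_id.json and reused on later runs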
100 changes: 100 additions & 0 deletions fix_mass/fix_sets/bots/mv_files.py
@@ -0,0 +1,100 @@
"""
from fix_mass.fix_sets.bots.mv_files import to_move_work

"""
import re
import json
import sys
from pathlib import Path
from newapi import printe

Dir = Path(__file__).parent.parent

st_dit = Dir / "jsons/studies_files"

from newapi.ncc_page import NEW_API

api_new = NEW_API("www", family="nccommons")
api_new.Login_to_wiki()


def dump_it(data):
for s_id, files in data.items():
with open(st_dit / f"{s_id}.json", "w", encoding="utf-8") as f:
json.dump(files, f, ensure_ascii=False, indent=2)
printe.output(f"<<green>> write {len(files)} to {s_id}.json")


def change_names(file_dict):
modified_file_dict = {}
new_t = []

for key, value in file_dict.items():
new_key = f"0{key}"

# new_filename = value.replace(value[value.rfind(" ") + 1 : value.find(").jpg")], new_key)
ma = re.match(r"^(.*?) \d+(\)\.\w+)$", value)
if not ma:
modified_file_dict[value] = value
continue
# ---
new_filename = ma.group(1) + " " + new_key + ma.group(2)
# ---
if new_filename in new_t:
printe.output(f"duplicte: {new_filename}")
return False

modified_file_dict[value] = new_filename

new_t.append(new_filename)

return modified_file_dict


def mv_file(old, new):
if "mv_test" in sys.argv:
return True
move_it = api_new.move(old, new, reason="")
return move_it


def mv_files_change_text(text, tab):

n_text = text
# ---
for old, new in tab.items():
# ---
mv = mv_file(old, new)
# ---
if mv:
n_text = n_text.replace(old, new)
# ---
return n_text


def to_move_work(text, to_move):
# ---
new_text = text
# ---
if "mv" in sys.argv:
for ty, files in to_move.items():
# ---
# if any file name starts with http, return the text unchanged
if any(x.startswith("http") for x in files.values()):
printe.output(f"<<red>> {ty} {len(files)} x.startswith(http)")
return text
# ---
printe.output(f"<<blue>> {ty} {len(files)}")
# printe.output(files)
# ---
neww = change_names(files)
# ---
if neww:
# ---
new_text = mv_files_change_text(new_text, neww)
# printe.output(json.dumps(neww, indent=2))
# ---
text = new_text
# ---
return text
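A sketch of how to_move_work() might be driven, assuming the script runs with "mv" (and optionally "mv_test") on the command line; the wikitext and file names are invented, and note that importing the module logs in to nccommons at import time.

from fix_mass.fix_sets.bots.mv_files import to_move_work

# change_names() zero-pads the trailing index ("... Frontal 1).png" becomes
# "... Frontal 01).png"), mv_file() performs the on-wiki move (skipped when
# "mv_test" is in sys.argv), and the returned text has old names replaced
text = "[[File:Example study (Radiopaedia 99187-120594 Frontal 1).png|thumb]]"

to_move = {
    "frontal": {
        "1": "File:Example study (Radiopaedia 99187-120594 Frontal 1).png",
    },
}

new_text = to_move_work(text, to_move)  # moves only happen when "mv" is in sys.argv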