Commit 4c74dba
MrIbrahem committed Apr 14, 2024
1 parent 6e93d7a commit 4c74dba
Showing 4 changed files with 143 additions and 48 deletions.
72 changes: 53 additions & 19 deletions fix_mass/fix_sets/bots/get_img_info.py
@@ -21,16 +21,20 @@

st_dic_infos = Dir / "jsons/studies_files_infos"

def dump_st(data, s_id):
file = st_dic_infos / f"{s_id}_s_id.json"

def dump_st(data, file):

with open(file, "w", encoding="utf-8") as f:
json.dump(data, f, ensure_ascii=False, indent=2)
printe.output(f"<<green>> write {len(data)} to file: {file}")

def gt_img_info(titles):

def gt_img_info(titles, id_to_url={}):
# ---
titles = [titles] if not isinstance(titles, list) else titles
# ---
titles = [x for x in titles if x]
# ---
info = {}
printe.output(f"one_img_info: {len(titles)=}")
# ---
@@ -54,11 +58,11 @@ def gt_img_info(titles):
# ---
params = {
"action": "query",
"titles": "|".join(titles),
# "titles": "|".join(titles),
# "prop": "revisions|categories|info|extlinks",
"prop": "extlinks",
"prop": "revisions|extlinks",
# "clprop": "sortkey|hidden", # categories
# "rvprop": "timestamp|content|user|ids", # revisions
"rvprop": "content", # revisions
# "cllimit": "max", # categories
"ellimit": "max", # extlinks
"formatversion": "2",
@@ -83,41 +87,71 @@ def gt_img_info(titles):
extlinks = page.get("extlinks", [])
title = page.get("title")
# ---
info[title] = {"img_url": "", "case_url": "", "study_url": "", "caseId": "", "studyId": ""}
# info[title] = {"img_url": "", "case_url": "", "study_url": "", "caseId": "", "studyId": "", "img_id": ""}
info[title] = {"img_url": "", "img_id": ""}
# ---
for extlink in extlinks:
url = extlink.get("url")
ma = re.match("https://radiopaedia.org/cases/(\d+)/studies/(\d+)", url)
# ma = re.match("https://radiopaedia.org/cases/(\d+)/studies/(\d+)", url)
if url.find("/images/") != -1:
info[title]["img_url"] = url

elif re.match(r"^https://radiopaedia.org/cases/[^\d\/]+$", url):
info[title]["case_url"] = url
# elif re.match(r"^https://radiopaedia.org/cases/[^\d\/]+$", url):
# info[title]["case_url"] = url

elif ma:
info[title]["study_url"] = url
info[title]["caseId"] = ma.group(1)
info[title]["studyId"] = ma.group(2)
# elif ma:
# info[title]["study_url"] = url
# info[title]["caseId"] = ma.group(1)
# info[title]["studyId"] = ma.group(2)
# ---
revisions = page.get("revisions")
if info[title]["img_url"]:
continue
# ---
if not revisions:
continue
# ---
revisions = revisions[0]["content"]
# match * Image ID: 58331091 in revisions.split("\n")
ma = re.search(r"Image ID: (\d+)", revisions)
if ma:
info[title]["img_id"] = ma.group(1)
info[title]["img_url"] = id_to_url.get(str(ma.group(1)), "")
else:
print(revisions)
# ---
# printe.output(json.dumps(pages, indent=2))
# ---
return info


def one_img_info(title, study_id):
def one_img_info(title, study_id, json_data):
# ---
info = gt_img_info(title)
file = st_dic_infos / f"{study_id}_s_id.json"
# ---
if file.exists():
printe.output(f"<<green>> one_img_info: {file} exists")
with open(file, encoding="utf-8") as f:
return json.load(f)
# ---
id_to_url = {}
# ---
for x in json_data:
for n, image in enumerate(x["images"], start=1):
id_to_url[str(image["id"])] = image["public_filename"]
# ---
info = gt_img_info(title, id_to_url)
# ---
# printe.output(json.dumps(pages, indent=2))
# ---
dump_st(info, study_id)
dump_st(info, file)
# ---
return info


def test():
title = ["File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 4).jpg", "File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 2).jpg"]
info = one_img_info(title)
title = ["File:1st metatarsal head fracture (Radiopaedia 99187-120594 Frontal 1).png", "File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 02).jpg"]
info = gt_img_info(title)
# ---
print(json.dumps(info, indent=2))
# ---
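For context, a minimal standalone sketch of the fallback introduced above: when a file page's external links carry no /images/ URL, the "Image ID: NNN" noted in the page wikitext is extracted and resolved through the id-to-URL map that one_img_info() now builds from the study stacks. The stack entry and wikitext below are invented sample data.

import re

# Invented stack entry shaped like the get_stacks() output consumed by one_img_info().
sample_stacks = [
    {
        "modality": "CT",
        "images": [
            {"id": 58331091, "position": 1, "plane_projection": "Axial",
             "public_filename": "https://prod-images-static.radiopaedia.org/images/58331091/sample.jpg"},
        ],
    },
]

# Same mapping one_img_info() passes to gt_img_info() as id_to_url.
id_to_url = {str(img["id"]): img["public_filename"] for entry in sample_stacks for img in entry["images"]}

# Fallback used in gt_img_info(): no /images/ extlink, so read "Image ID: NNN" from the revision text.
revision_text = "Some wikitext\n* Image ID: 58331091\nMore wikitext"
ma = re.search(r"Image ID: (\d+)", revision_text)
img_url = id_to_url.get(ma.group(1), "") if ma else ""
print(img_url)  # -> https://prod-images-static.radiopaedia.org/images/58331091/sample.jpg
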
5 changes: 5 additions & 0 deletions fix_mass/fix_sets/bots/mv_files.py
@@ -80,6 +80,11 @@ def to_move_work(text, to_move):
if "mv" in sys.argv:
for ty, files in to_move.items():
# ---
# if any file start with http return text
if any(x.startswith("http") for x in files.values()):
printe.output(f"<<red>> {ty} {len(files)} x.startswith(http)")
return text
# ---
printe.output(f"<<blue>> {ty} {len(files)}")
# printe.output(files)
# ---
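A small sketch of the guard added to to_move_work() above; the batch below is hypothetical, with one entry that was never resolved to a Commons file name.

# Hypothetical move batch: positions map to target names, as in to_move_work().
files = {
    1: "File:Demo study (Radiopaedia 0-0 Axial 1).jpg",
    2: "https://prod-images-static.radiopaedia.org/images/1/unresolved.jpg",
}

if any(x.startswith("http") for x in files.values()):
    # Mirrors the early return: leave the page text untouched rather than move a raw URL.
    print("unresolved URL in batch, skipping moves")
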
78 changes: 59 additions & 19 deletions fix_mass/fix_sets/bots/set_text.py
@@ -4,65 +4,105 @@
"""
from newapi import printe


def make_text(modality, files, set_title):
def make_text(modality, files, set_title, leen):
# ---
text = f"== {modality} ==\n"
if leen == 1:
text = ""

text += "{{Imagestack\n|width=850\n"
text += f"|title={set_title}\n|align=centre\n|loop=no\n"

# sort files {1: "file:...", 2: "file:..."}
files = {k: v for k, v in sorted(files.items())}

for n, image_name in files.items():
for _n, image_name in files.items():
text += f"|{image_name}|\n"
# ---
text += "\n}}\n\n"
text += "}}\n"
# ---
return text


def make_text_one_study(json_data, url_to_file, study_title):
def make_text_one_study(json_data, data, study_title):
# ---
text = ""
url_to_file = {v["img_url"]: x for x, v in data.items()}
img_id_to_file = {str(v["img_id"]): x for x, v in data.items()}
# ---
to_move = {}
# ---
modalities = set([x["modality"] for x in json_data])
# ---
printe.output(f"modalities: {modalities}")
# ---
noo = 0
# ---
for x in json_data:
# ---
noo = 0
# ---
modality = x["modality"]
images = x["images"]
images = x["images"]
# ---
print(f"modality: {modality}, images: {len(images)}")
ty = modality
# ---
# print(f"modality: {modality}, images: {len(images)}")
# ---
files = {}
# ---
# sort images by position key
images = sorted(images, key=lambda x: x["position"])
# images = sorted(images, key=lambda x: x["position"])
# ---
for n, image in enumerate(images, start=1):
# ---
plane_projection = image["plane_projection"]
# ---
if len(modalities) == 1 and plane_projection:
ty = plane_projection
# ---
if ty not in to_move:
to_move[ty] = {}
# ---
img_id = image["id"]
public_filename = image["public_filename"]
# ---
file_name = url_to_file.get(public_filename)
# ---
if not file_name:
file_name = img_id_to_file.get(str(img_id))
# print(f"img_id_to_file file_name: {file_name}")
# ---
if not file_name:
noo += 1
file_name = public_filename
# ---
files[n] = file_name
numb = len(to_move[ty]) + 1
# ---
# files[numb] = file_name
to_move[ty][numb] = file_name
# ---
# ---
print(f"noo: {noo}")
print(f"files: {len(files)}")
# ---
text += make_text(modality, files, study_title)
# ---
to_move[modality] = files
# ---
# # ---
# to_move[ty].update(files)
# ---
print(f"noo: {noo}")
# ---
text = ""
# ---
study_title2 = study_title
# ---
# if len(to_move) > 1:
# study_title2 = ""
# ---
# sum all files in to_move
all_files = sum([len(x) for x in to_move.values()])
# ---
if all_files == len(to_move):
printe.output("len to_move == all_files")
return text, to_move
# ---
for ty, files in to_move.items():
print(f"ty: {ty}, files: {len(files)}")
text += make_text(ty, files, study_title2, len(to_move))
# ---
return text, to_move
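To make the generated wikitext concrete, a quick usage sketch of the revised make_text(); it assumes the repository root (and its newapi dependency) is importable, and the file names and title are placeholders. With leen greater than 1 the modality heading is kept; with leen == 1 it is dropped.

# Assumes the repo root is on PYTHONPATH and the newapi package is available.
from fix_mass.fix_sets.bots.set_text import make_text

files = {2: "File:Demo (Radiopaedia 0-0 Axial 2).jpg", 1: "File:Demo (Radiopaedia 0-0 Axial 1).jpg"}
print(make_text("CT", files, "Demo study", 2))
# == CT ==
# {{Imagestack
# |width=850
# |title=Demo study
# |align=centre
# |loop=no
# |File:Demo (Radiopaedia 0-0 Axial 1).jpg|
# |File:Demo (Radiopaedia 0-0 Axial 2).jpg|
# }}
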
36 changes: 26 additions & 10 deletions fix_mass/fix_sets/fix.py
@@ -7,6 +7,7 @@
import json
import sys
from pathlib import Path
import xxlimited
from newapi import printe

from fix_mass.fix_sets.bots.get_img_info import one_img_info
@@ -15,7 +16,6 @@
from fix_mass.fix_sets.bots.study_files import get_study_files
from fix_mass.fix_sets.bots.mv_files import to_move_work
from fix_mass.fix_sets.jsons.files import studies_titles, studies_titles2
from fix_mass.fix_sets.bots.done import studies_done_append
from fix_mass.fix_sets.bots.done import studies_done_append, find_done #find_done(study_id)

from newapi.ncc_page import MainPage as ncc_MainPage
@@ -44,27 +44,26 @@ def update_set_text(title, n_text, study_id):
n_text += f"\n\n{cat_text}"
# ---
if p_text != n_text:
page.save(newtext=n_text, summary="update")
tyy = page.save(newtext=n_text, summary="update")
# ---
studies_done_append(study_id)
if tyy:
studies_done_append(study_id)
# ---


def work_text(study_id, study_title):
files = get_study_files(study_id)
# ---
data = one_img_info(files, study_id)
json_data = get_stacks(study_id)
# ---
data = one_img_info(files, study_id, json_data)
# ---
# 'File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 1).jpg': {'img_url': 'https://prod-images-static.radiopaedia.org/images/61855971/f11ad965ab35e44ae8ac9ed236afb1cf4547507d8f464cbc3c6316a4cb76fb32.jpg', 'case_url': 'https://radiopaedia.org/cases/appendicitis-ct-angiogram', 'study_url': 'https://radiopaedia.org/cases/154713/studies/134732', 'caseId': '154713', 'studyId': '134732'}
# printe.output(data)
# ---
url_to_file = {v["img_url"]: x for x, v in data.items()}
# ---
# printe.output(json.dumps(url_to_file, indent=2))
# ---
json_data = get_stacks(study_id)
# ---
text, to_move = make_text_one_study(json_data, url_to_file, study_title)
text, to_move = make_text_one_study(json_data, data, study_title)
# ---
return text, to_move

@@ -74,6 +73,9 @@ def work_one_study(study_id):
# ---
study_title = studies_titles.get(study_id)# or studies_titles2.get(study_id)
# ---
if not study_title and "studies_titles2" in sys.argv:
study_title = studies_titles2.get(study_id)
# ---
printe.output(f"study_id: {study_id}, study_title: {study_title}")
# ---
if not study_title:
@@ -82,10 +84,18 @@ def work_one_study(study_id):
# ---
if find_done(study_id):
printe.output(f"<<purple>> study_id: {study_id} already done")
return
if "nodone" not in sys.argv:
return
# ---
text, to_move = work_text(study_id, study_title)
# ---
text = text.strip()
# ---
if text.find("|http") != -1:
printe.output(f"<<red>> text has http links... study_id: {study_id}")
printe.output(text)
return
# ---
text = to_move_work(text, to_move)
# ---
update_set_text(study_title, text, study_id)
@@ -104,6 +114,12 @@ def main(ids):
ids = [arg for arg in sys.argv[1:] if arg.isdigit()]
# ---
if "studies_titles" in sys.argv:
# studies_titles keys not in studies_titles2
# ids = [ x for x in studies_titles.keys() if x not in studies_titles2 ]
ids = list(studies_titles.keys())
elif "studies_titles2" in sys.argv:
ids = [ x for x in studies_titles2.keys() if x not in studies_titles ]
# ---
printe.output(f"len of ids: {len(ids)}")
# ---
main(ids)
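As a usage note, the entry point above now reacts to a few extra command-line tokens; the invocations below are illustrative (the script path and study ID are placeholders, the flag names come from the diff).

# python fix_mass/fix_sets/fix.py 134732             # run the studies given as numeric IDs
# python fix_mass/fix_sets/fix.py studies_titles     # run every ID listed in studies_titles
# python fix_mass/fix_sets/fix.py studies_titles2    # run IDs present only in studies_titles2
# python fix_mass/fix_sets/fix.py 134732 nodone      # re-run a study even if find_done() marks it done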
