From 333f79751c4ac02e95e2e3a4fbc3d2388f1bfdd0 Mon Sep 17 00:00:00 2001 From: ibrahem Date: Sat, 15 Jun 2024 01:09:58 +0300 Subject: [PATCH 1/2] . --- .../count_files/Case_co.py | 0 .../count_files/__init__.py | 0 .../st3sort => fix_mass}/count_files/b.py | 2 +- .../st3sort => fix_mass}/count_files/do.py | 1 + .../st3sort => fix_mass}/count_files/fix.py | 0 fix_mass/fix_sets/__init__.py | 6 +- fix_mass/fix_sets/bots/__init__.py | 6 +- fix_mass/fix_sets/count_files/Case_co.py | 126 ------------- fix_mass/fix_sets/count_files/__init__.py | 7 - fix_mass/fix_sets/count_files/b.py | 51 ------ fix_mass/fix_sets/count_files/do.py | 104 ----------- fix_mass/fix_sets/fix.py | 2 +- fix_mass/fix_sets/start3.py | 173 ------------------ mass/radio/st3sort/start3.py | 52 +++--- 14 files changed, 36 insertions(+), 494 deletions(-) rename {mass/radio/st3sort => fix_mass}/count_files/Case_co.py (100%) rename {mass/radio/st3sort => fix_mass}/count_files/__init__.py (100%) rename {mass/radio/st3sort => fix_mass}/count_files/b.py (95%) rename {mass/radio/st3sort => fix_mass}/count_files/do.py (98%) rename {mass/radio/st3sort => fix_mass}/count_files/fix.py (100%) delete mode 100644 fix_mass/fix_sets/count_files/Case_co.py delete mode 100644 fix_mass/fix_sets/count_files/__init__.py delete mode 100644 fix_mass/fix_sets/count_files/b.py delete mode 100644 fix_mass/fix_sets/count_files/do.py delete mode 100644 fix_mass/fix_sets/start3.py diff --git a/mass/radio/st3sort/count_files/Case_co.py b/fix_mass/count_files/Case_co.py similarity index 100% rename from mass/radio/st3sort/count_files/Case_co.py rename to fix_mass/count_files/Case_co.py diff --git a/mass/radio/st3sort/count_files/__init__.py b/fix_mass/count_files/__init__.py similarity index 100% rename from mass/radio/st3sort/count_files/__init__.py rename to fix_mass/count_files/__init__.py diff --git a/mass/radio/st3sort/count_files/b.py b/fix_mass/count_files/b.py similarity index 95% rename from mass/radio/st3sort/count_files/b.py rename to fix_mass/count_files/b.py index 5072cd7f..b4182226 100644 --- a/mass/radio/st3sort/count_files/b.py +++ b/fix_mass/count_files/b.py @@ -48,4 +48,4 @@ if "del" in sys.argv: for x in errors: os.remove(studies_urls_to_files_dir / x) - print(f"delete {studies_urls_to_files_dir / x}") \ No newline at end of file + print(f"delete {studies_urls_to_files_dir / x}") diff --git a/mass/radio/st3sort/count_files/do.py b/fix_mass/count_files/do.py similarity index 98% rename from mass/radio/st3sort/count_files/do.py rename to fix_mass/count_files/do.py index 7dab5a22..939fe81a 100644 --- a/mass/radio/st3sort/count_files/do.py +++ b/fix_mass/count_files/do.py @@ -50,6 +50,7 @@ def main(ids_tab): studies = [study.split("/")[-1] for study in va["studies"]] # --- if not studies: + printe.output(f"!!! 
studies not found: {caseId=}.") continue # --- tab.append({"caseId": caseId, "title": title, "studies": studies}) diff --git a/mass/radio/st3sort/count_files/fix.py b/fix_mass/count_files/fix.py similarity index 100% rename from mass/radio/st3sort/count_files/fix.py rename to fix_mass/count_files/fix.py diff --git a/fix_mass/fix_sets/__init__.py b/fix_mass/fix_sets/__init__.py index d5106974..37fd5e5d 100644 --- a/fix_mass/fix_sets/__init__.py +++ b/fix_mass/fix_sets/__init__.py @@ -1,7 +1,7 @@ """ -python3 core8/pwb.py fix_mass/radio/cases_in_ids -python3 core8/pwb.py fix_mass/radio/to_work -python3 core8/pwb.py fix_mass/radio/st3/start3 get:500 +python3 core8/pwb.py mass/radio/cases_in_ids +python3 core8/pwb.py mass/radio/to_work +python3 core8/pwb.py mass/radio/st3/start3 get:500 """ diff --git a/fix_mass/fix_sets/bots/__init__.py b/fix_mass/fix_sets/bots/__init__.py index d5106974..37fd5e5d 100644 --- a/fix_mass/fix_sets/bots/__init__.py +++ b/fix_mass/fix_sets/bots/__init__.py @@ -1,7 +1,7 @@ """ -python3 core8/pwb.py fix_mass/radio/cases_in_ids -python3 core8/pwb.py fix_mass/radio/to_work -python3 core8/pwb.py fix_mass/radio/st3/start3 get:500 +python3 core8/pwb.py mass/radio/cases_in_ids +python3 core8/pwb.py mass/radio/to_work +python3 core8/pwb.py mass/radio/st3/start3 get:500 """ diff --git a/fix_mass/fix_sets/count_files/Case_co.py b/fix_mass/fix_sets/count_files/Case_co.py deleted file mode 100644 index d3e288d7..00000000 --- a/fix_mass/fix_sets/count_files/Case_co.py +++ /dev/null @@ -1,126 +0,0 @@ -""" -from fix_mass.radio.st3sort.count_files.Case_co import CaseDo -""" -import sys -import os -from pathlib import Path -import json -import traceback - -# --- -from newapi import printe -from fix_mass.radio.get_studies import get_images_stacks, get_images -from fix_mass.radio.bots.studies_utf import dump_studies_urls_to_files - -# --- -try: - import pywikibot - - pywikibotoutput = pywikibot.output -except ImportError: - pywikibotoutput = print -# --- -main_dir = Path(__file__).parent.parent -# --- -studies_dir = Path("/data/project/mdwiki/studies") -# --- -if not os.path.exists(studies_dir): - printe.output(f"<> studies_dir {studies_dir} not found") - studies_dir = main_dir / "studies" - printe.output(f"<> studies_dir set to {studies_dir}") - - -def printt(s): - if "nopr" in sys.argv: - return - printe.output(s) - - -class CaseDo: - def __init__(self, caseId, title, studies_ids): - self.caseId = caseId - self.title = title - self.studies_ids = studies_ids - self.img_to_url = {} - self.studies = {} - self.category = f"Category:Radiopaedia case {self.caseId} {self.title}" - # --- - - def get_studies(self): - for study in self.studies_ids: - st_file = studies_dir / f"{study}.json" - # --- - images = {} - # --- - if os.path.exists(st_file): - try: - with open(st_file, encoding="utf-8") as f: - images = json.loads(f.read()) - except Exception as e: - print("<> Traceback (most recent call last):") - printt(f"{study} : error") - print(e) - print(traceback.format_exc()) - print("CRITICAL:") - # --- - images = [image for image in images if image] - # --- - if not images: - printt(f"{study} : not found") - images = get_images_stacks(study) - # --- - if not images: - images = get_images(f"https://radiopaedia.org/cases/{self.caseId}/studies/{study}") - # --- - with open(st_file, "w", encoding="utf-8") as f: - json.dump(images, f, ensure_ascii=False, indent=2) - # --- - self.studies[study] = images - printt(f"study:{study} : len(images) = {len(images)}, st_file:{st_file}") - - def 
upload_images(self, study, images): - planes = {} - # --- - self.img_to_url[study] = {} - # --- - for i, image in enumerate(images, 1): - if not isinstance(image, dict): - continue - # --- - image_url = image.get("public_filename", "") - # --- - if not image_url: - continue - # --- - extension = image_url.split(".")[-1].lower() - # --- - if not extension: - extension = image["fullscreen_filename"].split(".")[-1].lower() - # --- - if extension == "bmp": - extension = "jpg" - # --- - image_id = image["id"] - plane = image["plane_projection"] - # --- - if plane not in planes: - planes[plane] = 0 - planes[plane] += 1 - # --- - file_name = f"{self.title} (Radiopaedia {self.caseId}-{study} {plane} {planes[plane]}).{extension}" - # --- - file_name = file_name.replace(" ", " ").replace(" ", " ").replace(" ", " ") - # --- - file_name = file_name.replace(":", ".").replace("/", ".") - # --- - self.img_to_url[study][f"File:{file_name}"] = {"url": image_url, "id": image_id} - - def start(self): - self.get_studies() - - for study, images in self.studies.items(): - printt(f"{study} : len(images) = {len(images)}") - # --- - self.upload_images(study, images) - - dump_studies_urls_to_files(self.img_to_url) diff --git a/fix_mass/fix_sets/count_files/__init__.py b/fix_mass/fix_sets/count_files/__init__.py deleted file mode 100644 index d5106974..00000000 --- a/fix_mass/fix_sets/count_files/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -""" - -python3 core8/pwb.py fix_mass/radio/cases_in_ids -python3 core8/pwb.py fix_mass/radio/to_work -python3 core8/pwb.py fix_mass/radio/st3/start3 get:500 - -""" diff --git a/fix_mass/fix_sets/count_files/b.py b/fix_mass/fix_sets/count_files/b.py deleted file mode 100644 index 10a07bf9..00000000 --- a/fix_mass/fix_sets/count_files/b.py +++ /dev/null @@ -1,51 +0,0 @@ -""" -python3 core8/pwb.py fix_mass/radio/st3sort/count_files/b - -""" -import os -import sys -import tqdm -import json -from pathlib import Path -from fix_mass.radio.bots.studies_utf import studies_urls_to_files_dir -# --- -Dir = Path(__file__).parent -# --- -errors = [] -all_files_to_url = {} -count_all_files = 0 -# --- -for x in tqdm.tqdm(os.listdir(studies_urls_to_files_dir)): - # --- - if not x.endswith(".json"): - continue - # --- - file = studies_urls_to_files_dir / x - # --- - try: - with open(file, encoding="utf-8") as f: - data = json.load(f) - except Exception as e: - errors.append(x) - continue - # --- - count_all_files += len(data) - # --- - for file, va in data.items(): - all_files_to_url[file] = va["url"] -# --- -print(f"{count_all_files=}") -print(f"{len(all_files_to_url)=}") -# --- -with open(Dir / "all_files_to_url.json", "w", encoding="utf-8") as f: - json.dump(all_files_to_url, f) -# --- -with open(Dir / "errors.json", "w", encoding="utf-8") as f: - json.dump(errors, f) -# --- -print(f"{len(errors)=}") -# --- -if "del" in sys.argv: - for x in errors: - os.remove(studies_urls_to_files_dir / x) - print(f"delete {studies_urls_to_files_dir / x}") \ No newline at end of file diff --git a/fix_mass/fix_sets/count_files/do.py b/fix_mass/fix_sets/count_files/do.py deleted file mode 100644 index f888bcc6..00000000 --- a/fix_mass/fix_sets/count_files/do.py +++ /dev/null @@ -1,104 +0,0 @@ -""" -python3 core8/pwb.py fix_mass/radio/st3sort/count_files/do nomult get:2323 - -tfj run mnt1 --mem 1Gi --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py fix_mass/radio/st3sort/do get:1 " -tfj run mnt2 --mem 1Gi --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py fix_mass/radio/st3sort/do 
get:2 " -tfj run mnt3 --mem 1Gi --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py fix_mass/radio/st3sort/do get:3 " -tfj run mnt4 --mem 1Gi --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py fix_mass/radio/st3sort/do get:4 " -tfj run mnt5 --mem 1Gi --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py fix_mass/radio/st3sort/do get:5 " -tfj run mnt6 --mem 1Gi --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py fix_mass/radio/st3sort/do get:6 " - -python3 core8/pwb.py fix_mass/radio/st3sort/do nomulti - -""" -import os -import sys - -sys.argv.append("dump_studies_urls_to_files") -import tqdm -import time -import json -from multiprocessing import Pool -from newapi import printe -from pathlib import Path -from fix_mass.radio.st3sort.count_files.Case_co import CaseDo -from fix_mass.radio.bots.studies_utf import studies_urls_to_files_dir - -main_dir = Path(__file__).parent.parent.parent - -def do_it(va): - # --- - caseId = va["caseId"] - title = va["title"] - studies = va["studies"] - # --- - bot = CaseDo(caseId, title, studies) - bot.start() - - -def main(ids_tab): - printe.output(f"<> start.py all: {len(ids_tab)}:") - # --- - tab = [] - # --- - n = 0 - for _, va in tqdm.tqdm(ids_tab.items()): - n += 1 - # --- - caseId = va["caseId"] - title = va["title"] - studies = [study.split("/")[-1] for study in va["studies"]] - # --- - if not studies: - continue - # --- - tab.append({"caseId": caseId, "title": title, "studies": studies}) - # --- - pool = Pool(processes=5) - pool.map(do_it, tab) - pool.close() - pool.terminate() - - -def start(): - with open(main_dir / "jsons/all_ids.json", encoding="utf-8") as f: - all_ids = json.load(f) - # --- - ids_tab = {} - # --- - # all files in studies_urls_to_files_dir - files = [x for x in os.listdir(studies_urls_to_files_dir) if x.endswith(".json")] - # --- - for file in files: - print(file) - break - # --- - for ii, va in tqdm.tqdm(all_ids.items()): - # --- - studies = [study.split("/")[-1] for study in va["studies"]] - # --- - new_s = [] - # --- - if "dump_studies_urls_to_files" in sys.argv: - for study in studies.copy(): - file = studies_urls_to_files_dir / f"{study}.json" - if not f"{study}.json" in files: - new_s.append(study) - # if os.path.exists(file): - # studies.remove(study) - # --- - if new_s: - # --- - ids_tab[ii] = va - # --- - print("xxxxxxxxxxxxxxxxxxxxxxxxxxx") - print(f"{len(ids_tab)=}, {len(all_ids)=}") - print("xxxxxxxxxxxxxxxxxxxxxxxxxxx") - # --- - time.sleep(3) - # --- - main(ids_tab) - - -if __name__ == "__main__": - start() diff --git a/fix_mass/fix_sets/fix.py b/fix_mass/fix_sets/fix.py index 6b47d923..a5d90df9 100644 --- a/fix_mass/fix_sets/fix.py +++ b/fix_mass/fix_sets/fix.py @@ -142,7 +142,7 @@ def main(ids): if __name__ == "__main__": - ids = [arg for arg in sys.argv[1:] if arg.isdigit()] + ids = [arg.strip() for arg in sys.argv if arg.strip().isdigit()] # --- if "studies_titles" in sys.argv: # studies_titles keys not in studies_titles2 diff --git a/fix_mass/fix_sets/start3.py b/fix_mass/fix_sets/start3.py deleted file mode 100644 index ae12a72c..00000000 --- a/fix_mass/fix_sets/start3.py +++ /dev/null @@ -1,173 +0,0 @@ -""" - -python3 core8/pwb.py fix_mass/fix_sets/start3 get:500 -python3 core8/pwb.py fix_mass/radio/st3sort/start3 nomulti - -python3 core8/pwb.py fix_mass/fix_sets/start3 nomulti ask - -tfj run mnx1 --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py fix_mass/fix_sets/start3 get:1 157" -tfj run mnx2 --image python3.9 --command 
"$HOME/local/bin/python3 core8/pwb.py fix_mass/fix_sets/start3 get:2 157" -tfj run mnx3 --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py fix_mass/fix_sets/start3 get:3 157" -tfj run mnx4 --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py fix_mass/fix_sets/start3 get:4 157" -tfj run gnr5 --mem 1Gi --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py fix_mass/fix_sets/start3 get:5 mdwiki" -tfj run gnr6 --mem 1Gi --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py fix_mass/fix_sets/start3 get:6 mdwiki" - -""" -import sys -import psutil -import tqdm -import json -import os -from pathlib import Path -from multiprocessing import Pool - -# --- -from newapi import printe -from mass.radio.st3sort.One_Case_New import OneCase - -# --- -radio_jsons_dir = Path(__file__).parent.parent.parent / "mass/radio/jsons/" -# --- -with open(radio_jsons_dir / "authors.json", encoding="utf-8") as f: - authors = json.load(f) -# --- -with open(radio_jsons_dir / "infos.json", encoding="utf-8") as f: - infos = json.load(f) -# --- -with open(radio_jsons_dir / "all_ids.json", encoding="utf-8") as f: - all_ids = json.load(f) -# --- -# cases_in_ids = [] -# --- -with open(radio_jsons_dir / "cases_in_ids.json", encoding="utf-8") as f: - cases_in_ids = json.load(f) -# --- -ids_by_caseId = {x: v for x, v in all_ids.items() if x not in cases_in_ids} -# --- -del cases_in_ids - - -def print_memory(): - _red_ = "\033[91m%s\033[00m" - - usage = psutil.Process(os.getpid()).memory_info().rss - usage = usage / 1024 // 1024 - - print(_red_ % f"memory usage: psutil {usage} MB") - - -def do_it(va): - # --- - case_url = va["case_url"] - caseId = va["caseId"] - title = va["title"] - studies = va["studies"] - author = va["author"] - # --- - bot = OneCase(case_url, caseId, title, studies, author) - bot.start() - # --- - del bot, author, title, studies - - -def multi_work(tab, numb=10): - done = 0 - for i in range(0, len(tab), numb): - group = tab[i : i + numb] - # --- - done += numb - printe.output(f"<> done: {done}:") - # --- - print_memory() - # --- - if "nomulti" in sys.argv or len(tab) < 10: - for x in group: - do_it(x) - else: - pool = Pool(processes=5) - pool.map(do_it, group) - pool.close() - pool.terminate() - - -def ddo(taba): - ids_tabs = taba - tabs = {} - print(f"all cases: {len(ids_tabs)}") - length = (len(ids_tabs) // 6) + 1 - for i in range(0, len(ids_tabs), length): - num = i // length + 1 - tabs[str(num)] = dict(list(ids_tabs.items())[i : i + length]) - # print(f'tab {num} : {len(tabs[str(num)])}') - print(f'tfj run mnx{num} --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py fix_mass/fix_sets/start3 get:{num} {len(tabs[str(num)])}"') - - for arg in sys.argv: - arg, _, value = arg.partition(":") - if arg == "get": - ids_tabs = tabs[value] - print(f"work in {len(ids_tabs)} cases") - del tabs - - return ids_tabs - - -def main(ids_tab): - printe.output(f"<> start.py all: {len(ids_tab)}:") - # --- - print_memory() - # --- - if "test" not in sys.argv and len(ids_tab) > 100: - ids_tab = ddo(ids_tab) - # --- - tab = [] - # --- - n = 0 - for _, va in tqdm.tqdm(ids_tab.items()): - n += 1 - # --- - caseId = va["caseId"] - case_url = va["url"] - # --- - author = va.get("author", "") - # --- - if not author: - author = infos.get(case_url, {}).get(str(caseId), "") - # --- - if not author: - author = authors.get(str(caseId), "") - # --- - title = va["title"] - # --- - studies = [study.split("/")[-1] for study in va["studies"]] - # --- - if not studies: - continue - # 
--- - tab.append({"caseId": caseId, "case_url": case_url, "title": title, "studies": studies, "author": author}) - # --- - del ids_tab - # --- - multi_work(tab) - - -def main_by_ids(ids): - printe.output(f"<> start.py main_by_ids: {len(ids)=}:") - # --- - ids_tab = {caseId: all_ids[caseId] for caseId in ids if caseId in all_ids} - # --- - not_in = [c for c in ids if c not in all_ids] - # --- - print(f"main_by_ids caseId not in all_ids: {len(not_in)}") - # --- - main(ids_tab) - - -if __name__ == "__main__": - ids = [arg for arg in sys.argv[1:] if arg.isdigit()] - # --- - ids = {x: all_ids[x] for x in ids if x in all_ids} - # --- - if ids: - main(ids) - else: - main(ids_by_caseId) diff --git a/mass/radio/st3sort/start3.py b/mass/radio/st3sort/start3.py index 2d4a1cc1..e215bd90 100644 --- a/mass/radio/st3sort/start3.py +++ b/mass/radio/st3sort/start3.py @@ -1,6 +1,6 @@ """ -python3 core8/pwb.py mass/radio/st3sort/start3 nomulti ask +python3 core8/pwb.py mass/radio/st3sort/start3 nomulti ask 97387 python3 core8/pwb.py mass/radio/st3sort/start3 get:500 python3 core8/pwb.py mass/radio/st3sort/start3 dump_studies_urls_to_files nomulti python3 /data/project/mdwiki/pybot/mass/radio/st3/start3.py test @@ -24,27 +24,30 @@ # --- from newapi import printe from mass.radio.st3sort.One_Case_New import OneCase + # --- -main_dir = Path(__file__).parent.parent +radio_jsons_dir = Path(__file__).parent.parent / "jsons" # --- -with open(main_dir / "jsons/authors.json", encoding="utf-8") as f: +with open(radio_jsons_dir / "authors.json", encoding="utf-8") as f: authors = json.load(f) # --- -with open(main_dir / "jsons/infos.json", encoding="utf-8") as f: +with open(radio_jsons_dir / "infos.json", encoding="utf-8") as f: infos = json.load(f) # --- -with open(main_dir / "jsons/all_ids.json", encoding="utf-8") as f: +with open(radio_jsons_dir / "all_ids.json", encoding="utf-8") as f: all_ids = json.load(f) # --- # cases_in_ids = [] # --- -with open(main_dir / "jsons/cases_in_ids.json", encoding="utf-8") as f: +with open(radio_jsons_dir / "cases_in_ids.json", encoding="utf-8") as f: cases_in_ids = json.load(f) # --- -ids_by_caseId = { - x: v - for x, v in all_ids.items() if x not in cases_in_ids -} +ids_by_caseId = {x: v for x, v in all_ids.items() if x not in cases_in_ids} +# --- +if "allids" in sys.argv: + ids_by_caseId = all_ids.copy() +# --- +printe.output(f"{len(ids_by_caseId)=}, {len(cases_in_ids)=}") # --- del cases_in_ids @@ -75,7 +78,7 @@ def do_it(va): def multi_work(tab, numb=10): done = 0 for i in range(0, len(tab), numb): - group = tab[i:i + numb] + group = tab[i : i + numb] # --- done += numb printe.output(f"<> done: {done}:") @@ -99,9 +102,9 @@ def ddo(taba): length = (len(ids_tabs) // 6) + 1 for i in range(0, len(ids_tabs), length): num = i // length + 1 - tabs[str(num)] = dict(list(ids_tabs.items())[i:i + length]) + tabs[str(num)] = dict(list(ids_tabs.items())[i : i + length]) # print(f'tab {num} : {len(tabs[str(num)])}') - print(f'tfj run mnx{num} --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py mass/radio/st3/start3 get:{num} {len(tabs[str(num)])}"') + print(f'tfj run mnx{num} --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py mass/radio/st3sort/start3 get:{num} {len(tabs[str(num)])}"') for arg in sys.argv: arg, _, value = arg.partition(":") @@ -143,15 +146,10 @@ def main(ids_tab): studies = [study.split("/")[-1] for study in va["studies"]] # --- if not studies: + printe.output(f"!!! 
studies not found: {caseId=}.") continue # --- - tab.append({ - "caseId": caseId, - "case_url": case_url, - "title": title, - "studies": studies, - "author": author - }) + tab.append({"caseId": caseId, "case_url": case_url, "title": title, "studies": studies, "author": author}) # --- del ids_tab # --- @@ -161,10 +159,7 @@ def main(ids_tab): def main_by_ids(ids): printe.output(f"<> start.py main_by_ids: {len(ids)=}:") # --- - ids_tab = { - caseId: all_ids[caseId] - for caseId in ids if caseId in all_ids - } + ids_tab = {caseId: all_ids[caseId] for caseId in ids if caseId in all_ids} # --- not_in = [c for c in ids if c not in all_ids] # --- @@ -174,4 +169,11 @@ def main_by_ids(ids): if __name__ == "__main__": - main(ids_by_caseId) + ids = [arg.strip() for arg in sys.argv if arg.strip().isdigit()] + # --- + ids = {x: all_ids[x] for x in ids if x in all_ids} + # --- + if ids: + main(ids) + else: + main(ids_by_caseId) From 718cb99fdb7c00e621e65f5754c8e7c33a7e582b Mon Sep 17 00:00:00 2001 From: ibrahem Date: Sat, 15 Jun 2024 05:18:40 +0300 Subject: [PATCH 2/2] . --- fix_mass/fix_sets/bots/find_from_url.py | 66 +++ fix_mass/fix_sets/bots/get_img_info.py | 14 +- fix_mass/fix_sets/bots/set_text.py | 24 +- fix_mass/fix_sets/bots/set_text2.py | 116 +++++ fix_mass/fix_sets/fix.py | 15 +- fix_mass/fix_sets/jsons/files.py | 4 +- fix_mass/fix_sets/jsons/find_from_url.jsonl | 31 ++ fix_mass/fix_sets/lists/sf_infos.py | 133 ++++++ fix_mass/fix_sets/lists/study_case_cats.py | 3 +- fix_mass/fix_sets/new.py | 113 +++++ fix_mass/fix_sets/read_sf_infos.py | 119 +++++ fix_mass/fix_sets/s.sh | 4 + mass/radio/bots/add_cat.py | 16 +- mass/radio/st3/One_Case_New.py | 59 ++- mass/radio/st3/o.py | 4 + mass/radio/st3/start3.py | 65 ++- mass/radio/st3sort/One_Case_New.py | 474 -------------------- mass/radio/st3sort/__init__.py | 0 mass/radio/st3sort/co.py | 21 - mass/radio/st3sort/count.py | 137 ------ mass/radio/st3sort/files.py | 74 --- mass/radio/st3sort/miss.py | 32 -- mass/radio/st3sort/na.py | 29 -- mass/radio/st3sort/o.py | 22 - mass/radio/st3sort/start3.py | 179 -------- mass/radio/st3sort/wanted.py | 53 --- 26 files changed, 720 insertions(+), 1087 deletions(-) create mode 100644 fix_mass/fix_sets/bots/find_from_url.py create mode 100644 fix_mass/fix_sets/bots/set_text2.py create mode 100644 fix_mass/fix_sets/jsons/find_from_url.jsonl create mode 100644 fix_mass/fix_sets/lists/sf_infos.py create mode 100644 fix_mass/fix_sets/new.py create mode 100644 fix_mass/fix_sets/read_sf_infos.py create mode 100644 fix_mass/fix_sets/s.sh delete mode 100644 mass/radio/st3sort/One_Case_New.py delete mode 100644 mass/radio/st3sort/__init__.py delete mode 100644 mass/radio/st3sort/co.py delete mode 100644 mass/radio/st3sort/count.py delete mode 100644 mass/radio/st3sort/files.py delete mode 100644 mass/radio/st3sort/miss.py delete mode 100644 mass/radio/st3sort/na.py delete mode 100644 mass/radio/st3sort/o.py delete mode 100644 mass/radio/st3sort/start3.py delete mode 100644 mass/radio/st3sort/wanted.py diff --git a/fix_mass/fix_sets/bots/find_from_url.py b/fix_mass/fix_sets/bots/find_from_url.py new file mode 100644 index 00000000..39c2d859 --- /dev/null +++ b/fix_mass/fix_sets/bots/find_from_url.py @@ -0,0 +1,66 @@ +""" + +from fix_mass.fix_sets.bots.find_from_url import find_file_name_from_url + +""" +import jsonlines +# import os +from pathlib import Path +from newapi.ncc_page import NEW_API +from newapi import printe + + +api_new = NEW_API("www", family="nccommons") +api_new.Login_to_wiki() + +jsons_dir = 
Path(__file__).parent.parent / "jsons" + +url_to_file_file = jsons_dir / "find_from_url.jsonl" + +if not url_to_file_file.exists(): + url_to_file_file.write_text('{"url": "", "file_name": ""}') + +data = jsonlines.open(url_to_file_file) +data = {d["url"]: d["file_name"] for d in data} + + +def append_data(url, file_name): + data[url] = file_name + # --- + with jsonlines.open(url_to_file_file, mode="a") as writer: + writer.write({"url": url, "file_name": file_name}) + + +def get_from_api(url): + # --- + params = {"action": "upload", "format": "json", "filename": "Wiki.jpg", "url": url, "stash": 1, "formatversion": "2"} + # --- + # { "upload": { "result": "Warning", "warnings": { "duplicate": [ "Angiodysplasia_-_cecal_active_bleed_(Radiopaedia_168775-136954_Coronal_91).jpeg" ] }, "filekey": "1b00hc5unqxw.olk8pi.13.", "sessionkey": "1b00hc5unqxw.olk8pi.13." } } + # --- + data = api_new.post_params(params) + # --- + duplicate = data.get("upload", {}).get("warnings", {}).get("duplicate", []) + # --- + if not duplicate: + return "" + # --- + du = "File:" + duplicate[0] + du = du.replace("_", " ") + # --- + printe.output(f"find_file_name_from_url: {du}") + # --- + return du + + +def find_file_name_from_url(url): + na = "" + if url in data: + printe.output(f"find_file_name_from_url: {data[url]}") + return data[url] + # --- + na = get_from_api(url) + # --- + if na: + append_data(url, na) + # --- + return na diff --git a/fix_mass/fix_sets/bots/get_img_info.py b/fix_mass/fix_sets/bots/get_img_info.py index cd829a2c..0df27b52 100644 --- a/fix_mass/fix_sets/bots/get_img_info.py +++ b/fix_mass/fix_sets/bots/get_img_info.py @@ -5,10 +5,10 @@ from fix_mass.fix_sets.bots.get_img_info import one_img_info """ -import sys +# import sys import re import json -import os +# import os from pathlib import Path from newapi import printe @@ -23,13 +23,17 @@ def dump_st(data, file): - + with open(file, "w", encoding="utf-8") as f: json.dump(data, f, ensure_ascii=False, indent=2) printe.output(f"<> write {len(data)} to file: {file}") -def gt_img_info(titles, id_to_url={}): +def gt_img_info(titles, id_to_url=None): + # --- + if not id_to_url: + id_to_url = {} + # --- # --- titles = [titles] if not isinstance(titles, list) else titles # --- @@ -137,7 +141,7 @@ def one_img_info(title, study_id, json_data): id_to_url = {} # --- for x in json_data: - for n, image in enumerate(x["images"], start=1): + for _, image in enumerate(x["images"], start=1): id_to_url[str(image["id"])] = image["public_filename"] # --- info = gt_img_info(title, id_to_url) diff --git a/fix_mass/fix_sets/bots/set_text.py b/fix_mass/fix_sets/bots/set_text.py index 5fe14e20..85b08fe2 100644 --- a/fix_mass/fix_sets/bots/set_text.py +++ b/fix_mass/fix_sets/bots/set_text.py @@ -6,6 +6,9 @@ """ from newapi import printe from fix_mass.fix_sets.bots.has_url import has_url_append +from fix_mass.fix_sets.bots.find_from_url import find_file_name_from_url +from fix_mass.fix_sets.lists.sf_infos import from_sf_infos # from_sf_infos(url, study_id) + def make_text(modality, files, set_title, leen): # --- @@ -29,7 +32,7 @@ def make_text(modality, files, set_title, leen): def make_text_one_study(json_data, data, study_title, study_id): # --- - url_to_file = {v["img_url"]: x for x, v in data.items()} + url_to_file = {v["img_url"]: x for x, v in data.items()} img_id_to_file = {str(v["img_id"]): x for x, v in data.items()} # --- to_move = {} @@ -43,14 +46,12 @@ def make_text_one_study(json_data, data, study_title, study_id): for x in json_data: # --- modality = 
x["modality"] - images = x["images"] + images = x["images"] # --- ty = modality # --- # print(f"modality: {modality}, images: {len(images)}") # --- - files = {} - # --- # sort images by position key # images = sorted(images, key=lambda x: x["position"]) # --- @@ -71,7 +72,15 @@ def make_text_one_study(json_data, data, study_title, study_id): # --- if not file_name: file_name = img_id_to_file.get(str(img_id)) - # print(f"img_id_to_file file_name: {file_name}") + # --- + if not file_name: + file_name = from_sf_infos(public_filename, study_id) + # --- + if not file_name: + file_name = find_file_name_from_url(public_filename) + # --- + if file_name and not file_name.startswith("File:"): + file_name = "File:" + file_name # --- if not file_name: noo += 1 @@ -79,12 +88,7 @@ def make_text_one_study(json_data, data, study_title, study_id): # --- numb = len(to_move[ty]) + 1 # --- - # files[numb] = file_name to_move[ty][numb] = file_name - # --- - # --- - # # --- - # to_move[ty].update(files) # --- print(f"noo: {noo}") # --- diff --git a/fix_mass/fix_sets/bots/set_text2.py b/fix_mass/fix_sets/bots/set_text2.py new file mode 100644 index 00000000..86ad4518 --- /dev/null +++ b/fix_mass/fix_sets/bots/set_text2.py @@ -0,0 +1,116 @@ +""" + +from fix_mass.fix_sets.bots.set_text import make_text_one_study +from fix_mass.fix_sets.bots.done import studies_done_append + +""" +from newapi import printe +from fix_mass.fix_sets.bots.has_url import has_url_append +from fix_mass.fix_sets.bots.find_from_url import find_file_name_from_url +from fix_mass.fix_sets.lists.sf_infos import from_sf_infos # from_sf_infos(url, study_id) + + +def make_text_one_study(json_data, data, study_title, study_id): + # --- + url_to_file = {v["img_url"]: x for x, v in data.items()} + # --- + to_move = {} + # --- + modalities = set([x["modality"] for x in json_data]) + # --- + printe.output(f"modalities: {modalities}") + # --- + noo = 0 + # --- + urlls = {} + # --- + texts = {} + # --- + for x in json_data: + # --- + modality = x["modality"] + images = x["images"] + # --- + ty = modality + # --- + # print(f"modality: {modality}, images: {len(images)}") + # --- + # sort images by position key + images = sorted(images, key=lambda x: x["position"]) + # --- + for _n, image in enumerate(images, start=1): + # --- + plane_projection = image["plane_projection"] + aux_modality = image["aux_modality"] + # --- + # if len(modalities) == 1 and plane_projection: + ty = plane_projection + # --- + if aux_modality: + ty = f"{plane_projection} {aux_modality}" + # --- + if ty not in texts: + texts[ty] = "" + # --- + if ty not in to_move: + to_move[ty] = {} + # --- + public_filename = image["public_filename"] + # --- + texts[ty] += f"|{public_filename}|\n" + # --- + file_name = "" + # --- + # file_name = url_to_file.get(public_filename) + # # --- + # if not file_name: + # file_name = from_sf_infos(public_filename, study_id) + # --- + if not file_name: + file_name = find_file_name_from_url(public_filename) + # --- + if file_name and not file_name.startswith("File:"): + file_name = "File:" + file_name + # --- + if file_name: + urlls[public_filename] = file_name + else: + noo += 1 + file_name = public_filename + # --- + numb = len(to_move[ty]) + 1 + # --- + to_move[ty][numb] = file_name + # --- + print(f"noo: {noo}") + # --- + text = "" + # --- + study_title2 = study_title + # --- + for ty, txt in texts.copy().items(): + for url, file_name in urlls.items(): + txt = txt.replace(url, file_name) + # --- + texts[ty] = txt + # --- + # sum all files in to_move + 
all_files = sum([len(x) for x in to_move.values()]) + # --- + if all_files == len(to_move): + printe.output("len to_move == all_files") + has_url_append(study_id) + return text, to_move + # --- + for ty, files in to_move.items(): + # --- + print(f"ty: {ty}, files: {len(files)}") + # --- + text += f"== {ty} ==\n" + text += "{{Imagestack\n|width=850\n" + text += f"|title={study_title2}\n|align=centre\n|loop=no\n" + text += texts[ty].strip() + text += "\n}}\n" + # --- + # --- + return text, to_move diff --git a/fix_mass/fix_sets/fix.py b/fix_mass/fix_sets/fix.py index a5d90df9..ae4c580e 100644 --- a/fix_mass/fix_sets/fix.py +++ b/fix_mass/fix_sets/fix.py @@ -1,17 +1,19 @@ """ +python3 core8/pwb.py fix_mass/fix_sets/fix 117540 ask nohasurl nodone printtext python3 core8/pwb.py fix_mass/fix_sets/fix studies_titles ask -python3 core8/pwb.py fix_mass/fix_sets/fix 134732 +python3 core8/pwb.py fix_mass/fix_sets/fix 117539 python3 core8/pwb.py fix_mass/fix_sets/fix 127660 nomulti tfj run fixmass --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py fix_mass/fix_sets/fix studies_titles" + """ -import json +# import json import sys from pathlib import Path -# import xxlimited from newapi import printe +from newapi.ncc_page import MainPage as ncc_MainPage from fix_mass.fix_sets.bots.get_img_info import one_img_info from fix_mass.fix_sets.bots.stacks import get_stacks # get_stacks(study_id) @@ -22,7 +24,6 @@ from fix_mass.fix_sets.bots.done import studies_done_append, find_done, already_done from fix_mass.fix_sets.bots.has_url import has_url_append, find_has_url, already_has_url -from newapi.ncc_page import MainPage as ncc_MainPage main_dir = Path(__file__).parent @@ -66,7 +67,7 @@ def work_text(study_id, study_title): # --- data = one_img_info(files, study_id, json_data) # --- - # 'File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 1).jpg': {'img_url': 'https://prod-images-static.radiopaedia.org/images/61855971/f11ad965ab35e44ae8ac9ed236afb1cf4547507d8f464cbc3c6316a4cb76fb32.jpg', 'case_url': 'https://radiopaedia.org/cases/appendicitis-ct-angiogram', 'study_url': 'https://radiopaedia.org/cases/154713/studies/134732', 'caseId': '154713', 'studyId': '134732'} + # 'File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 
1).jpg': {'img_url': 'https://...', 'case_url': 'https://radiopaedia.org/cases/appendicitis-ct-angiogram', 'study_url': 'https://radiopaedia.org/cases/154713/studies/134732', 'caseId': '154713', 'studyId': '134732'} # printe.output(data) # --- # printe.output(json.dumps(url_to_file, indent=2)) @@ -125,12 +126,12 @@ def main(ids): # --- if "nohasurl" not in sys.argv: ids2 = [x for x in ids if x not in already_has_url] - printe.output(f"found {len(ids) - len(ids2)} has_url") + printe.output(f"found {len(ids) - len(ids2)} has_url, add 'nohasurl' to sys.argv") ids = ids2 # --- if "nodone" not in sys.argv: ids2 = [x for x in ids if x not in already_done] - printe.output(f"found {len(ids) - len(ids2)} done") + printe.output(f"found {len(ids) - len(ids2)} done, add 'nodone' to sys.argv") ids = ids2 # --- printe.output(f"<> len of ids: {len(ids)}") diff --git a/fix_mass/fix_sets/jsons/files.py b/fix_mass/fix_sets/jsons/files.py index 1889f784..2e188f61 100644 --- a/fix_mass/fix_sets/jsons/files.py +++ b/fix_mass/fix_sets/jsons/files.py @@ -17,8 +17,8 @@ # --- with open( jsons_dir / "studies_titles2.json", "r", encoding="utf-8") as f: studies_titles2 = json.load(f) - print(f"{len(studies_titles2)=}") + print(f"studies_titles2: {len(studies_titles2)=}") # --- with open( jsons_dir / "study_to_case_cats.json", "r", encoding="utf-8") as f: study_to_case_cats = json.load(f) - print(f"{len(study_to_case_cats)=}") + print(f"study_to_case_cats: {len(study_to_case_cats)=}") diff --git a/fix_mass/fix_sets/jsons/find_from_url.jsonl b/fix_mass/fix_sets/jsons/find_from_url.jsonl new file mode 100644 index 00000000..caa2641d --- /dev/null +++ b/fix_mass/fix_sets/jsons/find_from_url.jsonl @@ -0,0 +1,31 @@ +{"url": "https://prod-images-static.radiopaedia.org/images/57185071/380dfacba69f4e273da1cad6e3531b34f6328d7dc987f37676cab3b69e991764.jpeg", "file_name": "Angiodysplasia - cecal active bleed (Radiopaedia 168775-136954 Coronal 91).jpeg"} +{"url": "https://prod-images-static.radiopaedia.org/images/57185306/380dfacba69f4e273da1cad6e3531b34f6328d7dc987f37676cab3b69e991764.jpeg", "file_name": "Angiodysplasia - cecal active bleed (Radiopaedia 168775-136954 Coronal 91).jpeg"} +{"url": "https://prod-images-static.radiopaedia.org/images/61855971/f11ad965ab35e44ae8ac9ed236afb1cf4547507d8f464cbc3c6316a4cb76fb32.jpg", "file_name": "File:Appendicitis (CT angiogram) (Radiopaedia 0155470153-01534732 This comic explains the pathophysiology of appendicitis. 015).jpg"} +{"url": "https://prod-images-static.radiopaedia.org/images/61855972/556d77f84f8f02fbe0ae31871cddd22965ee66bc2abb4f8824a929088aa1a40d.jpg", "file_name": "File:Appendicitis (CT angiogram) (Radiopaedia 154713-13473016 This comic explains the pathophysiology of appendicitis. 016).jpg"} +{"url": "https://prod-images-static.radiopaedia.org/images/61855970/ec239eac0b79ea139082532e9525a59b163617be6fb29aa1c8bacd3822059a1b.jpg", "file_name": "File:Appendicitis (CT angiogram) (Radiopaedia 15471014-1014470142 This comic explains the pathophysiology of appendicitis. 014).jpg"} +{"url": "https://prod-images-static.radiopaedia.org/images/61855973/2bdea73556100c7fb71c76c05394c69df2b00153ab6b00647c53c51ee7c88f3d.jpg", "file_name": "File:Appendicitis (CT angiogram) (Radiopaedia 15017713-13017732 This comic explains the pathophysiology of appendicitis. 
017).jpg"} +{"url": "https://prod-images-static.radiopaedia.org/images/61855969/496b5682669f39d12df664dc9be6662322dcc657c8f7342be31caa31e996ac8f.jpg", "file_name": "File:Appendicitis (CT angiogram) (Radiopaedia 10134713-134732 This comic explains the pathophysiology of appendicitis. 013).jpg"} +{"url": "https://prod-images-static.radiopaedia.org/images/61855974/4cdfff6a6312ce7a83f69848d4fbfe4c6ae488b74540e595b2fc055fbb33c887.jpg", "file_name": "File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 018).jpg"} +{"url": "https://prod-images-static.radiopaedia.org/images/61855968/c470f2fc3ca20144fa07bcddea18a6f1c7c168f29138c59c70d33dcb3beadae4.jpg", "file_name": "File:Appendicitis (CT angiogram) (Radiopaedia 15401213-13401232 This comic explains the pathophysiology of appendicitis. 012).jpg"} +{"url": "https://prod-images-static.radiopaedia.org/images/61855975/eb70507d776c3affc082ed759a4c52223a94ff161230f39f18db6cd1109ea4b2.jpg", "file_name": "File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 019).jpg"} +{"url": "https://prod-images-static.radiopaedia.org/images/61855967/c6302597bf26dc800178e8ce62bdaebe9aa72d5097fe90073c67908fa659ae74.jpg", "file_name": "File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 011).jpg"} +{"url": "https://prod-images-static.radiopaedia.org/images/61855976/3dc33f83fa27028c395947891d08ce43a7b493fbf1ae03d804ca457578f9c902.jpg", "file_name": "File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 020).jpg"} +{"url": "https://prod-images-static.radiopaedia.org/images/61855966/78ec38a1e32e6cb6d637a568ce01a6e29ad966ce91a14e0dd42eacc3ad5d5473.jpg", "file_name": "File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 010).jpg"} +{"url": "https://prod-images-static.radiopaedia.org/images/61855977/a20a13ecf99c0c175bd3576af2a4f757d031b5bbbce9cc79fdc9a96bdc7d3c7c.jpg", "file_name": "File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 021).jpg"} +{"url": "https://prod-images-static.radiopaedia.org/images/61855965/d8bc5ca2bcb626b316a0275587ef95de40b62d92a40cbad5bec572fffc821ea6.jpg", "file_name": "File:Appendicitis (CT angiogram) (Radiopaedia 154709-094732 This comic explains the pathophysiology of appendicitis. 09).jpg"} +{"url": "https://prod-images-static.radiopaedia.org/images/61855978/e4115aa9ee1d7e138ba560c887ffb1c8bc22b483094a8b3e20fb9e734a32bc18.jpg", "file_name": "File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 022).jpg"} +{"url": "https://prod-images-static.radiopaedia.org/images/61855964/699ce450c99a0fde63f9360c6a2d60cd4a802f5510ede66c8c8ffba593e27610.jpg", "file_name": "File:Appendicitis (CT angiogram) (Radiopaedia 084713-134732 This comic explains the pathophysiology of appendicitis. 08).jpg"} +{"url": "https://prod-images-static.radiopaedia.org/images/61855979/5c62998e367be16f43c42f6869e7b75cae48efb426945a453dc65a82cad28ff9.jpg", "file_name": "File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 
023).jpg"} +{"url": "https://prod-images-static.radiopaedia.org/images/61855963/129233a36a8917809a9d483d6998620c7f3cc5fc85b96aae322261fe813591f7.jpg", "file_name": "File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 07).jpg"} +{"url": "https://prod-images-static.radiopaedia.org/images/61855980/3120ccf739364d832758331349440a53cbffd2fcdbcfef8736cbada186b5e3e5.jpg", "file_name": "File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 024).jpg"} +{"url": "https://prod-images-static.radiopaedia.org/images/61855962/8aa7150a92adbf34d89334093836e5b188d899c6e23e0f4ece1aac6176c15fb3.jpg", "file_name": "File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 06).jpg"} +{"url": "https://prod-images-static.radiopaedia.org/images/61855981/3a4ecfeebf99dae61e06bb8b5abd406a5ec17d321a118813e7480cf24bdb6ffc.jpg", "file_name": "File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 025).jpg"} +{"url": "https://prod-images-static.radiopaedia.org/images/61855961/1924cfe20a5c2041c72585a9faa5a3eaf8b1dfea8cbe6f054eebdcb8439b9b83.jpg", "file_name": "File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 05).jpg"} +{"url": "https://prod-images-static.radiopaedia.org/images/61855982/1f7db2c678c53ed6f859695aa47c28149d024f50dbaf247c7ae047f9f319df01.jpg", "file_name": "File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 026).jpg"} +{"url": "https://prod-images-static.radiopaedia.org/images/61855960/11d72f7e000a703ea3d1035716eb63d4a8939b03da77fa47feb6cf9c9f503857.jpg", "file_name": "File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 04).jpg"} +{"url": "https://prod-images-static.radiopaedia.org/images/61855983/4dc06872dbd5ad5d2f388a2d97c72f35711d6ba9bc1f430d3dcb875b6e731d3f.jpg", "file_name": "File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 027).jpg"} +{"url": "https://prod-images-static.radiopaedia.org/images/61855959/0d912c1f7d1782731a05a9f00c3e2c732cc8bb47efe1ff4b256f166554ede138.jpg", "file_name": "File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 03).jpg"} +{"url": "https://prod-images-static.radiopaedia.org/images/61855984/e1eef848c7cea6c7cc8739256b34486f218953782d885f8069c054c4112139a4.jpg", "file_name": "File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 028).jpg"} +{"url": "https://prod-images-static.radiopaedia.org/images/61855958/d6be3beb3ab062cd2b1fdf14bc328c0c6f55503d4d8061fb9468e576aa45bc51.jpg", "file_name": "File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 02).jpg"} +{"url": "https://prod-images-static.radiopaedia.org/images/61855985/d0b79cf557c786a3104b9cdcefcb90a37c5ad6344cc892a6909d9c513d21acc3.jpg", "file_name": "File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 
029).jpg"} +{"url": "https://prod-images-static.radiopaedia.org/images/61855957/c27115d7fe148b889f3d133e767cfc8b0cd3f0d59e4901261c248a89fd5ca934.jpg", "file_name": "File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 01).jpg"} diff --git a/fix_mass/fix_sets/lists/sf_infos.py b/fix_mass/fix_sets/lists/sf_infos.py new file mode 100644 index 00000000..e0af3edf --- /dev/null +++ b/fix_mass/fix_sets/lists/sf_infos.py @@ -0,0 +1,133 @@ +""" +python3 core8/pwb.py fix_mass/fix_sets/lists/sf_infos + +tfj run --mem 1Gi rdfiles --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py fix_mass/fix_sets/lists/sf_infos read_files" + +from fix_mass.fix_sets.lists.sf_infos import from_sf_infos # from_sf_infos(url, study_id) + +""" +import sys +import os +import psutil +import json +import tqdm +from newapi import printe +from pathlib import Path + +t_dir = Path(__file__).parent.parent / "jsons" +# --- +starts_with = "https://prod-images-static.radiopaedia.org/images" +# --- +sf_infos_file = t_dir / "sf_infos.json" +# --- +if not sf_infos_file.exists(): + sf_infos_file.write_text("{}") +# --- +sfs_infos = {} +# --- +with open(sf_infos_file, "r", encoding="utf-8") as f: + sfs_infos = json.load(f) + + +def print_memory(): + _red_ = "\033[91m%s\033[00m" + + usage = psutil.Process(os.getpid()).memory_info().rss + usage = usage / 1024 // 1024 + + print(_red_ % f"memory usage: psutil {usage} MB") + + +def from_sf_infos(url, study_id): + # --- + if url.startswith(starts_with): + url = url[len(starts_with) :] + # --- + lista = sfs_infos.get(url) + # --- + if not lista: + printe.output(f"from_sf_infos: not found: {url}") + return "" + # --- + if len(lista) == 1: + return lista[0] + # --- + printe.output(f"from_sf_infos: {len(lista)}") + # --- + for file in lista: + # File:Persistent trigeminal artery (Radiopaedia 56019-62643 Axial 14).jpg + staa = f"-{study_id} " + if staa in file: + return file + # --- + printe.output(f"from_sf_infos: not found: {url}") + # --- + return "" + + +def dumpit(): + printe.output(f"dumpit: {len(sfs_infos)}") + with open(sf_infos_file, "w", encoding="utf-8") as f: + json.dump(sfs_infos, f, ensure_ascii=False) + printe.output(f"<> write {len(sfs_infos)} to file: {sf_infos_file}") + + +def read_files(): + # --- + jsons_dir = t_dir / "studies_files_infos" + list_files = list(jsons_dir.glob("*.json")) + # --- + printe.output(f"list_files: {len(list_files)}") + # --- + for i in range(0, len(list_files), 1000): + group = list_files[i : i + 1000] + # --- + for f in tqdm.tqdm(group, total=len(list_files)): + with open(f, "r", encoding="utf-8") as f: + data = json.load(f) + # { "File:Metatarsus adductus (Radiopaedia 62643-70938 Frontal 1).png": { "img_url": "https", "id": 42050951 },} + for file, v in data.items(): + # print(v) + img_url = v["img_url"] + # --- + if img_url.startswith(starts_with): + img_url = img_url[len(starts_with) :] + # --- + if img_url not in sfs_infos: + sfs_infos[img_url] = [] + # --- + if file not in sfs_infos[img_url]: + sfs_infos[img_url].append(file) + # --- + del data + # --- + del group + # --- + dumpit() + # --- + print_memory() + # --- + dumpit() + + +def start(): + # --- + printe.output(f"sfs_infos: {len(sfs_infos)}") + # --- + if "read_files" in sys.argv: + read_files() + # --- + # find urls with more then 1 value + uls = {k: v for k, v in sfs_infos.items() if len(v) > 1} + # --- + # sort it + uls = {k: v for k, v in sorted(uls.items(), key=lambda item: len(item[1]))} + # --- + 
printe.output(f"uls: {len(uls)}") + # --- + for k, v in uls.items(): + printe.output(f"{k}: {len(v)}") + + +if __name__ == "__main__": + start() diff --git a/fix_mass/fix_sets/lists/study_case_cats.py b/fix_mass/fix_sets/lists/study_case_cats.py index a299517d..e0ae2fe6 100644 --- a/fix_mass/fix_sets/lists/study_case_cats.py +++ b/fix_mass/fix_sets/lists/study_case_cats.py @@ -13,6 +13,7 @@ from pathlib import Path from mass.radio.jsons_files import jsons + # jsons.all_ids # jsons.cases_cats main_dir = Path(__file__).parent.parent @@ -55,4 +56,4 @@ file = main_dir / "jsons/study_to_case_cats.json" with open(file, "w", encoding="utf-8") as f: - json.dump(study_to_case_cats, f, ensure_ascii=False, indent=2) \ No newline at end of file + json.dump(study_to_case_cats, f, ensure_ascii=False, indent=2) diff --git a/fix_mass/fix_sets/new.py b/fix_mass/fix_sets/new.py new file mode 100644 index 00000000..9e58d9a6 --- /dev/null +++ b/fix_mass/fix_sets/new.py @@ -0,0 +1,113 @@ +""" + +python3 core8/pwb.py fix_mass/fix_sets/new 134732 ask +python3 core8/pwb.py fix_mass/fix_sets/new 117539 ask + +""" +# import json +import sys +from pathlib import Path + +from newapi import printe +from newapi.ncc_page import MainPage as ncc_MainPage + +from fix_mass.fix_sets.bots.get_img_info import one_img_info +from fix_mass.fix_sets.bots.stacks import get_stacks # get_stacks(study_id) +from fix_mass.fix_sets.bots.set_text2 import make_text_one_study +from fix_mass.fix_sets.bots.study_files import get_study_files +from fix_mass.fix_sets.bots.mv_files import to_move_work +from fix_mass.fix_sets.jsons.files import studies_titles, studies_titles2 +from fix_mass.fix_sets.bots.done import studies_done_append, find_done, already_done +from fix_mass.fix_sets.bots.has_url import has_url_append, find_has_url, already_has_url + + +main_dir = Path(__file__).parent + + +def update_set_text(title, n_text, study_id): + # --- + printe.output(f"<> update_set_text: {title}") + # --- + page = ncc_MainPage(title, "www", family="nccommons") + # --- + p_text = page.get_text() + # --- + # split p_text get after first [[Category: + # --- + cat_text = "" + if p_text.find("[[Category:") != -1: + cat_text = "[[Category:" + p_text.split("[[Category:", maxsplit=1)[1] + # --- + # cats = page.get_categories() + # # --- + # printe.output(cat_text) + # # --- + # cats_text = "\n".join([f"[[Category:{x}]]" for x in cats]) + # --- + n_text += f"\n\n{cat_text}" + # --- + if p_text == n_text: + printe.output("no changes..") + return + # --- + tyy = page.save(newtext=n_text, summary="update") + # --- + if tyy: + studies_done_append(study_id) + + +def work_text(study_id, study_title): + # --- + files = get_study_files(study_id) + # --- + json_data = get_stacks(study_id) + # --- + data = one_img_info(files, study_id, json_data) + # --- + text, to_move = make_text_one_study(json_data, data, study_title, study_id) + # --- + return text, to_move + + +def work_one_study(study_id): + # --- + study_title = studies_titles.get(study_id) # or studies_titles2.get(study_id) + # --- + printe.output(f"study_id: {study_id}, study_title: {study_title}") + # --- + if not study_title: + printe.output(f"<> study_title for: {study_id=} not found") + return + # --- + text, to_move = work_text(study_id, study_title) + # --- + text = text.strip() + # --- + if text.find("|http") != -1: + printe.output(f"<> text has http links... 
study_id: {study_id}") + has_url_append(study_id) + if "printtext" in sys.argv: + printe.output(text) + return + # --- + if not text: + printe.output(f"<> text is empty... study_id: {study_id}") + return + # --- + text = to_move_work(text, to_move) + # --- + update_set_text(study_title, text, study_id) + + +def main(ids): + # --- + printe.output(f"<> len of ids: {len(ids)}") + # --- + for study_id in ids: + work_one_study(study_id) + + +if __name__ == "__main__": + ids = [arg.strip() for arg in sys.argv if arg.strip().isdigit()] + # --- + main(ids) diff --git a/fix_mass/fix_sets/read_sf_infos.py b/fix_mass/fix_sets/read_sf_infos.py new file mode 100644 index 00000000..cee79572 --- /dev/null +++ b/fix_mass/fix_sets/read_sf_infos.py @@ -0,0 +1,119 @@ +""" +python3 core8/pwb.py fix_mass/fix_sets/read_sf_infos read_all + +tfj run --mem 1Gi rdinf --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py fix_mass/fix_sets/read_sf_infos" + + +""" +import sys +import os +import psutil +import json +import tqdm +from newapi import printe +from pathlib import Path + +Dir = Path(__file__).parent +# --- +numbs = 1000 if "2" not in sys.argv else 2 +# --- +starts_with = "https://prod-images-static.radiopaedia.org/images" + +sf_infos_dir = Dir / "sf_infos_json" +if not sf_infos_dir.exists(): + sf_infos_dir.mkdir() + + +def print_memory(): + _red_ = "\033[91m%s\033[00m" + + usage = psutil.Process(os.getpid()).memory_info().rss + usage = usage / 1024 // 1024 + + print(_red_ % f"memory usage: psutil {usage} MB") + + +def start(): + # --- + jsons_dir = Path(__file__).parent / "jsons/studies_files_infos" + # --- + list_files = list(jsons_dir.glob("*.json")) + # --- + printe.output(f"list_files: {len(list_files)}") + # --- + for i in range(0, len(list_files), numbs): + group = list_files[i : i + numbs] + # --- + infos_file = sf_infos_dir / f"{i}.json" + # --- + if infos_file.exists(): + printe.output(f"exists: {infos_file}") + continue + # --- + infos = {} + # --- + for f in tqdm.tqdm(group, total=len(list_files)): + with open(f, "r", encoding="utf-8") as f: + data = json.load(f) + # { "File:Metatarsus adductus (Radiopaedia 62643-70938 Frontal 1).png": { "img_url": "https", "id": 42050951 },} + for file, v in data.items(): + # print(v) + img_url = v["img_url"] + # --- + if img_url.startswith(starts_with): + img_url = img_url[len(starts_with) :] + # --- + if img_url not in infos: + infos[img_url] = [] + # --- + if file not in infos[img_url]: + infos[img_url].append(file) + # --- + del data + # --- + printe.output(f"<> write {len(infos)} to file: {infos_file}") + # --- + with open(infos_file, "w", encoding="utf-8") as f: + json.dump(infos, f, ensure_ascii=False) + # --- + del group, infos + # --- + print_memory() + + +def read_all(): + all_data = {} + # --- + all_data_file = Dir / "jsons/sf_infos.json" + # --- + list_files = list(sf_infos_dir.glob("*.json")) + # --- + printe.output(f"list_files: {len(list_files)}") + # --- + for f in tqdm.tqdm(list_files, total=len(list_files)): + # --- + with open(f, "r", encoding="utf-8") as f: + data = json.load(f) + # --- + for img_url, files in data.items(): + # --- + if img_url not in all_data: + all_data[img_url] = [] + # --- + files = [x for x in files if x not in all_data[img_url]] + # --- + all_data[img_url].extend(files) + # --- + del data + # --- + with open(all_data_file, "w", encoding="utf-8") as f: + json.dump(all_data, f, ensure_ascii=False) + # --- + printe.output(f"<> write {len(all_data)} to file: {all_data_file}") + + +if __name__ == "__main__": + if 
"read_all" in sys.argv: + read_all() + else: + start() diff --git a/fix_mass/fix_sets/s.sh b/fix_mass/fix_sets/s.sh new file mode 100644 index 00000000..b8e73ae7 --- /dev/null +++ b/fix_mass/fix_sets/s.sh @@ -0,0 +1,4 @@ + +python3 core8/pwb.py mass/radio/cases_in_ids +python3 core8/pwb.py mass/radio/to_work +python3 core8/pwb.py mass/radio/st3/start3 get:500 diff --git a/mass/radio/bots/add_cat.py b/mass/radio/bots/add_cat.py index f695bcf1..88b8bb60 100644 --- a/mass/radio/bots/add_cat.py +++ b/mass/radio/bots/add_cat.py @@ -12,11 +12,25 @@ study_done = [] +skip_titles = [ + "File:Angiodysplasia - cecal active bleed (Radiopaedia 168775-136954 Coronal 91).jpeg", +] -def add(da=[], title="", cat=""): +def add(da=None, title="", cat=""): if da: title, cat = da[0], da[1] # --- + if title.find("_") != -1: + title = title.replace("_", " ") + # --- + if title in skip_titles: + printe.output(f"Skipping {title}...") + return + # --- + if not title or not cat: + printe.output("no title or cat") + return + # --- cat_line = f"\n[[{cat}]]" summary = f"Bot: added [[:{cat}]]" # --- diff --git a/mass/radio/st3/One_Case_New.py b/mass/radio/st3/One_Case_New.py index 390f420b..6273a318 100644 --- a/mass/radio/st3/One_Case_New.py +++ b/mass/radio/st3/One_Case_New.py @@ -14,6 +14,8 @@ from mass.radio.get_studies import get_images_stacks, get_images from mass.radio.bots.bmp import work_bmp from mass.radio.bots.update import update_text_new +from mass.radio.bots.add_cat import add_cat_to_images # add_cat_to_images(sets, cat_title) +from mass.radio.bots.studies_utf import dump_studies_urls_to_files from mass.radio.jsons_files import jsons # , dumps_jsons, ids_to_urls, urls_to_ids # --- @@ -77,6 +79,7 @@ def __init__(self, case_url, caseId, title, studies_ids, author): self.title = title self.studies_ids = studies_ids self.images_count = 0 + self.img_to_url = {} self.files = [] self.studies = {} self.set_title = f"Radiopaedia case {self.caseId} {self.title}" @@ -163,7 +166,7 @@ def get_studies(self): json.dump(images, f, ensure_ascii=False, indent=2) # --- # sort images by "id" - images = sorted(images, key=lambda x: x["id"]) + # images = sorted(images, key=lambda x: x["id"]) # --- self.studies[study] = images printt(f"study:{study} : len(images) = {len(images)}, st_file:{st_file}") @@ -241,6 +244,8 @@ def upload_images(self, study, images): # --- to_up = {} # --- + self.img_to_url[study] = {} + # --- for i, image in enumerate(images, 1): image_url = image.get("public_filename", "") # --- @@ -261,7 +266,10 @@ def upload_images(self, study, images): extension = image["fullscreen_filename"].split(".")[-1].lower() # --- if extension == "bmp": - image_url, extension = work_bmp(image_url) + if "dump_studies_urls_to_files" not in sys.argv: + image_url, extension = work_bmp(image_url) + else: + extension = "jpg" # --- urls_done.append(image_url) # --- @@ -280,6 +288,11 @@ def upload_images(self, study, images): file_name = file_name.replace(":", ".").replace("/", ".") # --- to_up[f"File:{file_name}"] = (image_url, file_name, image_id, plane, modality, study) + # --- + self.img_to_url[study][f"File:{file_name}"] = {"url": image_url, "id": image_id} + # --- + if "dump_studies_urls_to_files" in sys.argv: + return # --- to_c = list(to_up.keys()) # --- @@ -333,6 +346,7 @@ def upload_images(self, study, images): if "updatetext" not in sys.argv: if self.images_count > 1: self.create_set(set_title, sets) + self.create_set_category(set_title, sets, study) def start(self): self.get_studies() @@ -342,6 +356,12 @@ def start(self): 
# --- self.upload_images(study, images) + if self.img_to_url: + dump_studies_urls_to_files(self.img_to_url) + + if "dump_studies_urls_to_files" in sys.argv: + return + printt(f"Images count: {self.images_count}") if self.images_count == 0: @@ -396,6 +416,41 @@ def create_set(self, set_title, sets): ssa = page.save(newtext=text, summary="update", nocreate=0, minor="") return ssa + def create_set_category(self, set_title, sets, study_id): + # --- + study_url = f"https://radiopaedia.org/cases/{self.caseId}/studies/{study_id}" + # --- + cat_title = f"Category:{set_title}" + # --- + printe.output(f"len of sets: {len(sets)} /// cat_title:{cat_title}") + # --- + text = f"* [{study_url} study: {study_id}]" + text += f"\n[[{self.category}|*]]" + text += f"\n[[Category:Radiopaedia studies|{study_id}]]" + # --- + done = False + # --- + if self.title_exists(cat_title): + done = True + # --- + if not done: + cat = ncc_MainPage(cat_title, "www", family="nccommons") + # --- + if cat.exists(): + printt(f"<> {cat_title} already exists") + done = True + # --- + if not done: + new = cat.Create(text=text, summary="create") + # --- + if new: + done = True + # --- + printt(f"Category {cat_title} created..{new=}") + # --- + if done: + add_cat_to_images(sets, cat_title) + def add_category(self, file_name): # --- if "add_category" not in sys.argv: diff --git a/mass/radio/st3/o.py b/mass/radio/st3/o.py index 659442cb..bfe81ada 100644 --- a/mass/radio/st3/o.py +++ b/mass/radio/st3/o.py @@ -1,5 +1,9 @@ """ python3 core8/pwb.py mass/radio/st3/o 154713 +python3 core8/pwb.py mass/radio/st3/o +python3 core8/pwb.py mass/radio/st3/o +python3 core8/pwb.py mass/radio/st3/o +python3 core8/pwb.py mass/radio/st3/o 154713 dump_studies_urls_to_files python3 core8/pwb.py mass/radio/st3/o add_category 10033 """ """Script for dealing with Radiopaedia case operations diff --git a/mass/radio/st3/start3.py b/mass/radio/st3/start3.py index b484dcd0..2b89ec27 100644 --- a/mass/radio/st3/start3.py +++ b/mass/radio/st3/start3.py @@ -1,7 +1,8 @@ """ +python3 core8/pwb.py mass/radio/st3/start3 nomulti ask 97387 python3 core8/pwb.py mass/radio/st3/start3 get:500 -python3 core8/pwb.py mass/radio/st3/start3 test nomulti +python3 core8/pwb.py mass/radio/st3/start3 dump_studies_urls_to_files nomulti python3 /data/project/mdwiki/pybot/mass/radio/st3/start3.py test tfj run mnx1 --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py mass/radio/st3/start3 get:1 157" @@ -25,26 +26,28 @@ from mass.radio.st3.One_Case_New import OneCase # --- -main_dir = Path(__file__).parent.parent +radio_jsons_dir = Path(__file__).parent.parent / "jsons" # --- -with open(main_dir / "jsons/authors.json", encoding="utf-8") as f: +with open(radio_jsons_dir / "authors.json", encoding="utf-8") as f: authors = json.load(f) # --- -with open(main_dir / "jsons/infos.json", encoding="utf-8") as f: +with open(radio_jsons_dir / "infos.json", encoding="utf-8") as f: infos = json.load(f) # --- -with open(main_dir / "jsons/all_ids.json", encoding="utf-8") as f: +with open(radio_jsons_dir / "all_ids.json", encoding="utf-8") as f: all_ids = json.load(f) # --- # cases_in_ids = [] # --- -with open(main_dir / "jsons/cases_in_ids.json", encoding="utf-8") as f: +with open(radio_jsons_dir / "cases_in_ids.json", encoding="utf-8") as f: cases_in_ids = json.load(f) # --- -ids_by_caseId = { - x: v - for x, v in all_ids.items() if x not in cases_in_ids -} +ids_by_caseId = {x: v for x, v in all_ids.items() if x not in cases_in_ids} +# --- +if "allids" in sys.argv: + ids_by_caseId = 
all_ids.copy() +# --- +printe.output(f"{len(ids_by_caseId)=}, {len(cases_in_ids)=}") # --- del cases_in_ids @@ -75,7 +78,7 @@ def do_it(va): def multi_work(tab, numb=10): done = 0 for i in range(0, len(tab), numb): - group = tab[i:i + numb] + group = tab[i : i + numb] # --- done += numb printe.output(f"<> done: {done}:") @@ -99,7 +102,7 @@ def ddo(taba): length = (len(ids_tabs) // 6) + 1 for i in range(0, len(ids_tabs), length): num = i // length + 1 - tabs[str(num)] = dict(list(ids_tabs.items())[i:i + length]) + tabs[str(num)] = dict(list(ids_tabs.items())[i : i + length]) # print(f'tab {num} : {len(tabs[str(num)])}') print(f'tfj run mnx{num} --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py mass/radio/st3/start3 get:{num} {len(tabs[str(num)])}"') @@ -142,13 +145,11 @@ def main(ids_tab): # --- studies = [study.split("/")[-1] for study in va["studies"]] # --- - tab.append({ - "caseId": caseId, - "case_url": case_url, - "title": title, - "studies": studies, - "author": author - }) + if not studies: + printe.output(f"!!! studies not found: {caseId=}.") + continue + # --- + tab.append({"caseId": caseId, "case_url": case_url, "title": title, "studies": studies, "author": author}) # --- del ids_tab # --- @@ -158,10 +159,7 @@ def main(ids_tab): def main_by_ids(ids): printe.output(f"<> start.py main_by_ids: {len(ids)=}:") # --- - ids_tab = { - caseId: all_ids[caseId] - for caseId in ids if caseId in all_ids - } + ids_tab = {caseId: all_ids[caseId] for caseId in ids if caseId in all_ids} # --- not_in = [c for c in ids if c not in all_ids] # --- @@ -171,20 +169,11 @@ def main_by_ids(ids): if __name__ == "__main__": + ids = [arg.strip() for arg in sys.argv if arg.strip().isdigit()] # --- - if "test" in sys.argv: - ids_by_caseId = { - "161846": { - "url": "https://radiopaedia.org/cases/cholangiocarcinoma-25", - "caseId": 161846, - "title": "Cholangiocarcinoma", - "studies": ["https://radiopaedia.org/cases/161846/studies/132257"], - "author": "Mohammadtaghi Niknejad", - "system": "Hepatobiliary", - "published": "19 Feb 2023" - } - } - # --- - print("ids_by_caseId: ", len(ids_by_caseId)) + ids = {x: all_ids[x] for x in ids if x in all_ids} # --- - main(ids_by_caseId) + if ids: + main(ids) + else: + main(ids_by_caseId) diff --git a/mass/radio/st3sort/One_Case_New.py b/mass/radio/st3sort/One_Case_New.py deleted file mode 100644 index dc91f26e..00000000 --- a/mass/radio/st3sort/One_Case_New.py +++ /dev/null @@ -1,474 +0,0 @@ -""" -from mass.radio.st3sort.One_Case_New import OneCase -""" -import sys -import os -from pathlib import Path -import json -import traceback - -# --- -from nccommons import api -from newapi import printe -from newapi.ncc_page import NEW_API, MainPage as ncc_MainPage -from mass.radio.get_studies import get_images_stacks, get_images -from mass.radio.bots.bmp import work_bmp -from mass.radio.bots.update import update_text_new -from mass.radio.bots.add_cat import add_cat_to_images # add_cat_to_images(sets, cat_title) -from mass.radio.bots.studies_utf import dump_studies_urls_to_files -from mass.radio.jsons_files import jsons # , dumps_jsons, ids_to_urls, urls_to_ids - -# --- -try: - import pywikibot - - pywikibotoutput = pywikibot.output -except ImportError: - pywikibotoutput = print -# --- - -# dumps_jsons(infos=0, urls=0, cases_in_ids=0, cases_dup=0, authors=0, to_work=0, all_ids=0, urls_to_get_info=0) -# --- -main_dir = Path(__file__).parent.parent -# --- -studies_dir = Path("/data/project/mdwiki/studies") -# --- -if not os.path.exists(studies_dir): - 
printe.output(f"<> studies_dir {studies_dir} not found") - studies_dir = main_dir / "studies" - printe.output(f"<> studies_dir set to {studies_dir}") -# --- -with open(os.path.join(str(main_dir), "authors_list/authors_infos.json"), encoding="utf-8") as f: - authors_infos = json.load(f) -# --- -api_new = NEW_API("www", family="nccommons") -api_new.Login_to_wiki() -# --- -urls_done = [] -# --- -PD_medical_pages = [] -if "updatetext" in sys.argv: - from mass.radio.lists.PD_medical import PD_medical_pages_def - - PD_medical_pages = PD_medical_pages_def() - - -def get_image_extension(image_url): - # Split the URL to get the filename and extension - _, filename = os.path.split(image_url) - - # Split the filename to get the name and extension - _name, extension = os.path.splitext(filename) - - # Return the extension (without the dot) - ext = extension[1:] - return ext or "jpeg" - - -def printt(s): - if "nopr" in sys.argv: - return - printe.output(s) - - -class OneCase: - def __init__(self, case_url, caseId, title, studies_ids, author): - self.author = author - self.caseId = caseId - self.case_url = case_url - self.title = title - self.studies_ids = studies_ids - self.images_count = 0 - self.img_to_url = {} - self.files = [] - self.studies = {} - self.set_title = f"Radiopaedia case {self.caseId} {self.title}" - self.category = f"Category:Radiopaedia case {self.caseId} {self.title}" - # --- - self.published = "" - self.system = "" - # --- - if self.case_url in jsons.infos: - self.published = jsons.infos[self.case_url]["published"] - # --- - if not self.author: - self.author = jsons.infos[self.case_url]["author"] - # --- - self.system = jsons.infos[self.case_url]["system"] - else: - if self.case_url in jsons.url_to_sys: - self.system = jsons.url_to_sys[self.case_url] - # --- - - def title_exists(self, title): - # --- - pages = api_new.Find_pages_exists_or_not([title], noprint=True) - # --- - if pages.get(title): - printt(f"<> api_new {title} already exists") - return True - # --- - # file_page = ncc_MainPage(title, 'www', family='nccommons') - # # --- - # if file_page.exists(): - # printt(f'<> File:{title} already exists') - # return True - # --- - return False - - def create_category(self): - text = f"* [{self.case_url} Radiopaedia case: {self.title} ({self.caseId})]\n" - text += f"[[Category:Radiopaedia images by case|{self.caseId}]]" - # --- - if self.system: - text += f"\n[[Category:Radiopaedia cases for {self.system}]]" - # --- - if self.title_exists(self.category): - return - # --- - cat = ncc_MainPage(self.category, "www", family="nccommons") - # --- - if cat.exists(): - printt(f"<> {self.category} already exists") - return - # --- - new = cat.Create(text=text, summary="create") - - printt(f"Category {self.category} created..{new=}") - - def get_studies(self): - for study in self.studies_ids: - st_file = studies_dir / f"{study}.json" - # --- - images = {} - # --- - if os.path.exists(st_file): - try: - with open(st_file, encoding="utf-8") as f: - images = json.loads(f.read()) - except Exception as e: - pywikibotoutput("<> Traceback (most recent call last):") - printt(f"{study} : error") - pywikibotoutput(e) - pywikibotoutput(traceback.format_exc()) - pywikibotoutput("CRITICAL:") - # --- - images = [image for image in images if image] - # --- - if not images: - printt(f"{study} : not found") - images = get_images_stacks(study) - # --- - if not images: - images = get_images(f"https://radiopaedia.org/cases/{self.caseId}/studies/{study}") - # --- - with open(st_file, "w", encoding="utf-8") as f: - 
json.dump(images, f, ensure_ascii=False, indent=2) - # --- - # sort images by "id" - # images = sorted(images, key=lambda x: x["id"]) - # --- - self.studies[study] = images - printt(f"study:{study} : len(images) = {len(images)}, st_file:{st_file}") - - def make_image_text(self, image_url, image_id, plane, modality, study_id): - auth_line = f"{self.author}" - # --- - auth_url = authors_infos.get(self.author, {}).get("url", "") - auth_location = authors_infos.get(self.author, {}).get("location", "") - if auth_url: - auth_line = f"[{auth_url} {self.author}]" - # --- - usa_license = "" - # --- - if auth_location.lower().find("united states") != -1: - usa_license = "{{PD-medical}}" - # --- - study_url = f"https://radiopaedia.org/cases/{self.caseId}/studies/{study_id}" - # --- - image_text = "== {{int:summary}} ==\n" - - image_text += ( - "{{Information\n" - f"|Description = \n" - f"* Radiopaedia case ID: [{self.case_url} {self.caseId}]\n" - # f'* Image ID: {image_id}\n' - f"* Study ID: [{study_url} {study_id}]\n" - f"* Image ID: [{image_url} {image_id}]\n" - f"* Plane projection: {plane}\n" - f"* Modality: {modality}\n" - f"* System: {self.system}\n" - f"* Author location: {auth_location}\n" - f"|Date = {self.published}\n" - f"|Source = [{self.case_url} {self.title}]\n" - f"|Author = {auth_line}\n" - "|Permission = http://creativecommons.org/licenses/by-nc-sa/3.0/\n" - "}}\n" - "== {{int:license}} ==\n" - "{{CC-BY-NC-SA-3.0}}\n" - f"{usa_license}\n" - f"[[{self.category}]]\n" - "[[Category:Uploads by Mr. Ibrahem]]" - ) - return image_text - - def upload_image(self, image_url, image_name, image_id, plane, modality, study_id): - if "noup" in sys.argv: - return image_name - # --- - file_title = f"File:{image_name}" - # --- - exists = self.title_exists(file_title) - # --- - if exists: - return image_name - # --- - image_text = self.make_image_text(image_url, image_id, plane, modality, study_id) - - file_name = api.upload_by_url(image_name, image_text, image_url, return_file_name=True, do_ext=True) - - printt(f"upload result: {file_name}") - if file_name and file_name != image_name: - # --- - if "updatetext" in sys.argv and f"File:{file_name}" not in PD_medical_pages: - update_text_new(f"File:{file_name}") - # --- - self.add_category(file_name) - - return file_name - - def upload_images(self, study, images): - sets = [] - planes = {} - modality = "" - # --- - to_up = {} - # --- - self.img_to_url[study] = {} - # --- - for i, image in enumerate(images, 1): - image_url = image.get("public_filename", "") - # --- - if not image_url: - printt("no image") - printt(image) - continue - # --- - if image_url in urls_done: - self.images_count += 1 - continue - # --- - # extension = get_image_extension(image_url) - extension = image_url.split(".")[-1].lower() - # --- - if not extension: - # extension = get_image_extension(image['fullscreen_filename']) - extension = image["fullscreen_filename"].split(".")[-1].lower() - # --- - if extension == "bmp": - if "dump_studies_urls_to_files" not in sys.argv: - image_url, extension = work_bmp(image_url) - else: - extension = "jpg" - # --- - urls_done.append(image_url) - # --- - image_id = image["id"] - plane = image["plane_projection"] - # --- - if plane not in planes: - planes[plane] = 0 - planes[plane] += 1 - # --- - file_name = f"{self.title} (Radiopaedia {self.caseId}-{study} {plane} {planes[plane]}).{extension}" - # --- - file_name = file_name.replace(" ", " ").replace(" ", " ").replace(" ", " ") - # --- - # fix BadFileName - file_name = file_name.replace(":", 
".").replace("/", ".") - # --- - to_up[f"File:{file_name}"] = (image_url, file_name, image_id, plane, modality, study) - # --- - self.img_to_url[study][f"File:{file_name}"] = {"url": image_url, "id": image_id} - # --- - if "dump_studies_urls_to_files" in sys.argv: - return - # --- - to_c = list(to_up.keys()) - # --- - pages = api_new.Find_pages_exists_or_not(to_c) - # --- - # print(pages) - # --- - already_in = [k for k in to_up if pages.get(k)] - # --- - printt(f"already_in: {len(already_in)}") - # --- - for fa in already_in: - if fa not in sets: - self.images_count += 1 - sets.append(fa) - # --- - if "updatetext" in sys.argv: - # --- - tits1 = [x for x in already_in if x in to_up] - tits2 = [x for x in tits1 if f"File:{x}" not in PD_medical_pages] - # --- - printt(f"{len(tits1)=}, {len(tits2)=}") - # --- - for fa in tits2: - image_url, file_name, image_id, plane, modality, study_id = to_up[fa] - image_text = self.make_image_text(image_url, image_id, plane, modality, study_id) - # --- - file_title = f"File:{file_name}" - # --- - # update_text(file_title, image_text) - update_text_new(file_title) - # --- - not_in = {k: v for k, v in to_up.items() if not pages.get(k)} - # --- - printt(f"not_in: {len(not_in)}") - # --- - for i, (image_url, file_name, image_id, plane, modality, study_o) in enumerate(not_in.values(), 1): - # --- - printt(f"file: {i}/{len(not_in)} :") - # --- - new_name = self.upload_image(image_url, file_name, image_id, plane, modality, study_o) - # --- - file_n = f"File:{new_name}" if new_name else f"File:{file_name}" - # --- - if file_n not in sets: - self.images_count += 1 - sets.append(file_n) - # --- - set_title = f"Radiopaedia case {self.title} id: {self.caseId} study: {study}" - # --- - if "updatetext" not in sys.argv: - if self.images_count > 1: - self.create_set(set_title, sets) - self.create_set_category(set_title, sets, study) - - def start(self): - self.get_studies() - - for study, images in self.studies.items(): - printt(f"{study} : len(images) = {len(images)}") - # --- - self.upload_images(study, images) - - if self.img_to_url: - dump_studies_urls_to_files(self.img_to_url) - - if "dump_studies_urls_to_files" in sys.argv: - return - - printt(f"Images count: {self.images_count}") - - if self.images_count == 0: - printt("no category created") - return - - self.create_category() - - def create_set(self, set_title, sets): - text = "" - # --- - if "noset" in sys.argv: - return - # --- - sets = [x.strip() for x in sets if x.strip()] - # --- - if len(sets) < 2: - return - # --- - if self.title_exists(set_title): - return - # --- - text += "{{Imagestack\n|width=850\n" - text += f"|title={set_title}\n|align=centre\n|loop=no\n" - # --- - for image_name in sets: - text += f"|{image_name}|\n" - # --- - text += "\n}}\n[[Category:Image set]]\n" - text += f"[[Category:{self.set_title}|*]]\n" - text += "[[Category:Radiopaedia sets]]" - # --- - page = ncc_MainPage(set_title, "www", family="nccommons") - # --- - if not page.exists(): - new = page.Create(text=text, summary="") - return new - # --- - # if text != page.get_text(): - # printt(f'<>{set_title} already exists') - p_text = page.get_text() - # --- - if p_text.find(".bmp") != -1: - p_text = p_text.replace(".bmp", ".jpg") - ssa = page.save(newtext=p_text, summary="update", nocreate=0, minor="") - return ssa - - elif "fix" in sys.argv: - if text == p_text: - printt("<> no changes") - return True - ssa = page.save(newtext=text, summary="update", nocreate=0, minor="") - return ssa - - def create_set_category(self, set_title, 
sets, study_id): - # --- - study_url = f"https://radiopaedia.org/cases/{self.caseId}/studies/{study_id}" - # --- - cat_title = f"Category:{set_title}" - # --- - printe.output(f"len of sets: {len(sets)} /// cat_title:{cat_title}") - # --- - text = f"* [{study_url} study: {study_id}]" - text += f"\n[[{self.category}|*]]" - text += f"\n[[Category:Radiopaedia studies|{study_id}]]" - # --- - done = False - # --- - if self.title_exists(cat_title): - done = True - # --- - if not done: - cat = ncc_MainPage(cat_title, "www", family="nccommons") - # --- - if cat.exists(): - printt(f"<> {cat_title} already exists") - done = True - # --- - if not done: - new = cat.Create(text=text, summary="create") - # --- - if new: - done = True - # --- - printt(f"Category {cat_title} created..{new=}") - # --- - if done: - add_cat_to_images(sets, cat_title) - - def add_category(self, file_name): - # --- - if "add_category" not in sys.argv: - return - # --- - add_text = f"\n[[{self.category}]]" - # --- - file_title = f"File:{file_name}" - # --- - page = ncc_MainPage(file_title, "www", family="nccommons") - # --- - p_text = page.get_text() - # --- - if p_text.find("[[Category:Radiopaedia case") != -1: - printe.output(f"<>{file_title} has cat:") - printe.output(p_text) - # --- - if p_text.find(self.category) == -1: - new_text = p_text + add_text - ssa = page.save(newtext=new_text, summary=f"Bot: added [[:{self.category}]]") - return ssa diff --git a/mass/radio/st3sort/__init__.py b/mass/radio/st3sort/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/mass/radio/st3sort/co.py b/mass/radio/st3sort/co.py deleted file mode 100644 index 441f174e..00000000 --- a/mass/radio/st3sort/co.py +++ /dev/null @@ -1,21 +0,0 @@ -''' - -python3 /data/project/mdwiki/pybot/mass/radio/st3/co.py - -''' -import sys -import os - -user_script_paths = [ - '/data/project/mdwiki', - '/data/project/mdwiki/pybot', - '/data/project/mdwiki/pybot/md_core', - '/data/project/mdwiki/pybot/ncc_core', -] -for _u_path in user_script_paths: - if os.path.exists(_u_path): - sys.path.append(os.path.abspath(_u_path)) - -from mass.radio.st3sort.count import start - -start() diff --git a/mass/radio/st3sort/count.py b/mass/radio/st3sort/count.py deleted file mode 100644 index 5a7722d4..00000000 --- a/mass/radio/st3sort/count.py +++ /dev/null @@ -1,137 +0,0 @@ -""" - -python3 core8/pwb.py mass/radio/st3sort/count - -tfj run coca --image python3.9 --command "$HOME/local/bin/python3 c8/pwb.py mass/radio/cases_in_ids && $HOME/local/bin/python3 c8/pwb.py mass/radio/st3sort/count" - -""" -import os -import sys -import json -import tqdm -from pathlib import Path -from datetime import datetime - -from mass.radio.get_studies import get_images_stacks, get_images -from newapi.ncc_page import MainPage as ncc_MainPage - -main_dir = Path(__file__).parent.parent - -with open(main_dir / "jsons/all_ids.json", encoding="utf-8") as f: - all_ids = json.load(f) - -with open(main_dir / "jsons/cases_in_ids.json", encoding="utf-8") as f: - cases_in_ids = json.load(f) -# --- -studies_dir = Path("/data/project/mdwiki/studies") -# --- -if not os.path.exists(studies_dir): - studies_dir = main_dir / "studies" - print(f'<> studies_dir set to {studies_dir}') -# --- -ids_tab = { - x: v - for x, v in all_ids.items() if x not in cases_in_ids -} - -cases_done = len(all_ids) - len(ids_tab) - - -class All: - cases = 0 - images = 0 - studies = 0 - - -All.cases = len(ids_tab) -cases_count_file = main_dir / "jsons/cases_count.json" - - -def cases_counts(): - if not 
os.path.exists(cases_count_file): - with open(cases_count_file, "w", encoding="utf-8") as f: - f.write("{}") - - with open(cases_count_file, encoding="utf-8") as f: - cases_count = json.load(f) - - return cases_count - - -def get_studies(studies_ids, caseId): - print(f"get_studies {caseId=}") - images_count = 0 - for study in studies_ids: - st_file = studies_dir / f"{study}.json" - images = {} - if os.path.exists(st_file): - try: - with open(st_file, encoding="utf-8") as f: - images = json.load(f) - except Exception as e: - print(f"{study} : error") - images = [image for image in images if image] - if not images: - images = get_images_stacks(caseId) - if not images: - url = f"https://radiopaedia.org/cases/{caseId}/studies/{study}" - images = get_images(url) - images_count += len(images) - - return images_count - - -def sa(): - day = datetime.now().strftime("%Y-%b-%d %H:%M:%S") - # text = f"{day}\n" - text = f"* --~~~~\n" - - text += f"* All Cases: {len(all_ids):,}\n" - text += f"* Cases done: {cases_done:,}\n\n" - text += f";Remaining:\n" - text += f"* Cases: {All.cases:,}\n" - text += f"* Images: {All.images:,}\n" - text += f"* Studies: {All.studies:,}\n" - - print(text) - - page = ncc_MainPage("User:Mr. Ibrahem/Radiopaedia", "www", family="nccommons") - - if page.exists(): - page.save(newtext=text, summary="update") - else: - page.Create(text=text, summary="update") - - -def start(): - images_count = cases_counts() - print(f"{len(images_count)=}") - # --- - print(f"<> start.py all: {len(ids_tab)}:") - n = 0 - for _, va in tqdm.tqdm(ids_tab.items()): - n += 1 - caseId = va["caseId"] - - studies = [study.split("/")[-1] for study in va["studies"]] - All.studies += len(studies) - da = images_count.get(caseId) or images_count.get(str(caseId)) - if da: - images = da - else: - images = get_studies(studies, caseId) - images_count[caseId] = images - - All.images += images - - if "test" in sys.argv and n == 100: - break - - sa() - - with open(cases_count_file, "w", encoding="utf-8") as f: - json.dump(images_count, f, ensure_ascii=False, indent=2) - - -if __name__ == "__main__": - start() diff --git a/mass/radio/st3sort/files.py b/mass/radio/st3sort/files.py deleted file mode 100644 index ef1b2659..00000000 --- a/mass/radio/st3sort/files.py +++ /dev/null @@ -1,74 +0,0 @@ -""" - -python3 core8/pwb.py mass/radio/st3/files - -tfj run files --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py mass/radio/st3/files" - -""" -import re -from newapi import printe -from newapi.ncc_page import CatDepth -from newapi.ncc_page import MainPage as ncc_MainPage -from mass.radio.lists.cases_to_cats import cases_cats # cases_cats() - - -def images_to_cats(): - members = CatDepth("Category:Radiopaedia_images_by_system", sitecode="www", family="nccommons", depth=1, ns="10") - reg = r"^File:.*? 
\(Radiopaedia (\d+)\)\.\w+$" - # --- - tab = {} - # --- - for file in members: - match = re.match(reg, file) - if match: - case_id = match.group(1) - # --- - tab[file] = case_id - # --- - print(f"images_to_cats, lenth of members: {len(members)} ") - print(f"images_to_cats, lenth of tab: {len(tab)} ") - - return tab - - -def add(da=[], title="", cat=""): - if da: - title, cat = da[0], da[1] - # --- - page = ncc_MainPage(title, "www", family="nccommons") - - if not page.exists(): - return - - text = page.get_text() - # --- - if text.find(cat) != -1 or text.find("[[Category:Radiopaedia case") != -1: - printe.output(f"cat {title} already has it.") - return - # --- - newtext = text - newtext += f"\n[[{cat}]]" - # --- - page.save(newtext=newtext, summary=f"Bot: added [[:{cat}]]") - - -def start(): - # --- - cats = cases_cats() - imgs = images_to_cats() - # --- - new = { - x: cats[v] - for x, v in imgs.items() if v in cats - } - # --- - print(f"{len(new)=}") - for numb, (file, cat) in enumerate(new.items(), start=1): - # --- - printe.output(f"{file=}: {cat=}") - # --- - add(title=file, cat=cat) - - -if __name__ == "__main__": - start() diff --git a/mass/radio/st3sort/miss.py b/mass/radio/st3sort/miss.py deleted file mode 100644 index 47680129..00000000 --- a/mass/radio/st3sort/miss.py +++ /dev/null @@ -1,32 +0,0 @@ -""" - -python3 core8/pwb.py mass/radio/st3/miss - -tfj run miss --image python3.9 --command "$HOME/local/bin/python3 c8/pwb.py mass/radio/st3/miss" - -""" -import sys -import json -import os -from pathlib import Path -from mass.radio.st3sort.start3 import main - -# --- -main_dir = Path(__file__).parent.parent -with open(os.path.join(str(main_dir), "jsons/all_ids.json"), encoding="utf-8") as f: - all_ids = json.load(f) -# --- -lista = """ - 182746 - """ -# --- -new_ids = [x.strip() for x in lista.split("\n") if x.strip()] -# --- -# Parsing arguments -lookup_dict = {x: (all_ids.get(x) or all_ids.get(int(x))) for x in new_ids if x in all_ids} - -print(f"len new_ids: {len(new_ids)}") -print(f"len lookup_dict: {len(lookup_dict)}") -# --- -if "start" in sys.argv: - main(lookup_dict) diff --git a/mass/radio/st3sort/na.py b/mass/radio/st3sort/na.py deleted file mode 100644 index 978e37d4..00000000 --- a/mass/radio/st3sort/na.py +++ /dev/null @@ -1,29 +0,0 @@ -''' - -python3 /data/project/mdwiki/pybot/mass/radio/st3/na.py test - -''' -import sys -import os - -user_script_paths = [ - 'I:/core/new', - 'I:/mdwiki', - 'I:/mdwiki/pybot', - 'I:/mdwiki/pybot/md_core', - 'I:/mdwiki/pybot/ncc_core', - '/data/project/mdwiki', - '/data/project/mdwiki/pybot', - '/data/project/mdwiki/pybot/md_core', - '/data/project/mdwiki/pybot/ncc_core', -] -for _u_path in user_script_paths: - if os.path.exists(_u_path): - sys.path.append(os.path.abspath(_u_path)) - -from mass.radio.st3sort.start3 import ids_by_caseId, main - -# --- -print('ids_by_caseId: ', len(ids_by_caseId)) -# --- -main(ids_by_caseId) diff --git a/mass/radio/st3sort/o.py b/mass/radio/st3sort/o.py deleted file mode 100644 index b8236af0..00000000 --- a/mass/radio/st3sort/o.py +++ /dev/null @@ -1,22 +0,0 @@ -""" -python3 core8/pwb.py mass/radio/st3sort/o 154713 dump_studies_urls_to_files -python3 core8/pwb.py mass/radio/st3/o add_category 10033 -""" -"""Script for dealing with Radiopaedia case operations - -This script is used to handle operations related to Radiopaedia cases. 
-""" - -# Script for handling Radiopaedia operation tasks -import sys - -# --- -from mass.radio.st3sort.start3 import main_by_ids - -# --- -ids = [arg for arg in sys.argv[1:] if arg.isdigit()] -# --- -print(f"len ids: {len(ids)}") -# --- -main_by_ids(ids) -# --- diff --git a/mass/radio/st3sort/start3.py b/mass/radio/st3sort/start3.py deleted file mode 100644 index e215bd90..00000000 --- a/mass/radio/st3sort/start3.py +++ /dev/null @@ -1,179 +0,0 @@ -""" - -python3 core8/pwb.py mass/radio/st3sort/start3 nomulti ask 97387 -python3 core8/pwb.py mass/radio/st3sort/start3 get:500 -python3 core8/pwb.py mass/radio/st3sort/start3 dump_studies_urls_to_files nomulti -python3 /data/project/mdwiki/pybot/mass/radio/st3/start3.py test - -tfj run mnx1 --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py mass/radio/st3/start3 get:1 157" -tfj run mnx2 --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py mass/radio/st3/start3 get:2 157" -tfj run mnx3 --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py mass/radio/st3/start3 get:3 157" -tfj run mnx4 --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py mass/radio/st3/start3 get:4 157" -tfj run gnr5 --mem 1Gi --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py mass/radio/st3/start3 get:5 mdwiki" -tfj run gnr6 --mem 1Gi --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py mass/radio/st3/start3 get:6 mdwiki" - -""" -import sys -import psutil -import tqdm -import json -import os -from pathlib import Path -from multiprocessing import Pool - -# --- -from newapi import printe -from mass.radio.st3sort.One_Case_New import OneCase - -# --- -radio_jsons_dir = Path(__file__).parent.parent / "jsons" -# --- -with open(radio_jsons_dir / "authors.json", encoding="utf-8") as f: - authors = json.load(f) -# --- -with open(radio_jsons_dir / "infos.json", encoding="utf-8") as f: - infos = json.load(f) -# --- -with open(radio_jsons_dir / "all_ids.json", encoding="utf-8") as f: - all_ids = json.load(f) -# --- -# cases_in_ids = [] -# --- -with open(radio_jsons_dir / "cases_in_ids.json", encoding="utf-8") as f: - cases_in_ids = json.load(f) -# --- -ids_by_caseId = {x: v for x, v in all_ids.items() if x not in cases_in_ids} -# --- -if "allids" in sys.argv: - ids_by_caseId = all_ids.copy() -# --- -printe.output(f"{len(ids_by_caseId)=}, {len(cases_in_ids)=}") -# --- -del cases_in_ids - - -def print_memory(): - _red_ = "\033[91m%s\033[00m" - - usage = psutil.Process(os.getpid()).memory_info().rss - usage = usage / 1024 // 1024 - - print(_red_ % f"memory usage: psutil {usage} MB") - - -def do_it(va): - # --- - case_url = va["case_url"] - caseId = va["caseId"] - title = va["title"] - studies = va["studies"] - author = va["author"] - # --- - bot = OneCase(case_url, caseId, title, studies, author) - bot.start() - # --- - del bot, author, title, studies - - -def multi_work(tab, numb=10): - done = 0 - for i in range(0, len(tab), numb): - group = tab[i : i + numb] - # --- - done += numb - printe.output(f"<> done: {done}:") - # --- - print_memory() - # --- - if "nomulti" in sys.argv or len(tab) < 10: - for x in group: - do_it(x) - else: - pool = Pool(processes=5) - pool.map(do_it, group) - pool.close() - pool.terminate() - - -def ddo(taba): - ids_tabs = taba - tabs = {} - print(f"all cases: {len(ids_tabs)}") - length = (len(ids_tabs) // 6) + 1 - for i in range(0, len(ids_tabs), length): - num = i // length + 1 - tabs[str(num)] = dict(list(ids_tabs.items())[i : i + length]) - # print(f'tab {num} : 
{len(tabs[str(num)])}') - print(f'tfj run mnx{num} --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py mass/radio/st3sort/start3 get:{num} {len(tabs[str(num)])}"') - - for arg in sys.argv: - arg, _, value = arg.partition(":") - if arg == "get": - ids_tabs = tabs[value] - print(f"work in {len(ids_tabs)} cases") - del tabs - - return ids_tabs - - -def main(ids_tab): - printe.output(f"<> start.py all: {len(ids_tab)}:") - # --- - print_memory() - # --- - if "test" not in sys.argv and len(ids_tab) > 100: - ids_tab = ddo(ids_tab) - # --- - tab = [] - # --- - n = 0 - for _, va in tqdm.tqdm(ids_tab.items()): - n += 1 - # --- - caseId = va["caseId"] - case_url = va["url"] - # --- - author = va.get("author", "") - # --- - if not author: - author = infos.get(case_url, {}).get(str(caseId), "") - # --- - if not author: - author = authors.get(str(caseId), "") - # --- - title = va["title"] - # --- - studies = [study.split("/")[-1] for study in va["studies"]] - # --- - if not studies: - printe.output(f"!!! studies not found: {caseId=}.") - continue - # --- - tab.append({"caseId": caseId, "case_url": case_url, "title": title, "studies": studies, "author": author}) - # --- - del ids_tab - # --- - multi_work(tab) - - -def main_by_ids(ids): - printe.output(f"<> start.py main_by_ids: {len(ids)=}:") - # --- - ids_tab = {caseId: all_ids[caseId] for caseId in ids if caseId in all_ids} - # --- - not_in = [c for c in ids if c not in all_ids] - # --- - print(f"main_by_ids caseId not in all_ids: {len(not_in)}") - # --- - main(ids_tab) - - -if __name__ == "__main__": - ids = [arg.strip() for arg in sys.argv if arg.strip().isdigit()] - # --- - ids = {x: all_ids[x] for x in ids if x in all_ids} - # --- - if ids: - main(ids) - else: - main(ids_by_caseId) diff --git a/mass/radio/st3sort/wanted.py b/mass/radio/st3sort/wanted.py deleted file mode 100644 index 2d72f302..00000000 --- a/mass/radio/st3sort/wanted.py +++ /dev/null @@ -1,53 +0,0 @@ -""" -tfj run wanted --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py mass/radio/st3/wanted nomulti unused" - -python3 core8/pwb.py mass/radio/st3/wanted nomulti unused ask -python3 core8/pwb.py mass/radio/st3/wanted nomulti ask -python3 core8/pwb.py mass/radio/st3/wanted nomulti add_category - -""" -import sys -import re - -# --- -from newapi.ncc_page import NEW_API -from mass.radio.st3sort.start3 import main_by_ids - -# --- -api_new = NEW_API('www', family='nccommons') -api_new.Login_to_wiki() -# --- - - -def titles_to_ids(titles): - cases = [] - # --- - reg = r'^Category:Radiopaedia case (\d+) (.*?)$' - # --- - for cat in titles: - match = re.match(reg, cat) - if match: - case_id = match.group(1) - cases.append(case_id) - # --- - return cases - - -# --- -prop = "Wantedcategories" -# --- -if "unused" in sys.argv: - prop = "Unusedcategories" -# --- -# Unusedcategories: { "ns": 14, "title": "Category:Radiopaedia case 10033 Congenital diaphragmatic hernia" } -# Wantedcategories:{'value': '823', 'ns': 14, 'title': 'Category:Radiopaedia case 154144 Primary CNS lymphoma-atypical cortical location'} -# --- -cats = api_new.querypage_list(qppage=prop, qplimit="max", Max=5000) -# --- -cats = [x['title'] for x in cats] -# --- -print(f"len cats: {len(cats)}") -# --- -wanted_ids = titles_to_ids(cats) -# --- -main_by_ids(wanted_ids)
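
Note on the new dump mode: after upload_images() fills self.img_to_url, OneCase.start() now passes that mapping (study id -> {File title -> {"url", "id"}}) to dump_studies_urls_to_files(). The real helper lives in mass/radio/bots/studies_utf.py and is not part of this patch, so the sketch below is only an assumption of roughly what it could do (write one JSON file per study into a studies_urls_to_files directory); the directory path and file layout are guesses, not confirmed by the source.

    # Hypothetical sketch only -- the actual dump_studies_urls_to_files is
    # defined in mass/radio/bots/studies_utf.py, which this patch does not show.
    import json
    from pathlib import Path

    # assumed output location; the real path is configured elsewhere
    studies_urls_to_files_dir = Path("/data/project/mdwiki/studies_urls_to_files")

    def dump_studies_urls_to_files(img_to_url):
        """Write one JSON file per study, mapping File titles to source URLs and ids."""
        studies_urls_to_files_dir.mkdir(parents=True, exist_ok=True)
        for study_id, files in img_to_url.items():
            out_file = studies_urls_to_files_dir / f"{study_id}.json"
            with open(out_file, "w", encoding="utf-8") as f:
                json.dump(files, f, ensure_ascii=False, indent=2)

In this patch the mode is triggered from the command line, e.g. "python3 core8/pwb.py mass/radio/st3/o 154713 dump_studies_urls_to_files": when that flag is present, upload_images() returns before any upload and only the URL mapping is collected and dumped.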