Commit 4c74dba
MrIbrahem committed Apr 14, 2024
1 parent 6e93d7a commit 4c74dba
Showing 4 changed files with 143 additions and 48 deletions.
72 changes: 53 additions & 19 deletions fix_mass/fix_sets/bots/get_img_info.py
@@ -21,16 +21,20 @@

st_dic_infos = Dir / "jsons/studies_files_infos"

def dump_st(data, s_id):
file = st_dic_infos / f"{s_id}_s_id.json"

def dump_st(data, file):

with open(file, "w", encoding="utf-8") as f:
json.dump(data, f, ensure_ascii=False, indent=2)
printe.output(f"<<green>> write {len(data)} to file: {file}")

def gt_img_info(titles):

def gt_img_info(titles, id_to_url={}):
# ---
titles = [titles] if not isinstance(titles, list) else titles
# ---
titles = [x for x in titles if x]
# ---
info = {}
printe.output(f"one_img_info: {len(titles)=}")
# ---
@@ -54,11 +58,11 @@ def gt_img_info(titles):
# ---
params = {
"action": "query",
"titles": "|".join(titles),
# "titles": "|".join(titles),
# "prop": "revisions|categories|info|extlinks",
"prop": "extlinks",
"prop": "revisions|extlinks",
# "clprop": "sortkey|hidden", # categories
# "rvprop": "timestamp|content|user|ids", # revisions
"rvprop": "content", # revisions
# "cllimit": "max", # categories
"ellimit": "max", # extlinks
"formatversion": "2",
@@ -83,41 +87,71 @@ def gt_img_info(titles):
extlinks = page.get("extlinks", [])
title = page.get("title")
# ---
info[title] = {"img_url": "", "case_url": "", "study_url": "", "caseId": "", "studyId": ""}
# info[title] = {"img_url": "", "case_url": "", "study_url": "", "caseId": "", "studyId": "", "img_id": ""}
info[title] = {"img_url": "", "img_id": ""}
# ---
for extlink in extlinks:
url = extlink.get("url")
ma = re.match("https://radiopaedia.org/cases/(\d+)/studies/(\d+)", url)
# ma = re.match("https://radiopaedia.org/cases/(\d+)/studies/(\d+)", url)
if url.find("/images/") != -1:
info[title]["img_url"] = url

elif re.match(r"^https://radiopaedia.org/cases/[^\d\/]+$", url):
info[title]["case_url"] = url
# elif re.match(r"^https://radiopaedia.org/cases/[^\d\/]+$", url):
# info[title]["case_url"] = url

elif ma:
info[title]["study_url"] = url
info[title]["caseId"] = ma.group(1)
info[title]["studyId"] = ma.group(2)
# elif ma:
# info[title]["study_url"] = url
# info[title]["caseId"] = ma.group(1)
# info[title]["studyId"] = ma.group(2)
# ---
revisions = page.get("revisions")
if info[title]["img_url"]:
continue
# ---
if not revisions:
continue
# ---
revisions = revisions[0]["content"]
# match * Image ID: 58331091 in revisions.split("\n")
ma = re.search(r"Image ID: (\d+)", revisions)
if ma:
info[title]["img_id"] = ma.group(1)
info[title]["img_url"] = id_to_url.get(str(ma.group(1)), "")
else:
print(revisions)
# ---
# printe.output(json.dumps(pages, indent=2))
# ---
return info


def one_img_info(title, study_id):
def one_img_info(title, study_id, json_data):
# ---
info = gt_img_info(title)
file = st_dic_infos / f"{study_id}_s_id.json"
# ---
if file.exists():
printe.output(f"<<green>> one_img_info: {file} exists")
with open(file, encoding="utf-8") as f:
return json.load(f)
# ---
id_to_url = {}
# ---
for x in json_data:
for n, image in enumerate(x["images"], start=1):
id_to_url[str(image["id"])] = image["public_filename"]
# ---
info = gt_img_info(title, id_to_url)
# ---
# printe.output(json.dumps(pages, indent=2))
# ---
dump_st(info, study_id)
dump_st(info, file)
# ---
return info


def test():
title = ["File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 4).jpg", "File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 2).jpg"]
info = one_img_info(title)
title = ["File:1st metatarsal head fracture (Radiopaedia 99187-120594 Frontal 1).png", "File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 02).jpg"]
info = gt_img_info(title)
# ---
print(json.dumps(info, indent=2))
# ---
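For context, a minimal standalone sketch of the fallback introduced above: when a file page's external links carry no /images/ URL, the "Image ID: NNN" noted in the page wikitext is extracted and resolved through the id-to-URL map that one_img_info() now builds from the study stacks. The stack entry and wikitext below are invented sample data.

import re

# Invented stack entry shaped like the get_stacks() output consumed by one_img_info().
sample_stacks = [
    {
        "modality": "CT",
        "images": [
            {"id": 58331091, "position": 1, "plane_projection": "Axial",
             "public_filename": "https://prod-images-static.radiopaedia.org/images/58331091/sample.jpg"},
        ],
    },
]

# Same mapping one_img_info() passes to gt_img_info() as id_to_url.
id_to_url = {str(img["id"]): img["public_filename"] for entry in sample_stacks for img in entry["images"]}

# Fallback used in gt_img_info(): no /images/ extlink, so read "Image ID: NNN" from the revision text.
revision_text = "Some wikitext\n* Image ID: 58331091\nMore wikitext"
ma = re.search(r"Image ID: (\d+)", revision_text)
img_url = id_to_url.get(ma.group(1), "") if ma else ""
print(img_url)  # -> https://prod-images-static.radiopaedia.org/images/58331091/sample.jpg
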
5 changes: 5 additions & 0 deletions fix_mass/fix_sets/bots/mv_files.py
@@ -80,6 +80,11 @@ def to_move_work(text, to_move):
if "mv" in sys.argv:
for ty, files in to_move.items():
# ---
# if any file start with http return text
if any(x.startswith("http") for x in files.values()):
printe.output(f"<<red>> {ty} {len(files)} x.startswith(http)")
return text
# ---
printe.output(f"<<blue>> {ty} {len(files)}")
# printe.output(files)
# ---
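A small sketch of the guard added to to_move_work() above; the batch below is hypothetical, with one entry that was never resolved to a Commons file name.

# Hypothetical move batch: positions map to target names, as in to_move_work().
files = {
    1: "File:Demo study (Radiopaedia 0-0 Axial 1).jpg",
    2: "https://prod-images-static.radiopaedia.org/images/1/unresolved.jpg",
}

if any(x.startswith("http") for x in files.values()):
    # Mirrors the early return: leave the page text untouched rather than move a raw URL.
    print("unresolved URL in batch, skipping moves")
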
78 changes: 59 additions & 19 deletions fix_mass/fix_sets/bots/set_text.py
@@ -4,65 +4,105 @@
"""
from newapi import printe


def make_text(modality, files, set_title):
def make_text(modality, files, set_title, leen):
# ---
text = f"== {modality} ==\n"
if leen == 1:
text = ""

text += "{{Imagestack\n|width=850\n"
text += f"|title={set_title}\n|align=centre\n|loop=no\n"

# sort files {1: "file:...", 2: "file:..."}
files = {k: v for k, v in sorted(files.items())}

for n, image_name in files.items():
for _n, image_name in files.items():
text += f"|{image_name}|\n"
# ---
text += "\n}}\n\n"
text += "}}\n"
# ---
return text


def make_text_one_study(json_data, url_to_file, study_title):
def make_text_one_study(json_data, data, study_title):
# ---
text = ""
url_to_file = {v["img_url"]: x for x, v in data.items()}
img_id_to_file = {str(v["img_id"]): x for x, v in data.items()}
# ---
to_move = {}
# ---
modalities = set([x["modality"] for x in json_data])
# ---
printe.output(f"modalities: {modalities}")
# ---
noo = 0
# ---
for x in json_data:
# ---
noo = 0
# ---
modality = x["modality"]
images = x["images"]
images = x["images"]
# ---
print(f"modality: {modality}, images: {len(images)}")
ty = modality
# ---
# print(f"modality: {modality}, images: {len(images)}")
# ---
files = {}
# ---
# sort images by position key
images = sorted(images, key=lambda x: x["position"])
# images = sorted(images, key=lambda x: x["position"])
# ---
for n, image in enumerate(images, start=1):
# ---
plane_projection = image["plane_projection"]
# ---
if len(modalities) == 1 and plane_projection:
ty = plane_projection
# ---
if ty not in to_move:
to_move[ty] = {}
# ---
img_id = image["id"]
public_filename = image["public_filename"]
# ---
file_name = url_to_file.get(public_filename)
# ---
if not file_name:
file_name = img_id_to_file.get(str(img_id))
# print(f"img_id_to_file file_name: {file_name}")
# ---
if not file_name:
noo += 1
file_name = public_filename
# ---
files[n] = file_name
numb = len(to_move[ty]) + 1
# ---
# files[numb] = file_name
to_move[ty][numb] = file_name
# ---
# ---
print(f"noo: {noo}")
print(f"files: {len(files)}")
# ---
text += make_text(modality, files, study_title)
# ---
to_move[modality] = files
# ---
# # ---
# to_move[ty].update(files)
# ---
print(f"noo: {noo}")
# ---
text = ""
# ---
study_title2 = study_title
# ---
# if len(to_move) > 1:
# study_title2 = ""
# ---
# sum all files in to_move
all_files = sum([len(x) for x in to_move.values()])
# ---
if all_files == len(to_move):
printe.output("len to_move == all_files")
return text, to_move
# ---
for ty, files in to_move.items():
print(f"ty: {ty}, files: {len(files)}")
text += make_text(ty, files, study_title2, len(to_move))
# ---
return text, to_move
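To make the generated wikitext concrete, a quick usage sketch of the revised make_text(); it assumes the repository root (and its newapi dependency) is importable, and the file names and title are placeholders. With leen greater than 1 the modality heading is kept; with leen == 1 it is dropped.

# Assumes the repo root is on PYTHONPATH and the newapi package is available.
from fix_mass.fix_sets.bots.set_text import make_text

files = {2: "File:Demo (Radiopaedia 0-0 Axial 2).jpg", 1: "File:Demo (Radiopaedia 0-0 Axial 1).jpg"}
print(make_text("CT", files, "Demo study", 2))
# == CT ==
# {{Imagestack
# |width=850
# |title=Demo study
# |align=centre
# |loop=no
# |File:Demo (Radiopaedia 0-0 Axial 1).jpg|
# |File:Demo (Radiopaedia 0-0 Axial 2).jpg|
# }}
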
36 changes: 26 additions & 10 deletions fix_mass/fix_sets/fix.py
@@ -7,6 +7,7 @@
import json
import sys
from pathlib import Path
import xxlimited
from newapi import printe

from fix_mass.fix_sets.bots.get_img_info import one_img_info
@@ -15,7 +16,6 @@
from fix_mass.fix_sets.bots.study_files import get_study_files
from fix_mass.fix_sets.bots.mv_files import to_move_work
from fix_mass.fix_sets.jsons.files import studies_titles, studies_titles2
from fix_mass.fix_sets.bots.done import studies_done_append
from fix_mass.fix_sets.bots.done import studies_done_append, find_done #find_done(study_id)

from newapi.ncc_page import MainPage as ncc_MainPage
@@ -44,27 +44,26 @@ def update_set_text(title, n_text, study_id):
n_text += f"\n\n{cat_text}"
# ---
if p_text != n_text:
page.save(newtext=n_text, summary="update")
tyy = page.save(newtext=n_text, summary="update")
# ---
studies_done_append(study_id)
if tyy:
studies_done_append(study_id)
# ---


def work_text(study_id, study_title):
files = get_study_files(study_id)
# ---
data = one_img_info(files, study_id)
json_data = get_stacks(study_id)
# ---
data = one_img_info(files, study_id, json_data)
# ---
# 'File:Appendicitis (CT angiogram) (Radiopaedia 154713-134732 This comic explains the pathophysiology of appendicitis. 1).jpg': {'img_url': 'https://prod-images-static.radiopaedia.org/images/61855971/f11ad965ab35e44ae8ac9ed236afb1cf4547507d8f464cbc3c6316a4cb76fb32.jpg', 'case_url': 'https://radiopaedia.org/cases/appendicitis-ct-angiogram', 'study_url': 'https://radiopaedia.org/cases/154713/studies/134732', 'caseId': '154713', 'studyId': '134732'}
# printe.output(data)
# ---
url_to_file = {v["img_url"]: x for x, v in data.items()}
# ---
# printe.output(json.dumps(url_to_file, indent=2))
# ---
json_data = get_stacks(study_id)
# ---
text, to_move = make_text_one_study(json_data, url_to_file, study_title)
text, to_move = make_text_one_study(json_data, data, study_title)
# ---
return text, to_move

@@ -74,6 +73,9 @@ def work_one_study(study_id):
# ---
study_title = studies_titles.get(study_id)# or studies_titles2.get(study_id)
# ---
if not study_title and "studies_titles2" in sys.argv:
study_title = studies_titles2.get(study_id)
# ---
printe.output(f"study_id: {study_id}, study_title: {study_title}")
# ---
if not study_title:
@@ -82,10 +84,18 @@ def work_one_study(study_id):
# ---
if find_done(study_id):
printe.output(f"<<purple>> study_id: {study_id} already done")
return
if "nodone" not in sys.argv:
return
# ---
text, to_move = work_text(study_id, study_title)
# ---
text = text.strip()
# ---
if text.find("|http") != -1:
printe.output(f"<<red>> text has http links... study_id: {study_id}")
printe.output(text)
return
# ---
text = to_move_work(text, to_move)
# ---
update_set_text(study_title, text, study_id)
@@ -104,6 +114,12 @@ def main(ids):
ids = [arg for arg in sys.argv[1:] if arg.isdigit()]
# ---
if "studies_titles" in sys.argv:
# studies_titles keys not in studies_titles2
# ids = [ x for x in studies_titles.keys() if x not in studies_titles2 ]
ids = list(studies_titles.keys())
elif "studies_titles2" in sys.argv:
ids = [ x for x in studies_titles2.keys() if x not in studies_titles ]
# ---
printe.output(f"len of ids: {len(ids)}")
# ---
main(ids)
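As a usage note, the entry point above now reacts to a few extra command-line tokens; the invocations below are illustrative (the script path and study ID are placeholders, the flag names come from the diff).

# python fix_mass/fix_sets/fix.py 134732             # run the studies given as numeric IDs
# python fix_mass/fix_sets/fix.py studies_titles     # run every ID listed in studies_titles
# python fix_mass/fix_sets/fix.py studies_titles2    # run IDs present only in studies_titles2
# python fix_mass/fix_sets/fix.py 134732 nodone      # re-run a study even if find_done() marks it done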
