Skip to content

Commit

Permalink
Merge pull request #132 from bptlab/feature/130-cohort-ontology
Browse files Browse the repository at this point in the history
Feature/130 cohort ontology
  • Loading branch information
nils-schmitt committed May 27, 2024
2 parents 62e9d79 + c3a257e commit 127c998
Show file tree
Hide file tree
Showing 7 changed files with 133 additions and 0 deletions.
Binary file modified tracex_project/db.sqlite3
Binary file not shown.
21 changes: 21 additions & 0 deletions tracex_project/extraction/logic/modules/module_cohort_tagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def execute_and_save(

cohort_dict = self.__extract_cohort_tags(patient_journey)
cohort_dict = self.__remove_placeholder(cohort_dict)
cohort_dict = self.normalize_coniditons_snomed(cohort_dict)

return cohort_dict

Expand Down Expand Up @@ -65,3 +66,23 @@ def __remove_placeholder(cohort_data) -> Optional[Dict[str, str]]:
return None

return cohort_dict

@staticmethod
def normalize_coniditons_snomed(cohort_dict) -> Optional[Dict[str, str]]:
"""Normalizes conditions to a SNOMED code."""
condition = cohort_dict.get("condition")
preexisting_condition = cohort_dict.get("preexisting_condition")

if condition is not None:
(
cohort_dict["condition"],
cohort_dict["condition_snomed_code"],
) = u.get_snomed_ct_info(condition)

if preexisting_condition is not None:
(
cohort_dict["preexisting_condition"],
cohort_dict["preexisting_condition_snomed_code"],
) = u.get_snomed_ct_info(preexisting_condition)

return cohort_dict
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Generated by Django 4.2.13 on 2024-05-26 09:35

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    """Add SNOMED code columns to Cohort and link Cohort to Trace."""

    dependencies = [
        ("extraction", "0021_alter_event_event_type_alter_event_location_and_more"),
    ]

    operations = [
        # The link between the two models moves from Trace to Cohort.
        # NOTE(review): nothing here copies existing links from the removed
        # field to the new one -- confirm that is acceptable.
        migrations.RemoveField(
            model_name="trace",
            name="cohort",
        ),
        # Optional SNOMED code columns for both condition fields.
        migrations.AddField(
            model_name="cohort",
            name="condition_snomed_code",
            field=models.IntegerField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name="cohort",
            name="preexisting_condition_snomed_code",
            field=models.IntegerField(blank=True, null=True),
        ),
        # One-to-one link from Cohort to its Trace, reachable from the trace
        # side as `cohort`.
        migrations.AddField(
            model_name="cohort",
            name="trace",
            field=models.OneToOneField(
                null=True,
                on_delete=django.db.models.deletion.CASCADE,
                related_name="cohort",
                to="extraction.trace",
            ),
        ),
        # The descriptive cohort columns below all end up optional
        # (null and blank allowed).
        migrations.AlterField(
            model_name="cohort",
            name="age",
            field=models.IntegerField(blank=True, null=True),
        ),
        migrations.AlterField(
            model_name="cohort",
            name="condition",
            field=models.CharField(blank=True, max_length=50, null=True),
        ),
        migrations.AlterField(
            model_name="cohort",
            name="origin",
            field=models.CharField(blank=True, max_length=50, null=True),
        ),
        migrations.AlterField(
            model_name="cohort",
            name="preexisting_condition",
            field=models.CharField(blank=True, max_length=100, null=True),
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Generated by Django 4.2.13 on 2024-05-26 09:41

from django.db import migrations, models


class Migration(migrations.Migration):
    """Replace the Cohort `gender` column with an optional `sex` column."""

    dependencies = [
        ("extraction", "0022_add_snomed_trace_cohort"),
    ]

    operations = [
        # NOTE(review): removing `gender` and then adding `sex` does not carry
        # stored values over; a RenameField would keep them -- confirm the
        # loss of existing values is intended.
        migrations.RemoveField(
            model_name="cohort",
            name="gender",
        ),
        migrations.AddField(
            model_name="cohort",
            name="sex",
            field=models.CharField(blank=True, max_length=25, null=True),
        ),
    ]
2 changes: 2 additions & 0 deletions tracex_project/extraction/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,9 @@ class Cohort(models.Model):
sex = models.CharField(max_length=25, null=True, blank=True)
origin = models.CharField(max_length=50, null=True, blank=True)
condition = models.CharField(max_length=50, null=True, blank=True)
condition_snomed_code = models.IntegerField(null=True, blank=True)
preexisting_condition = models.CharField(max_length=100, null=True, blank=True)
preexisting_condition_snomed_code = models.IntegerField(null=True, blank=True)
manager = models.Manager()

def __str__(self):
Expand Down
13 changes: 13 additions & 0 deletions tracex_project/tracex/logic/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,3 +119,16 @@
("activity_labeling", "Activity Labeler"),
("cohort_tagging", "Cohort Tagger")
]
# Description-search endpoint of the SNOMED CT browser (Snowstorm) API.
SNOMED_CT_API_URL = (
    "https://browser.ihtsdotools.org/snowstorm/snomed-ct/browser/MAIN/descriptions"
)
# Base query parameters for a description search. The search term itself is
# supplied per request; the limits restrict the response to a single match.
SNOMED_CT_PARAMS = {
    "limit": 1,
    "conceptActive": "true",
    "lang": "english",
    "skipTo": 0,
    "returnLimit": 1,
}
# Headers sent with every SNOMED CT request.
# NOTE(review): presumably the public endpoint rejects default client user
# agents -- confirm.
SNOMED_CT_HEADERS = {
    "User-Agent": "browser",
}
22 changes: 22 additions & 0 deletions tracex_project/tracex/logic/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
DataFrameUtilities -- Groups all functions related to DataFrame operations.
"""
import os
import json
from pathlib import Path
import base64
import tempfile
Expand All @@ -19,16 +20,21 @@
import pandas as pd
import pm4py
import numpy as np
import requests

from django.conf import settings
from django.db.models import Q
from openai import OpenAI

from tracex.logic.logger import log_tokens_used
from tracex.logic.constants import (
MAX_TOKENS,
TEMPERATURE_SUMMARIZING,
MODEL,
OAIK,
SNOMED_CT_API_URL,
SNOMED_CT_PARAMS,
SNOMED_CT_HEADERS,
)

from extraction.models import Trace
Expand Down Expand Up @@ -120,6 +126,22 @@ def get_snippet_bounds(index: int, length: int) -> tuple[int, int]:

return lower_bound, upper_bound

def get_snomed_ct_info(term, timeout=10):
    """Get the first matched name and code of a SNOMED CT term.

    Args:
        term: Free-text term to search for.
        timeout: Seconds to wait for the SNOMED CT API before giving up.

    Returns:
        A `(term, code)` tuple taken from the first item of the response,
        or `(None, None)` when the response contains no items.

    Raises:
        requests.RequestException: If the request fails or times out.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    # Build a per-call copy so the search term is never written into the
    # shared module-level defaults (which would leak state between calls).
    query = {**SNOMED_CT_PARAMS, "term": term}
    response = requests.get(
        SNOMED_CT_API_URL,
        params=query,
        headers=SNOMED_CT_HEADERS,
        timeout=timeout,  # without a timeout, requests can block indefinitely
    )
    data = json.loads(response.text)

    items = data.get("items")
    if not items:
        return None, None

    first_match = items[0]
    return first_match["term"], first_match["concept"]["conceptId"]

class Conversion:
"""
Expand Down

0 comments on commit 127c998

Please sign in to comment.