diff --git a/requirements.txt b/requirements.txt index d21f25b9..da64168e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,4 @@ plotly~=5.22.0 pandas~=2.1.3 numpy~=1.26.2 jinja2~=3.1.4 +regex~=2024.5.15 diff --git a/tracex_project/extraction/admin.py b/tracex_project/extraction/admin.py index 2fc93cd4..0c0dcf4e 100644 --- a/tracex_project/extraction/admin.py +++ b/tracex_project/extraction/admin.py @@ -1,10 +1,24 @@ """Admin file for extraction app.""" +from typing import Union + from django.contrib import admin + from extraction.models import Event, PatientJourney, Prompt, Trace, Cohort, Metric class CohortInline(admin.StackedInline): - """Inline for the Cohort model, used to display the related Cohort object in the Trace admin page.""" + """ + Django admin interface for the Cohort model. + + This inline admin interface is used to manage Cohort instances directly from the Trace admin page. + No extra blank forms are displayed for adding new Cohort instances, and deletion of Cohort instances + from the Trace admin page is not allowed. + + Attributes: + model: Specifies the model that this inline admin interface is for. + extra: Defines how many extra blank forms are displayed on the admin page when a new Trace is created. + can_delete: Determines whether the deletion of instances of the model is allowed from the admin interface. + """ model = Cohort extra = 0 @@ -12,14 +26,34 @@ class CohortInline(admin.StackedInline): class TraceInline(admin.TabularInline): - """Inline for the Trace model, used to display the related Trace objects in the PatientJourney admin page.""" + """ + Django admin interface for the Trace model. + + This inline admin interface is used to manage Trace instances directly from the PatientJourney admin page. + No extra blank forms are displayed for adding new Trace instances. + + Attributes: + model: Specifies the model that this inline admin interface is for. 
+ extra: Defines how many extra blank forms are displayed on the admin page when a new PatientJourney is created. + """ model = Trace extra = 0 # Controls the number of empty forms displayed for adding related objects class EventInline(admin.TabularInline): - """Inline for the Event model, used to display the related Event objects in the Trace admin page.""" + """ + Django admin interface for the Event model. + + This inline admin interface is used to manage Event instances directly from the Trace admin page. + No extra blank forms are displayed for adding new Event instances. Certain fields related to metrics + are read-only. + + Attributes: + model: Specifies the model that this inline admin interface is for. + extra: Defines how many extra blank forms are displayed on the admin page when a new Trace is created. + readonly_fields: Specifies which fields on the admin interface are read-only. + """ model = Event extra = 0 @@ -29,36 +63,39 @@ class EventInline(admin.TabularInline): "metrics_correctness_confidence", ) - def metrics_activity_relevance(self, obj): + @staticmethod + def metrics_activity_relevance(obj: Event) -> Union[str, int]: """Returns the activity relevance metric for the event.""" return obj.metrics.activity_relevance if hasattr(obj, "metrics") else "-" - def metrics_timestamp_correctness(self, obj): + @staticmethod + def metrics_timestamp_correctness(obj: Event) -> Union[str, int]: """Returns the timestamp correctness metric for the event.""" return obj.metrics.timestamp_correctness if hasattr(obj, "metrics") else "-" - def metrics_correctness_confidence(self, obj): + @staticmethod + def metrics_correctness_confidence(obj: Event) -> Union[str, int]: """Returns the correctness confidence metric for the event.""" return obj.metrics.correctness_confidence if hasattr(obj, "metrics") else "-" @admin.register(PatientJourney) class PatientJourneyAdmin(admin.ModelAdmin): - """Admin page for the PatientJourney model.""" + """Django admin interface for 
managing PatientJourney instances and related Trace instances.""" inlines = [TraceInline] @admin.register(Trace) class TraceAdmin(admin.ModelAdmin): - """Admin page for the Trace model.""" + """Django admin interface for managing Trace instances and related Cohort and Event instances.""" inlines = [CohortInline, EventInline] @admin.register(Event) class EventAdmin(admin.ModelAdmin): - """Admin page for the Event model.""" + """Django admin interface for managing Event instances.""" admin.site.register(Metric) diff --git a/tracex_project/extraction/apps.py b/tracex_project/extraction/apps.py index fa451f86..d9f5128b 100644 --- a/tracex_project/extraction/apps.py +++ b/tracex_project/extraction/apps.py @@ -3,7 +3,16 @@ class ExtractionConfig(AppConfig): - """App configuration class for django UI.""" + """ + Configuration class for the 'extraction' Django application. + + This class allows customization of application configuration. It sets the default type of auto-created + primary key fields to be 64-bit integers and specifies the name of the application. + + Attributes: + default_auto_field: The type of auto-created primary key fields for models in this application. + name: The name of the application that is being configured. + """ default_auto_field = "django.db.models.BigAutoField" name = "extraction" diff --git a/tracex_project/extraction/content/inputs/journey_synth_covid_0.txt b/tracex_project/extraction/content/inputs/journey_synth_covid_0.txt deleted file mode 100644 index f50678b2..00000000 --- a/tracex_project/extraction/content/inputs/journey_synth_covid_0.txt +++ /dev/null @@ -1,3 +0,0 @@ -In June 2022 I began experiencing the first symptoms of Covid-19. It started with a mild cough and fatigue, which I initially brushed off as a common cold. -Somtime later I developed a high fever and difficulty breathing. Concerned, I decided to get tested for Covid-19. -On June 17th, I went to a local testing center and underwent a PCR test. 
After several days I received the results and got tested negative. Later I got infected and tested positive. \ No newline at end of file diff --git a/tracex_project/extraction/content/outputs/all_traces_event_type.xes b/tracex_project/extraction/content/outputs/all_traces_event_type.xes deleted file mode 100644 index 56a4c618..00000000 --- a/tracex_project/extraction/content/outputs/all_traces_event_type.xes +++ /dev/null @@ -1,489 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tracex_project/extraction/fixtures/prompts_fixture.json b/tracex_project/extraction/fixtures/prompts_fixture.json index 09fa7093..31b3b57d 100644 --- a/tracex_project/extraction/fixtures/prompts_fixture.json +++ b/tracex_project/extraction/fixtures/prompts_fixture.json @@ -1,897 +1,1073 @@ [ - { +{ "model": "extraction.prompt", "pk": 1, "fields": { - "name": "TEXT_TO_ACTIVITY_MESSAGES", - "category": "few-shot", - "text": [ - { - "role": "system", - "content": "You are an expert in text understanding and summarization. 
Your Job is to take a given text about an illness and convert it into bullet points regarding all important points about the course of the disease. Do not include time dates and use a miximum of 6 words per bullet point. Include the number of the sentence in the text from which you take the bullet point. The related numbers are in front of the sentences. Only include ONE sentence number per bullet point!" - }, - { - "role": "user", - "content": "1: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever.\n2: Four days later I went to the doctor and got tested positive for Covid19.\n3: Then I got hospitalized for two weeks." - }, - { - "role": "assistant", - "content": "starting to experience symptoms #1\nvisiting doctor's #2\ntesting positive for Covid19 #2\ngetting admissioned to hospital #3\ngetting discharged from hospital #3" - }, - { - "role": "user", - "content": "8: Concerned about my condition, I contacted my primary care physician via phone.\n9: He advised me to monitor my symptoms and stay at home unless they became severe." - }, - { - "role": "assistant", - "content": "contacting primary care physician #8\nmonitoring symptoms at home #9" - }, - { - "role": "user", - "content": "5: First symptoms on 01/04/2020" - }, - { - "role": "assistant", - "content": "starting to experience symptoms #5" - }, - { - "role": "user", - "content": "1: On July 15, 2022, I started experiencing the first symptoms of Covid-19 for five days.\n2: Initially, I had a mild cough and fatigue." - }, - { - "role": "assistant", - "content": "starting to experience symptoms #1\nending to experience symptoms #1" - } - ] + "name": "TEXT_TO_ACTIVITY_MESSAGES", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text understanding and summarization. Your Job is to take a given text about a person's course of disease and to convert it into bullet points. 
If you are asked to focus on a specific condition, follow these steps: First think about which events are typically present in a course of disease with the condition. Then scan the text and determine which of the events you find are relevant to the condition. Finally, summarize only the relevant events in bullet points. Do not include timestamps and use a maximum of 6 words per bullet point. Include the number of the sentence in the text from which you take the bullet point at the end of the sentence, like this '#number'. The related numbers are in front of the sentences." + }, + { + "role": "user", + "content": "Focus on those events that are related to the course of the disease of Covid-19.\n\n1: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever.\n2: Four days later I went to the doctor and got tested positive for Covid-19.\n3: Then I got hospitalized for two weeks." + }, + { + "role": "assistant", + "content": "starting to experience symptoms #1\nvisiting doctor's #2\ntesting positive for Covid-19 #2\ngetting admitted to hospital #3\ngetting discharged from hospital #3" + }, + { + "role": "user", + "content": "8: Concerned about my condition, I contacted my primary care physician via phone.\n9: He advised me to monitor my symptoms and stay at home unless they became severe." + }, + { + "role": "assistant", + "content": "contacting primary care physician #8\nmonitoring symptoms at home #9" + }, + { + "role": "user", + "content": "5: First symptoms on 01/04/2020" + }, + { + "role": "assistant", + "content": "starting to experience symptoms #5" + }, + { + "role": "user", + "content": "Focus on those events that are related to the course of the disease of Covid-19.\n\n1: On July 15, 2022, I started experiencing the first symptoms of Covid-19 for five days.\n2: Initially, I had a mild cough and fatigue." 
+ }, + { + "role": "assistant", + "content": "starting to experience symptoms #1\nending to experience symptoms #1" + }, + { + "role": "user", + "content": "5: After surviving Covid-19, I made getting vaccinated a top priority. 6: I received my first dose of the vaccine in early February 2022 and the second dose three weeks later. 7: Despite the challenges I faced during my infection, I remained determined to protect myself and others from the virus by getting vaccinated." + }, + { + "role": "assistant", + "content": "receiving first dose of vaccine #6\nreceiving second dose of vaccine #6" + } + ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 2, "fields": { - "name": "START_DATE_MESSAGES", - "category": "few-shot", - "text": [ - { - "role": "system", - "content": "You are an expert in text understanding and your job is to take a given text and a given activity label and to extract a start date to this activity label. Only output the extracted start date! Rely also on the context." - }, - { - "role": "user", - "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever. Four days later I went to the doctor and got tested positive for Covid19. In June I got infected again. After that I had a back pain.\nActivity Label: experiencing mild symptoms" - }, - { - "role": "assistant", - "content": "20200401T0000" - }, - { - "role": "user", - "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever. Four days later I went to the doctor and got tested positive for Covid19. In June I got infected again. After that I had a back pain.\nActivity Label: testing positive for Covid19" - }, - { - "role": "assistant", - "content": "20200405T0000" - }, - { - "role": "user", - "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever. 
Four days later I went to the doctor and got tested positive for Covid19. In June I got infected again. After that I had a back pain.\nActivity Label: getting infected again" - }, - { - "role": "assistant", - "content": "20200601T0000" - }, - { - "role": "user", - "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever. Four days later I went to the doctor and got tested positive for Covid19. In June I got infected again. After that I had a back pain.\nActivity Label: having back pain" - }, - { - "role": "assistant", - "content": "N/A" - }, - { - "role": "user", - "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. I had heavy side effects.\nActivity Label: starting to experience symptoms" - }, - { - "role": "assistant", - "content": "20210701T0000" - }, - { - "role": "user", - "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. I had heavy side effects.\nActivity Label: experiencing side effects of vaccination" - }, - { - "role": "assistant", - "content": "20211104T0000" - } - ] + "name": "START_DATE_MESSAGES", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text understanding and your job is to take a given text and a given activity label and to extract a start date to this activity label. The text might contain timestamps in the format --YYYY/MM/DD--, with two hyphens enclosing the date. Only use these timestamps as backup, in case no other time information is available. Only output the extracted start date! Rely on the context to determine the start date, as it might not be explicitly mentioned." 
+ }, + { + "role": "user", + "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever --2020/04/01--. Four days later I went to the doctor and got tested positive for Covid19--2020/04/01--. In June I got infected again--2020/04/01--. After that I had a back pain--2020/04/01--.\nActivity Label: experiencing mild symptoms" + }, + { + "role": "assistant", + "content": "20200401T0000" + }, + { + "role": "user", + "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever --2020/04/01--. Four days later I went to the doctor and got tested positive for Covid19--2020/04/01--. In June I got infected again--2020/04/01--. After that I had a back pain--2020/04/01--.\nActivity Label: testing positive for Covid19" + }, + { + "role": "assistant", + "content": "20200405T0000" + }, + { + "role": "user", + "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever. Four days later I went to the doctor and got tested positive for Covid19. In June I got infected again. After that I had a back pain.\nActivity Label: getting infected again" + }, + { + "role": "assistant", + "content": "20200601T0000" + }, + { + "role": "user", + "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever. Four days later I went to the doctor and got tested positive for Covid19. In June I got infected again. After that I had a back pain.\nActivity Label: having back pain" + }, + { + "role": "assistant", + "content": "N/A" + }, + { + "role": "user", + "content": "Text: I started experiencing flu-like symptoms in July 21--2021/07/01--. I then got tested positive for Covid19--2021/07/01--. In October I got infected again--2021/10/01--. 
Then on the 4th of November I got my first dosage of the vaccine.--2021/11/04-- I had heavy side effects.\nActivity Label: starting to experience symptoms" + }, + { + "role": "assistant", + "content": "20210701T0000" + }, + { + "role": "user", + "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. I had heavy side effects.\nActivity Label: experiencing side effects of vaccination" + }, + { + "role": "assistant", + "content": "20211104T0000" + } + ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 3, "fields": { - "name": "END_DATE_MESSAGES", - "category": "few-shot", - "text": [ - { - "role": "system", - "content": "You are an expert in text understanding and your job is to take a given text, a given activity label and a given timestamp for the beginning of this activity and to then extract an end date to this activity label. Only output the extracted start date! Rely also on the context. Use averages if necessary. If there is no information about the end date at all, please state the start date also as the end date." - }, - { - "role": "user", - "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever. Four days later I went to the doctor and got tested positive for Covid19. In June I got infected again. After that I had a back pain.\nActivity Label: experiencing mild symptoms\nStart Date: 20200401T0000" - }, - { - "role": "assistant", - "content": "20200405T0000" - }, - { - "role": "user", - "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever. Four days later I went to the doctor and got tested positive for Covid19. In June I got infected again. 
After that I had a back pain.\nActivity Label: testing positive for Covid19\nStart Date: 20200405T0000" - }, - { - "role": "assistant", - "content": "20200405T0000" - }, - { - "role": "user", - "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever. Four days later I went to the doctor and got tested positive for Covid19. In June I got infected again. After that I had a back pain.\nActivity Label: getting infected again\nStart Date: 20200601T0000" - }, - { - "role": "assistant", - "content": "20200615T0000" - }, - { - "role": "user", - "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever. Four days later I went to the doctor and got tested positive for Covid19. In June I got infected again. After that I had a back pain.\nActivity Label: having back pain\nStart Date: N/A" - }, - { - "role": "assistant", - "content": "N/A" - }, - { - "role": "user", - "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. I had heavy side effects.\nActivity Label: experiencing side effects of \nStart Date: 20211104T0000" - }, - { - "role": "assistant", - "content": "20211106T0000" - }, - { - "role": "user", - "content": "Text: Four days after the first april 2020 I went to the doctor and got tested positive for Covid19. 
I was then hospitalized for two weeks.\nActivity Label: getting hospitalized\nStart Date: 20200405T0000" - }, - { - "role": "assistant", - "content": "20200419T0000" - }, - { - "role": "user", - "content": "Text: In the next time I made sure to improve my mental well being.\nActivity Label: improving mental well being\nStart Date: 20210610T0000" - }, - { - "role": "assistant", - "content": "20210710T0000" - } - ] + "name": "END_DATE_MESSAGES", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text understanding and your job is to take a given text, a given activity label and a given timestamp for the beginning of this activity and to then extract an end date to this activity label. The text might contain dates in the format --YYYY/MM/DD--, with two hyphens enclosing the date. Only use these as backup, in case no other time information is available. Only output the extracted end date! Rely on the context to determine the end date, as it might not be explicitly mentioned. Use averages if necessary. If there is no information about the end date at all, please state the start date also as the end date." + }, + { + "role": "user", + "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever --2020/04/01--. Four days later I went to the doctor and got tested positive for Covid19--2020/04/01--. In June I got infected again--2020/04/01--. After that I had a back pain--2020/04/01--.\nActivity Label: experiencing mild symptoms\nStart Date: 20200401T0000" + }, + { + "role": "assistant", + "content": "20200405T0000" + }, + { + "role": "user", + "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever --2020/04/01--. Four days later I went to the doctor and got tested positive for Covid19--2020/04/01--. In June I got infected again--2020/04/01--. 
After that I had a back pain--2020/04/01--.\nActivity Label: testing positive for Covid19\nStart Date: 20200405T0000" + }, + { + "role": "assistant", + "content": "20200405T0000" + }, + { + "role": "user", + "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever. Four days later I went to the doctor and got tested positive for Covid19. In June I got infected again. After that I had a back pain.\nActivity Label: getting infected again\nStart Date: 20200601T0000" + }, + { + "role": "assistant", + "content": "20200615T0000" + }, + { + "role": "user", + "content": "Text: On April 1, 2020, I started experiencing mild symptoms such as a persistent cough, fatigue, and a low-grade fever. Four days later I went to the doctor and got tested positive for Covid19. In June I got infected again. After that I had a back pain.\nActivity Label: having back pain\nStart Date: N/A" + }, + { + "role": "assistant", + "content": "N/A" + }, + { + "role": "user", + "content": "Text: I started experiencing flu-like symptoms in July 21--2021/07/21--. I then got tested positive for Covid19--2021/07/21--. In October I got infected again--2021/10/01--. Then on the 4th of November I got my first dosage of the vaccine--2021/11/04--. I had heavy side effects--2021/11/04--.\nActivity Label: experiencing side effects \nStart Date: 20211104T0000" + }, + { + "role": "assistant", + "content": "20211106T0000" + }, + { + "role": "user", + "content": "Text: Four days after the first of April 2020 I went to the doctor and got tested positive for Covid19. 
I was then hospitalized for two weeks.\nActivity Label: getting hospitalized\nStart Date: 20200405T0000" + }, + { + "role": "assistant", + "content": "20200419T0000" + }, + { + "role": "user", + "content": "Text: In the next time I made sure to improve my mental well being.\nActivity Label: improving mental well being\nStart Date: 20210610T0000" + }, + { + "role": "assistant", + "content": "20210710T0000" + } + ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 4, "fields": { - "name": "EVENT_TYPE_MESSAGES", - "category": "few-shot", - "text": [ - { - "role": "system", - "content": "You are an expert in text categorization and your job is to take a given activity label and to classify it into one of the following event types: 'Symptom Onset', 'Symptom Offset', 'Diagnosis', 'Doctor Visit', 'Treatment', 'Hospital Admission', 'Hospital Discharge', 'Medication', 'Lifestyle Change' and 'Feelings'. Please consider the capitalization." - }, - { - "role": "user", - "content": "visiting doctor's" - }, - { - "role": "assistant", - "content": "Doctors Visit" - }, - { - "role": "user", - "content": "testing positive for Covid19" - }, - { - "role": "assistant", - "content": "Diagnosis" - }, - { - "role": "user", - "content": "getting hospitalized" - }, - { - "role": "assistant", - "content": "Hospital Admission" - }, - { - "role": "user", - "content": "isolating at home" - }, - { - "role": "assistant", - "content": "Lifestyle Change" - }, - { - "role": "user", - "content": "prescribed medication for discomfort" - }, - { - "role": "assistant", - "content": "Medication" - }, - { - "role": "user", - "content": "seeking consultation with specialist" - }, - { - "role": "assistant", - "content": "Doctors Visit" - }, - { - "role": "user", - "content": "receiving vaccines to protect against Covid19" - }, - { - "role": "assistant", - "content": "Treatment" - }, - { - "role": "user", - "content": "feeling a sense of relief" - }, - { - "role": "assistant", - "content": "Feeling" - }, 
- { - "role": "user", - "content": "starting to experience symptoms" - }, - { - "role": "assistant", - "content": "Symptom Onset" - } - ] + "name": "EVENT_TYPE_MESSAGES", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text categorization and your job is to take a given activity label and to classify it into one of the following event types: 'Symptom Onset', 'Symptom Offset', 'Diagnosis', 'Doctor Visit', 'Treatment', 'Hospital Admission', 'Hospital Discharge', 'Medication', 'Lifestyle Change' and 'Feelings'. Please consider the capitalization." + }, + { + "role": "user", + "content": "visiting doctor's" + }, + { + "role": "assistant", + "content": "Doctors Visit" + }, + { + "role": "user", + "content": "testing positive for Covid19" + }, + { + "role": "assistant", + "content": "Diagnosis" + }, + { + "role": "user", + "content": "getting hospitalized" + }, + { + "role": "assistant", + "content": "Hospital Admission" + }, + { + "role": "user", + "content": "isolating at home" + }, + { + "role": "assistant", + "content": "Lifestyle Change" + }, + { + "role": "user", + "content": "prescribed medication for discomfort" + }, + { + "role": "assistant", + "content": "Medication" + }, + { + "role": "user", + "content": "seeking consultation with specialist" + }, + { + "role": "assistant", + "content": "Doctors Visit" + }, + { + "role": "user", + "content": "receiving vaccines to protect against Covid19" + }, + { + "role": "assistant", + "content": "Treatment" + }, + { + "role": "user", + "content": "feeling a sense of relief" + }, + { + "role": "assistant", + "content": "Feeling" + }, + { + "role": "user", + "content": "starting to experience symptoms" + }, + { + "role": "assistant", + "content": "Symptom Onset" + } + ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 5, "fields": { - "name": "LOCATION_MESSAGES", - "category": "few-shot", - "text": [ - { - "role": "system", - "content": "You are an expert in text 
categorization and your job is to take a given activity label and categorize it into: 'Home', 'Hospital' or 'Doctors'. Use the context to categorize." - }, - { - "role": "user", - "content": "visiting doctor's" - }, - { - "role": "assistant", - "content": "Doctors" - }, - { - "role": "user", - "content": "consulting doctor over phone" - }, - { - "role": "assistant", - "content": "Home" - }, - { - "role": "user", - "content": "testing positive for Covid19" - }, - { - "role": "assistant", - "content": "Doctors" - }, - { - "role": "user", - "content": "getting hospitalized" - }, - { - "role": "assistant", - "content": "Hospital" - }, - { - "role": "user", - "content": "isolating at home" - }, - { - "role": "assistant", - "content": "Home" - }, - { - "role": "user", - "content": "prescribed medication for discomfort" - }, - { - "role": "assistant", - "content": "Doctors" - }, - { - "role": "user", - "content": "receiving special care with a ventilator" - }, - { - "role": "assistant", - "content": "Hospital" - }, - { - "role": "user", - "content": "receiving vaccines to protect against Covid19" - }, - { - "role": "assistant", - "content": "Doctors" - }, - { - "role": "user", - "content": "feeling a sense of relief" - }, - { - "role": "assistant", - "content": "Home" - }, - { - "role": "user", - "content": "starting to experience symptoms" - }, - { - "role": "assistant", - "content": "Home" - } - ] + "name": "LOCATION_MESSAGES", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text categorization and your job is to take a given activity label and categorize it into: 'Home', 'Hospital' or 'Doctors'. Use the context to categorize." 
+ }, + { + "role": "user", + "content": "visiting doctor's" + }, + { + "role": "assistant", + "content": "Doctors" + }, + { + "role": "user", + "content": "consulting doctor over phone" + }, + { + "role": "assistant", + "content": "Home" + }, + { + "role": "user", + "content": "testing positive for Covid19" + }, + { + "role": "assistant", + "content": "Doctors" + }, + { + "role": "user", + "content": "getting hospitalized" + }, + { + "role": "assistant", + "content": "Hospital" + }, + { + "role": "user", + "content": "isolating at home" + }, + { + "role": "assistant", + "content": "Home" + }, + { + "role": "user", + "content": "prescribed medication for discomfort" + }, + { + "role": "assistant", + "content": "Doctors" + }, + { + "role": "user", + "content": "receiving special care with a ventilator" + }, + { + "role": "assistant", + "content": "Hospital" + }, + { + "role": "user", + "content": "receiving vaccines to protect against Covid19" + }, + { + "role": "assistant", + "content": "Doctors" + }, + { + "role": "user", + "content": "feeling a sense of relief" + }, + { + "role": "assistant", + "content": "Home" + }, + { + "role": "user", + "content": "starting to experience symptoms" + }, + { + "role": "assistant", + "content": "Home" + } + ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 6, "fields": { - "name": "METRIC_ACTIVITY_MESSAGES", - "category": "few-shot", - "text": [ - { - "role": "system", - "content": "You are an expert in text categorization and your job is to take a given bulletpoint and to categorize it into 'No Relevance', 'Low Relevance', 'Moderate Relevance' or 'High Relevance'. It is really important, that that relevance category is correct. Category definition: No Relevance: Events or actions that are not connected to the progression or impact of the disease of the patient in any way. 
Low Relevance: Events or actions that have limited potential to affect the progression of the disease of the patient and hold minimal significance in its course. Moderate Relevance: Events or actions that possess some potential to influence the disease's progression of the patient but may not be critical to its outcome. High Relevance: Events or actions that hold substantial potential to impact the disease's course of the patient and are crucial in understanding its trajectory." - }, - { - "role": "user", - "content": "receiving support from my children" - }, - { - "role": "assistant", - "content": "Low Relevance" - }, - { - "role": "user", - "content": "taking medicine" - }, - { - "role": "assistant", - "content": "High Relevance" - }, - { - "role": "user", - "content": "eating chips" - }, - { - "role": "assistant", - "content": "No Relevance" - }, - { - "role": "user", - "content": "starting to experience symptoms" - }, - { - "role": "assistant", - "content": "High Relevance" - }, - { - "role": "user", - "content": "feeling side effects from vaccination" - }, - { - "role": "assistant", - "content": "Moderate Relevance" - } - ] + "name": "METRIC_ACTIVITY_MESSAGES", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text categorization and your job is to take a given bulletpoint and to categorize it into 'No Relevance', 'Low Relevance', 'Moderate Relevance' or 'High Relevance'. It is really important, that that relevance category is correct. Category definition: No Relevance: Events or actions that are not connected to the progression or impact of the disease of the patient in any way. Low Relevance: Events or actions that have limited potential to affect the progression of the disease of the patient and hold minimal significance in its course. Moderate Relevance: Events or actions that possess some potential to influence the disease's progression of the patient but may not be critical to its outcome. 
High Relevance: Events or actions that hold substantial potential to impact the disease's course of the patient and are crucial in understanding its trajectory." + }, + { + "role": "user", + "content": "receiving support from my children" + }, + { + "role": "assistant", + "content": "Low Relevance" + }, + { + "role": "user", + "content": "taking medicine" + }, + { + "role": "assistant", + "content": "High Relevance" + }, + { + "role": "user", + "content": "eating chips" + }, + { + "role": "assistant", + "content": "No Relevance" + }, + { + "role": "user", + "content": "starting to experience symptoms" + }, + { + "role": "assistant", + "content": "High Relevance" + }, + { + "role": "user", + "content": "feeling side effects from vaccination" + }, + { + "role": "assistant", + "content": "Moderate Relevance" + } + ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 7, "fields": { - "name": "METRIC_TIMESTAMP_MESSAGES", - "category": "few-shot", - "text": [ - { - "role": "system", - "content": "You are an expert in text understanding and your job is to take a given text and to check if a given start date and end date of a given bulletpoint are correct. Correct is a start and end date in the format YYYYMMDDTHHMM if the date is appearing in the patient journey related to bulletpoint. If the start date and end date appearing in the context of the bulletpoint, you should output True. If there is another start or end date in the patient journey, the given timestamps are wrong and you should output False. If the start or end date is not appearing in the patient journey, it could be that the timestamp is estimated. In this case check if the estimation is reasonable and output True if it is and False if it is not." - }, - { - "role": "user", - "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. 
I had heavy side effects.\nActivity Label: starting to experience symptoms\nStart Date: 20210721T0000\nEnd Date: 20210721T0000" - }, - { - "role": "assistant", - "content": "True" - }, - { - "role": "user", - "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. I had heavy side effects.\nActivity Label: starting to experience symptoms\nStart Date: 20210721T0000\nEnd Date: 20210724T0000" - }, - { - "role": "assistant", - "content": "True" - }, - { - "role": "user", - "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. I had heavy side effects.\nActivity Label: starting to experience symptoms\nStart Date: 07/21/2021\nEnd Date: 20210721T0000" - }, - { - "role": "assistant", - "content": "False" - }, - { - "role": "user", - "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. I had heavy side effects.\nActivity Label: starting to experience symptoms\nStart Date: 20210721T0000\nEnd Date: N/A" - }, - { - "role": "assistant", - "content": "False" - }, - { - "role": "user", - "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. I had heavy side effects.\nActivity Label: experiencing heavy side effects of vaccination\nStart Date: 20211104T0000\nEnd Date: 20211107T0000" - }, - { - "role": "assistant", - "content": "True" - }, - { - "role": "user", - "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. 
In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. I had heavy side effects.\nActivity Label: experiencing heavy side effects of vaccination\nStart Date: 20211201T0000\nEnd Date: 20211204T0000" - }, - { - "role": "assistant", - "content": "False" - } - ] + "name": "METRIC_TIMESTAMP_MESSAGES", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text understanding and your job is to take a given text and to check if a given start date and end date of a given bulletpoint are correct. Correct is a start and end date in the format YYYYMMDDTHHMM if the date is appearing in the patient journey related to bulletpoint. If the start date and end date appearing in the context of the bulletpoint, you should output True. If there is another start or end date in the patient journey, the given timestamps are wrong and you should output False. If the start or end date is not appearing in the patient journey, it could be that the timestamp is estimated. In this case check if the estimation is reasonable and output True if it is and False if it is not." + }, + { + "role": "user", + "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. I had heavy side effects.\nActivity Label: starting to experience symptoms\nStart Date: 20210721T0000\nEnd Date: 20210721T0000" + }, + { + "role": "assistant", + "content": "True" + }, + { + "role": "user", + "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. 
I had heavy side effects.\nActivity Label: starting to experience symptoms\nStart Date: 20210721T0000\nEnd Date: 20210724T0000" + }, + { + "role": "assistant", + "content": "True" + }, + { + "role": "user", + "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. I had heavy side effects.\nActivity Label: starting to experience symptoms\nStart Date: 07/21/2021\nEnd Date: 20210721T0000" + }, + { + "role": "assistant", + "content": "False" + }, + { + "role": "user", + "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. I had heavy side effects.\nActivity Label: starting to experience symptoms\nStart Date: 20210721T0000\nEnd Date: N/A" + }, + { + "role": "assistant", + "content": "False" + }, + { + "role": "user", + "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. I had heavy side effects.\nActivity Label: experiencing heavy side effects of vaccination\nStart Date: 20211104T0000\nEnd Date: 20211107T0000" + }, + { + "role": "assistant", + "content": "True" + }, + { + "role": "user", + "content": "Text: I started experiencing flu-like symptoms in July 21. I then got tested positive for Covid19. In October I got infected again. Then on the 4th of November I got my first dosage of the vaccine. 
I had heavy side effects.\nActivity Label: experiencing heavy side effects of vaccination\nStart Date: 20211201T0000\nEnd Date: 20211204T0000" + }, + { + "role": "assistant", + "content": "False" + } + ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 8, "fields": { - "name": "COMPARE_MESSAGES", - "category": "few-shot", - "text": [ - { - "role": "system", - "content": " You are an expert in text understanding and your job is to understand the semantical meaning of bulletpoints and compare the semantic to each other. So you take two bulletpoints and check if they are semantically similar. You should return True if you think they are similar and False if you don't." - }, - { - "role": "user", - "content": "First: receiving support from my children\nSecond: taking medicine" - }, - { - "role": "assistant", - "content": "False" - }, - { - "role": "user", - "content": "First: visiting doctor's\nSecond: going to the doctor" - }, - { - "role": "assistant", - "content": "True" - }, - { - "role": "user", - "content": "First: experiencing covid 19 symptoms\nSecond: first symptoms of covid 19" - }, - { - "role": "assistant", - "content": "True" - }, - { - "role": "user", - "content": "First: experiencing first covid 19 symptoms\nSecond: experiencing worse symptoms" - }, - { - "role": "assistant", - "content": "False" - } - ] + "name": "COMPARE_MESSAGES", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": " You are an expert in text understanding and your job is to understand the semantical meaning of bulletpoints and compare the semantic to each other. So you take two bulletpoints and check if they are semantically similar. You should return True if you think they are similar and False if you don't." 
+ }, + { + "role": "user", + "content": "First: receiving support from my children\nSecond: taking medicine" + }, + { + "role": "assistant", + "content": "False" + }, + { + "role": "user", + "content": "First: visiting doctor's\nSecond: going to the doctor" + }, + { + "role": "assistant", + "content": "True" + }, + { + "role": "user", + "content": "First: experiencing covid 19 symptoms\nSecond: first symptoms of covid 19" + }, + { + "role": "assistant", + "content": "True" + }, + { + "role": "user", + "content": "First: experiencing first covid 19 symptoms\nSecond: experiencing worse symptoms" + }, + { + "role": "assistant", + "content": "False" + } + ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 9, "fields": { - "name": "COHORT_TAG_MESSAGES", - "category": "few-shot", - "text": [ - [ - "condition", - { - "role": "system", - "content": "You are an expert in text understanding and your job is to take a given text about an illness and to extract the illness it is about." - }, - { - "role": "user", - "content": "In July I got infected with Covid-19 which resulted in similar symptoms like a heavy flu." - }, - { - "role": "assistant", - "content": "Covid-19" - }, - { - "role": "user", - "content": "I had a heavy flu in July." - }, - { - "role": "assistant", - "content": "Flu" - }, - { - "role": "user", - "content": "Last year I was feeling really well, when all of a sudden I had severe breathtaking problems and high fever. I thought it was a flu, but it turned out to be Covid-19." - }, - { - "role": "assistant", - "content": "Covid-19" - } - ], - [ - "sex", - { - "role": "system", - "content": "You are an expert in text understanding and your job is to take a given text about an illness and to extract the sex of the author. If the sex isn't clear, you should take the context into account. Only if the context doesn't help, you should return 'N/A'." 
- }, - { - "role": "user", - "content": "I am a 25 year old software engineer living in California with my girlfriend. When I got Covid-19 last year I was really worried about my job and my girlfriend." - }, - { - "role": "assistant", - "content": "male" - }, - { - "role": "user", - "content": "I am a nurse living in Berlin with my boyfriend. When I got Covid-19 last year I was really worried about my job and my boyfriend." - }, - { - "role": "assistant", - "content": "female" - }, - { - "role": "user", - "content": "I got Covid-19 last year and I was really worried about my job. The diesease itself wasn't even that hard but it stressed me out, that I wasn't allowed to go to my job!" - }, - { - "role": "assistant", - "content": "N/A" - }, - { - "role": "user", - "content": "I am a 25 year old software engineer living in California with my girlfriend. When I got Covid-19 that struck me as a mother of two really heavily." - }, - { - "role": "assistant", - "content": "female" - }, - { - "role": "user", - "content": "I am a nurse living in Berlin. When I got Covid-19 I had to stay home what really hit me. As a divorced father I only see my boy once every month and now I couldn't even do that." - }, - { - "role": "assistant", - "content": "male" - } - ], - [ - "age", - { - "role": "system", - "content": "You are an expert in text understanding and your job is to take a given text about an illness and to extract the age of the author. If the sex isn't clear, you should take the context into account. Young means 25, middle aged 50 and old 75. Only if the context doesn't help, you should return 'N/A'." - }, - { - "role": "user", - "content": "I am a 22 year old software engineer living in California with my girlfriend. When I got Covid-19 last year I was really worried about my job and my girlfriend." - }, - { - "role": "assistant", - "content": "22" - }, - { - "role": "user", - "content": "I am a nurse living in Berlin. 
When I got Covid-19 I had to stay home what really hit me. Luckily as a young person I recovered quickly." - }, - { - "role": "assistant", - "content": "25" - }, - { - "role": "user", - "content": "I got Covid-19 last year and I was really worried about my job. The diesease itself wasn't even that hard but it stressed me out, that I wasn't allowed to go to my job!" - }, - { - "role": "assistant", - "content": "N/A" - }, - { - "role": "user", - "content": "I am an old man, so Covid-19 wasn't all easy on me." - }, - { - "role": "assistant", - "content": "75" - } - ], - [ - "origin", - { - "role": "system", - "content": "You are an expert in text understanding and your job is to take a given text about an illness and to extract the origin country of the author. If the origin isn't clear, you should take the context into account. Only if the context doesn't help, you should return 'N/A'." - }, - { - "role": "user", - "content": "I am a 25 year old software engineer living in California with my girlfriend. When I got Covid-19 last year I was really worried about my job and my girlfriend." - }, - { - "role": "assistant", - "content": "United States of America" - }, - { - "role": "user", - "content": "I am a nurse living in Berlin. When I got Covid-19 I had to stay home what really hit me. Luckily as a young person I recovered quickly." - }, - { - "role": "assistant", - "content": "Germany" - }, - { - "role": "user", - "content": "I got Covid-19 last year and I was really worried about my job. The diesease itself wasn't even that hard but it stressed me out, that I wasn't allowed to go to my job!" - }, - { - "role": "assistant", - "content": "N/A" - } - ], - [ - "preexisting_condition", - { - "role": "system", - "content": "You are an expert in text understanding and your job is to take a given text about an illness and to extract previous diseases of the author. These diseases have to be EXPLICITLY MENTIONED. 
And they have to have occured BEFORE the illness the text is about!" - }, - { - "role": "user", - "content": "I got Covid-19 last year, which was hard since I since ever had to fight Asthma." - }, - { - "role": "assistant", - "content": "Asthma" - }, - { - "role": "user", - "content": "I infected me with Covid-19 right after I recovered from a heavy cold." - }, - { - "role": "assistant", - "content": "Cold" - }, - { - "role": "user", - "content": "I got Covid-19 last year and I was really worried about my job. The diesease itself wasn't even that hard but it stressed me out, that I wasn't allowed to go to my job!" - }, - { - "role": "assistant", - "content": "N/A" - }, - { - "role": "user", - "content": "I got Covid-19 last year after I already got it right at the start in 2020." - }, - { - "role": "assistant", - "content": "Covid-19" - } + "name": "COHORT_TAG_MESSAGES", + "category": "few-shot", + "text": [ + [ + "condition", + { + "role": "system", + "content": "You are an expert in text understanding and your job is to take a given text about an illness and to extract the illness it is about. Return only the condition using two words at most." + }, + { + "role": "user", + "content": "In July I got infected with Covid-19 which resulted in similar symptoms like a heavy flu." + }, + { + "role": "assistant", + "content": "Covid-19" + }, + { + "role": "user", + "content": "I had a heavy flu in July." + }, + { + "role": "assistant", + "content": "Flu" + }, + { + "role": "user", + "content": "Last year I was feeling really well, when all of a sudden I had severe breathtaking problems and high fever. I thought it was a flu, but it turned out to be Covid-19." + }, + { + "role": "assistant", + "content": "Covid-19" + } + ], + [ + "gender", + { + "role": "system", + "content": "You are an expert in text understanding and your job is to take a given text about an illness and to extract the gender of the author.
If the gender isn't clear, you should take the context into account. Only if the context doesn't help, you should return 'N/A'." + }, + { + "role": "user", + "content": "I am a 25 year old software engineer living in California with my girlfriend. When I got Covid-19 last year I was really worried about my job and my girlfriend." + }, + { + "role": "assistant", + "content": "male" + }, + { + "role": "user", + "content": "I am a nurse living in Berlin with my boyfriend. When I got Covid-19 last year I was really worried about my job and my boyfriend." + }, + { + "role": "assistant", + "content": "female" + }, + { + "role": "user", + "content": "I got Covid-19 last year and I was really worried about my job. The diesease itself wasn't even that hard but it stressed me out, that I wasn't allowed to go to my job!" + }, + { + "role": "assistant", + "content": "N/A" + }, + { + "role": "user", + "content": "I am a 25 year old software engineer living in California with my girlfriend. When I got Covid-19 that struck me as a mother of two really heavily." + }, + { + "role": "assistant", + "content": "female" + }, + { + "role": "user", + "content": "I am a nurse living in Berlin. When I got Covid-19 I had to stay home what really hit me. As a divorced father I only see my boy once every month and now I couldn't even do that." + }, + { + "role": "assistant", + "content": "male" + } + ], + [ + "age", + { + "role": "system", + "content": "You are an expert in text understanding and your job is to take a given text about an illness and to extract the age of the author. If the age isn't clear, you should take the context into account. Young means 25, middle aged 50 and old 75. Only if the context doesn't help, you should return 'N/A'." + }, + { + "role": "user", + "content": "I am a 22 year old software engineer living in California with my girlfriend. When I got Covid-19 last year I was really worried about my job and my girlfriend."
+ }, + { + "role": "assistant", + "content": "22" + }, + { + "role": "user", + "content": "I am a nurse living in Berlin. When I got Covid-19 I had to stay home what really hit me. Luckily as a young person I recovered quickly." + }, + { + "role": "assistant", + "content": "25" + }, + { + "role": "user", + "content": "I got Covid-19 last year and I was really worried about my job. The diesease itself wasn't even that hard but it stressed me out, that I wasn't allowed to go to my job!" + }, + { + "role": "assistant", + "content": "N/A" + }, + { + "role": "user", + "content": "I am an old man, so Covid-19 wasn't all easy on me." + }, + { + "role": "assistant", + "content": "75" + } + ], + [ + "origin", + { + "role": "system", + "content": "You are an expert in text understanding and your job is to take a given text about an illness and to extract the origin country of the author. If the origin isn't clear, you should take the context into account. Only if the context doesn't help, you should return 'N/A'. Return only the country of origin using two words at most." + }, + { + "role": "user", + "content": "I am a 25 year old software engineer living in California with my girlfriend. When I got Covid-19 last year I was really worried about my job and my girlfriend." + }, + { + "role": "assistant", + "content": "United States of America" + }, + { + "role": "user", + "content": "I am a nurse living in Berlin. When I got Covid-19 I had to stay home what really hit me. Luckily as a young person I recovered quickly." + }, + { + "role": "assistant", + "content": "Germany" + }, + { + "role": "user", + "content": "I got Covid-19 last year and I was really worried about my job. The diesease itself wasn't even that hard but it stressed me out, that I wasn't allowed to go to my job!"
+ }, + { + "role": "assistant", + "content": "N/A" + } + ], + [ + "preexisting_condition", + { + "role": "system", + "content": "You are an expert in text understanding and your job is to take a given text about an illness and to extract previous diseases of the author. These diseases have to be EXPLICITLY MENTIONED. And they have to have occurred BEFORE the illness the text is about! Return only the preexisting disease using two words at most." + }, + { + "role": "user", + "content": "I got Covid-19 last year, which was hard since I since ever had to fight Asthma." + }, + { + "role": "assistant", + "content": "Asthma" + }, + { + "role": "user", + "content": "I infected me with Covid-19 right after I recovered from a heavy cold." + }, + { + "role": "assistant", + "content": "Cold" + }, + { + "role": "user", + "content": "I got Covid-19 last year and I was really worried about my job. The diesease itself wasn't even that hard but it stressed me out, that I wasn't allowed to go to my job!" + }, + { + "role": "assistant", + "content": "N/A" + }, + { + "role": "user", + "content": "I got Covid-19 last year after I already got it right at the start in 2020." + }, + { + "role": "assistant", + "content": "Covid-19" + } + ] ] - ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 10, "fields": { - "name": "PREPROCESSING_SPELLCHECK", - "category": "few-shot", - "text": [ - { - "role": "system", - "content": "You are an expert in text analysis with a focus on spelling accuracy. Your task is to identify any spelling errors in the provided text and correct them. Ensure the corrected text is accurate and readable. Please make sure to give out the full text without shorten it." - }, - { - "role": "user", - "content": "I remeber the day I first learnt about the importnce of spellchek. It was an eye-opener for me." - }, - { - "role": "assistant", - "content": "I remember the day I first learned about the importance of spellcheck. It was an eye-opener for me."
- } - ] + "name": "PREPROCESSING_SPELLCHECK", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text analysis with a focus on spelling accuracy. Your task is to identify any spelling errors in the provided text and correct them. Ensure the corrected text is accurate and readable. Please make sure to give out the full text without shortening it." + }, + { + "role": "user", + "content": "I remeber the day I first learnt about the importnce of spellchek. It was an eye-opener for me." + }, + { + "role": "assistant", + "content": "I remember the day I first learned about the importance of spellcheck. It was an eye-opener for me." + } + ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 11, "fields": { - "name": "PREPROCESSING_PUNCTUATION", - "category": "few-shot", - "text": [ - { - "role": "system", - "content": "You are an expert in text analysis with a focus on grammatical accuracy, specifically punctuation and comma usage. Your task is to identify any punctuation or comma errors in the provided text and correct them. Ensure the corrected text is accurate, readable, and follows standard punctuation rules. Please make sure to give out the full text without shortening it." - }, - { - "role": "user", - "content": "Despite the rainy weather many people attended the outdoor concert, which, was surprising. The band played hit after hit, and the crowd's enthusiasm, was infectious even the most reserved attendees found themselves dancing." - }, - { - "role": "assistant", - "content": "Despite the rainy weather, many people attended the outdoor concert, which was surprising. The band played hit after hit, and the crowd's enthusiasm was infectious; even the most reserved attendees found themselves dancing." 
- } - ] + "name": "PREPROCESSING_PUNCTUATION", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text analysis with a focus on grammatical accuracy, specifically punctuation and comma usage. Your task is to identify any punctuation or comma errors in the provided text and correct them. Ensure the corrected text is accurate, readable, and follows standard punctuation rules. Please make sure to give out the full text without shortening it." + }, + { + "role": "user", + "content": "Despite the rainy weather many people attended the outdoor concert, which, was surprising. The band played hit after hit, and the crowd's enthusiasm, was infectious even the most reserved attendees found themselves dancing." + }, + { + "role": "assistant", + "content": "Despite the rainy weather, many people attended the outdoor concert, which was surprising. The band played hit after hit, and the crowd's enthusiasm was infectious; even the most reserved attendees found themselves dancing." + } + ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 12, "fields": { - "name": "PREPROCESSING_IDENTIFY_TIMESTAMPS", - "category": "few-shot", - "text": [ - { - "role": "system", - "content": "You are an expert in text analysis. Your task is to identify and extract any timestamps (specific dates, months, years, recognized holidays, timeframes like '12 weeks later', or periods between specific dates) mentioned in the context of an individual experiencing symptoms or being diagnosed with an illness. Highlight these timestamps within the text by surrounding them with $$$ symbols. Ensure the full text is presented without any omissions, and only the timestamps are highlighted in this manner." - }, - { - "role": "user", - "content": "I started feeling unwell around the middle of March 2021. The symptoms were quite severe by the 20th of March, which is when I decided to get tested. The test results came back positive for Covid-19 on March 22nd, 2021." 
- }, - { - "role": "assistant", - "content": "I started feeling unwell around the middle of $$$March 2021$$$. The symptoms were quite severe by the $$$20th of March$$$, which is when I decided to get tested. The test results came back positive for Covid-19 on $$$March 22nd, 2021$$$." - }, - { - "role": "user", - "content": "I started feeling unusually fatigued right before Thanksgiving 2020. The fatigue worsened over the holiday, and by the following Monday, I had developed a fever. I was tested for Covid-19 two days later and received a positive result on November 30th, 2020." - }, - { - "role": "assistant", - "content": "I started feeling unusually fatigued right before $$$Thanksgiving 2020$$$. The fatigue worsened over the holiday, and by the following Monday, I had developed a fever. I was tested for Covid-19 $$$two days later$$$ and received a positive result on $$$November 30th, 2020$$$." - } - ] + "name": "PREPROCESSING_TIME_IDENTIFICATION", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text analysis. Your task is to identify timestamps of events in the given text. Those can be: specific dates, months, years, recognized holidays, relative dates like '12 weeks later' or 'as the week progressed'. Highlight these timestamps within the text by surrounding them with $$$ symbols. Return the full text with the highlighted time specifications." + }, + { + "role": "user", + "content": "I started feeling unwell around the middle of March 2021. The symptoms were quite severe by the 20th of March, which is when I decided to get tested. The test results came back positive for Covid-19 on March 22nd, 2021." + }, + { + "role": "assistant", + "content": "I started feeling unwell around the $$$middle of March 2021$$$. The symptoms were quite severe by the $$$20th of March$$$, which is when I decided to get tested. The test results came back positive for Covid-19 on $$$March 22nd, 2021$$$." 
+ }, + { + "role": "user", + "content": "I started feeling unusually fatigued right before Thanksgiving 2020. The fatigue worsened over the holiday, and by the following Monday, I had developed a fever. I was tested for Covid-19 two days later and received a positive result on November 30th, 2020." + }, + { + "role": "assistant", + "content": "I started feeling unusually fatigued right before $$$Thanksgiving 2020$$$. The fatigue worsened over the holiday, and by the $$$following Monday$$$, I had developed a fever. I was tested for Covid-19 $$$two days later$$$ and received a positive result on $$$November 30th, 2020$$$." + }, + { + "role": "user", + "content": "Two days after new years eve, I started feeling sick. In the following week my symptoms worsened." + }, + { + "role": "assistant", + "content": "$$$Two days after$$$ $$$new years eve$$$, I started feeling sick. $$$In the following week$$$ my symptoms worsened." + }, + { + "role": "user", + "content": "I was tested positive for Covid-19 on my birthday, the 12th of June. The day before I already felt light-headed. Two weeks after my birthday, I was admitted to the hospital." + }, + { + "role": "assistant", + "content": "I was tested positive for Covid-19 on my birthday, $$$the 12th of June$$$. $$$The day before$$$ I already felt light-headed. $$$Two weeks after my birthday$$$, I was admitted to the hospital." + }, + { + "role": "user", + "content": "Over the course of the next few days, my symptoms progressed, and I started experiencing high fever. After two weeks of isolation, I finally started feeling better." + }, + { + "role": "assistant", + "content": "$$$Over the course of the next few days$$$, my symptoms progressed, and I started experiencing high fever. $$$After two weeks$$$ of isolation, I finally started feeling better."
+ } + ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 13, "fields": { - "name": "PREPROCESSING_TRANSFORM_TIMESTAMPS", - "category": "few-shot", - "text": [ - { - "role": "system", - "content": "You are an expert in text analysis and date formatting. Your task is to identify any timestamps related to when an individual experienced symptoms or was diagnosed with an illness. Convert and present these timestamps in the specific format of YYYY/MM/DD. All relevant time specifications are already highlighted with $$$ $$$. To guarantee the completeness of the date you must make assumptions about the year, month and day based on the context. If the time specification is a duration, based on context, you must make assumptions about the start date of the duration. If there is no information about the year, month or day, you are allowed to assume the current year, month or day. Ensure the full text is presented without any omissions, and try to transform every timestamps as concrete as possible. Please make sure to give out the full text without shortening it." - }, - { - "role": "user", - "content": "I noticed the first symptoms shortly after my birthday in $$$April$$$, and exactly $$$12 weeks later$$$, my condition had deteriorated significantly. I was officially diagnosed with Lyme disease on $$$August 7th$$$. In $$$early 2025$$$, it will be gone!" - }, - { - "role": "assistant", - "content": "I noticed the first symptoms shortly after my birthday on 2024/04/01, and exactly 12 weeks later, on 2024/06/24, my condition had deteriorated significantly. I was officially diagnosed with Lyme disease on 2024/08/07. It will be gone on 2025/01/01!" - }, - { - "role": "user", - "content": "During the period between $$$the 01.02 and the 03.02$$$, I felt unusually tired, but I thought it was just stress. However, after attending a large event $$$two weeks later$$$, I developed a fever and was tested positive for the flu." 
- }, - { - "role": "assistant", - "content": "During the period between 2024/02/01 and 2024/02/03, I felt unusually tired, but I thought it was just stress. However, after attending a large event two weeks later, on 2024/02/17, I developed a fever and was tested positive for the flu." - } - ] + "name": "PREPROCESSING_TRANSFORM_TIMESTAMPS", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text analysis and date formatting. Convert these timestamps in the specific format of YYYY/MM/DD. Some relevant time specifications are highlighted with $$$ symbols in the text, focus on those. To guarantee the completeness of the date you must make assumptions about the year, month and day based on the context. If the time specification is a duration, based on context, you must make assumptions about the start date of the duration. If there is no information about the year, month or day, you are allowed to assume the current year, month or day. Ensure the full text is presented without any omissions and transform every timestamps as concrete as possible. Please make sure to give out the full text without shortening it." + }, + { + "role": "user", + "content": "I noticed the first symptoms shortly after my birthday in $$$April$$$, and exactly $$$12 weeks later$$$, my condition had deteriorated significantly. I was officially diagnosed with Lyme disease on $$$August 7th$$$. In $$$early 2025$$$, it will be gone!" + }, + { + "role": "assistant", + "content": "I noticed the first symptoms shortly after my birthday on 2024/04/01, and exactly 12 weeks later, on 2024/06/24, my condition had deteriorated significantly. I was officially diagnosed with Lyme disease on 2024/08/07. It will be gone on 2025/01/01!" + }, + { + "role": "user", + "content": "During the period between $$$the 01.02 and the 03.02$$$, I felt unusually tired, but I thought it was just stress. 
However, after attending a large event $$$two weeks later$$$, I developed a fever and was tested positive for the flu." + }, + { + "role": "assistant", + "content": "During the period between 2024/02/01 and 2024/02/03, I felt unusually tired, but I thought it was just stress. However, after attending a large event two weeks later, on 2024/02/17, I developed a fever and was tested positive for the flu." + } + ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 14, "fields": { - "name": "PREPROCESSING_TIME_CALCULATION", - "category": "few-shot", - "text": [ - { - "role": "system", - "content": "You are an expert in text analysis and date calculations. Your task is to identify timestamps related to health events or diagnoses and convert these into concrete dates in the format of YYYY/MM/DD. For relative timestamps (like 'a few weeks after' or 'months before'), calculate the exact dates based on provided or assumed known dates. Ensure the text is complete without omission, with all relevant timestamps accurately transformed to the specified format. Please make sure to give out the full text without shortening it." - }, - { - "role": "user", - "content": "After experiencing severe headaches starting in mid-$$$March 2022$$$, I went to see a neurologist. The MRI scan scheduled $$$three weeks later$$$ confirmed that I had a benign brain tumor. Post-surgery, I began my recovery phase, which lasted until $$$four months later$$$. During a follow-up visit $$$two months after my recovery$$$, my doctor confirmed that my condition had improved significantly." - }, - { - "role": "assistant", - "content": "After experiencing severe headaches starting on 2022/03/15, I went to see a neurologist. The MRI scan scheduled on 2022/04/05 confirmed that I had a benign brain tumor. Post-surgery, I began my recovery phase, which lasted until 2022/08/05. During a follow-up visit on 2022/10/05, my doctor confirmed that my condition had improved significantly." 
- }, - { - "role": "user", - "content": "Early July 2020, I started experiencing severe coughing and a high fever. It turned out I had contracted Covid-19. And in $$$early August$$$ I had lost my sense of taste." - }, - { - "role": "assistant", - "content": "On the 2020/06/01, I started experiencing severe coughing and a high fever. It turned out I had contracted Covid-19. And on 2020/08/01 I had lost my sense of taste." - } - ] + "name": "PREPROCESSING_TIME_RELATIVE", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text analysis and temporal relations. Your task is to convert relative time information (like 'a few weeks after' or 'in the following days') into concrete dates in the format of YYYY/MM/DD. For relative timestamps like 'two days after my birthday' or 'the day before that', calculate the exact dates based on other dates in the text. If this information is missing, make reasonable assumptions. Some relevant time specifications are highlighted with $$$ symbols in the text, focus on those. Return the full text with the transformed timestamps." + }, + { + "role": "user", + "content": "After experiencing severe headaches starting on 2022/03/15, I went to see a neurologist. The MRI scan scheduled $$$three weeks later$$$ confirmed that I had a benign brain tumor. Post-surgery, I began my recovery phase, which lasted until $$$four months later$$$. During a follow-up visit $$$two weeks after my recovery$$$, my doctor confirmed that my condition had improved significantly." + }, + { + "role": "assistant", + "content": "After experiencing severe headaches starting on 2022/03/15, I went to see a neurologist. The MRI scan scheduled on 2022/04/05 confirmed that I had a benign brain tumor. Post-surgery, I began my recovery phase, which lasted until 2022/08/05. During a follow-up visit on 2022/08/19, my doctor confirmed that my condition had improved significantly." 
+ }, + { + "role": "user", + "content": "On the 2020/06/01, I started experiencing severe coughing and a high fever. It turned out I had contracted Covid-19. And in $$$the following weeks$$$ I had lost my sense of taste." + }, + { + "role": "assistant", + "content": "On the 2020/06/01, I started experiencing severe coughing and a high fever. It turned out I had contracted Covid-19. And during 2020/06/01 and 2020/06/15 I lost my sense of taste." + }, + { + "role": "user", + "content": "I was diagnosed with diabetes on 2023/05/01. $$$A few days later$$$, I began experiencing extreme fatigue and dizziness." + }, + { + "role": "assistant", + "content": "I was diagnosed with diabetes on 2023/05/01. On 2023/05/04, I began experiencing extreme fatigue and dizziness." + }, + { + "role": "user", + "content": "I started chemotherapy on 2022/09/15. $$$Two weeks after the treatment$$$, my hair started falling out." + }, + { + "role": "assistant", + "content": "I started chemotherapy on 2022/09/15. On 2022/09/29, my hair started falling out." + }, + { + "role": "user", + "content": "I had surgery to remove a tumor on 2023/03/10. $$$In the following week$$$, I developed an infection at the surgical site." + }, + { + "role": "assistant", + "content": "I had surgery to remove a tumor on 2023/03/10. Between 2023/03/10 and 2023/03/17, I developed an infection at the surgical site." + }, + { + "role": "user", + "content": "I began taking new medication for my condition on 2022/07/01. $$$A month after starting the medication$$$, I noticed significant improvements in my symptoms." + }, + { + "role": "assistant", + "content": "I began taking new medication for my condition on 2022/07/01. On 2022/08/01, I noticed significant improvements in my symptoms." 
+ } + ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 15, "fields": { - "name": "PREPROCESSING_TIME_INTERPRETATION", - "category": "few-shot", - "text": [ - { - "role": "system", - "content": "You are an expert in text analysis with a specialization in date formatting and interpretation. Your task is to transform general time references related to health events or any other context into specific dates in the format of YYYY/MM/DD. Specifically, convert 'early [month]' to the 1st of the month, 'mid [month]' to the 15th of the month, and 'end of [month]' to the last day of the month (use 30 for April, June, September, and November; 31 for January, March, May, July, August, October, and December; and 28 or 29 for February, depending on leap years). If the year is not mentioned, assume the current year. Provide the full text without omission, ensuring all general time references are accurately transformed into the specified format. Example transformations: 'early January 2020' becomes 2020/01/01, 'mid January 2020' becomes 2020/01/15, and 'end of January 2020' becomes 2020/01/31." - }, - { - "role": "user", - "content": "In early January, I noticed a persistent cough. By mid January, the cough had worsened, leading me to seek medical advice. A specialist finally saw me at the end of January, diagnosing me with a chronic condition." - }, - { - "role": "assistant", - "content": "In 2020/01/01, I noticed a persistent cough. By 2020/01/15, the cough had worsened, leading me to seek medical advice. A specialist finally saw me at 2020/01/31, diagnosing me with a chronic condition." - }, - { - "role": "user", - "content": "In the middle of 2023 I got a stroke. By the end of the year I had recovered. But in early 2024 I had a relapse." - }, - { - "role": "assistant", - "content": "On 2023/06/01, I got a stroke. By 2023/12/01, I had recovered, On 2024/01/01, I had a relapse." 
- } - ] + "name": "PREPROCESSING_TIME_GENERAL", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text analysis and date formatting. Your task is to transform unspecific time references into specific dates in the format of YYYY/MM/DD. Some relevant time specifications are highlighted with $$$ symbols in the text, focus on those. Specifically, convert 'early [month]' to the 1st of the month, 'mid [month]' to the 15th of the month, and 'end of [month]' to the last day of the month (use 30 for April, June, September, and November; 31 for January, March, May, July, August, October, and December; and 28 or 29 for February, depending on leap years). If the year is not mentioned, assume the current year. Return the full text, with the transformed timestamps." + }, + { + "role": "user", + "content": "$$$In early January$$$, I noticed a persistent cough. By $$$mid January$$$, the cough had worsened, leading me to seek medical advice. A specialist finally saw me $$$at the end of January$$$, diagnosing me with a chronic condition." + }, + { + "role": "assistant", + "content": "In 2020/01/01, I noticed a persistent cough. By 2020/01/15, the cough had worsened, leading me to seek medical advice. A specialist finally saw me at 2020/01/31, diagnosing me with a chronic condition." + }, + { + "role": "user", + "content": "In the $$$middle of 2023$$$ I got a stroke. By the $$$end of the year$$$ I had recovered. But in $$$early 2024$$$ I had a relapse." + }, + { + "role": "assistant", + "content": "On 2023/06/01, I got a stroke. By 2023/12/01, I had recovered. On 2024/01/01, I had a relapse." + }, + { + "role": "user", + "content": "I plan to start my new fitness routine in $$$early February 2024$$$. By $$$mid February$$$, I hope to see some initial results. By the $$$end of February$$$, I want to have a consistent habit." + }, + { + "role": "assistant", + "content": "I plan to start my new fitness routine on 2024/02/01. 
By 2024/02/15, I hope to see some initial results. By 2024/02/29, I want to have a consistent habit." + }, + { + "role": "user", + "content": "I usually experience seasonal allergies in $$$early April$$$. By $$$mid April$$$, they become severe, and by the $$$end of May$$$, I need to take stronger medication." + }, + { + "role": "assistant", + "content": "I usually experience seasonal allergies on 2024/04/01. By 2024/04/15, they become severe, and by 2024/05/31, I need to take stronger medication." + }, + { + "role": "user", + "content": "I started feeling unwell around the $$$middle of March 2021$$$. The symptoms were quite severe by the $$$20th of March$$$, which is when I decided to get tested. The test results came back positive for Covid-19 on $$$March 22nd, 2021$$$." + }, + { + "role": "assistant", + "content": "I started feeling unwell around the 2021/03/15. The symptoms were quite severe by the 2021/03/20, which is when I decided to get tested. The test results came back positive for Covid-19 on 2021/03/22." + } + ] } - }, - { +}, +{ "model": "extraction.prompt", "pk": 16, "fields": { - "name": "CREATE_PATIENT_JOURNEY", - "category": "zero-shot", - "text": [ - { - "role": "user", - "content": "Please outline the course of your Covid-19 infection, what you did (and when you did that) because of it and which doctors you may consulted. Please give some information about the time, in a few cases directly as a date and in the other as something in the lines of 'in the next days', 'the week after that' or similar. Give your outline as a continuous text. Also include if you later went for getting a vaccine and if so, how often. You don't have to include deals about who you are. Please include 100 to 400 words, but not more than 400." 
- } - ] + "name": "CREATE_PATIENT_JOURNEY", + "category": "zero-shot", + "text": [ + { + "role": "user", + "content": "Please outline the course of your Covid-19 infection, what you did (and when you did that) because of it and which doctors you may consulted. Please give some information about the time, in a few cases directly as a date and in the other as something in the lines of 'in the next days', 'the week after that' or similar. Give your outline as a continuous text. Also include if you later went for getting a vaccine and if so, how often. You don't have to include details about who you are. Please include 100 to 400 words, but not more than 400." + } + ] + } +}, +{ + "model": "extraction.prompt", + "pk": 17, + "fields": { + "name": "PREPROCESSING_TIME_HOLIDAYS", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text analysis and date formatting. Your task is to identify time information that is related to holidays and to convert them into concrete timestamps. Some relevant time specifications are highlighted with $$$ symbols in the text, focus on those. There are also time specifications highlighted, that are not holidays, ignore them. If no information about the year is given, assume the current year. Use the format YYYY/MM/DD. Only transform the time information that is related to holidays. Keep all other time information highlighted." + }, + { + "role": "user", + "content": "$$$Two days after$$$ $$$new years eve$$$, I started feeling sick." + }, + { + "role": "assistant", + "content": "$$$Two days after$$$ 2023/12/31, I started feeling sick." + }, + { + "role": "user", + "content": "I started feeling unusually fatigued right before $$$Thanksgiving 2020$$$." + }, + { + "role": "assistant", + "content": "I started feeling unusually fatigued right before 2020/11/26." 
+ }, + { + "role": "user", + "content": "I always feel excited on $$$Halloween$$$ and then exhausted the next day." + }, + { + "role": "assistant", + "content": "I always feel excited on 2023/10/31 and then exhausted the next day." 
+ }, + { + "role": "user", + "content": "I visited my grandparents $$$two weeks before$$$ $$$Christmas$$$ last year." + }, + { + "role": "assistant", + "content": "I visited my grandparents $$$two weeks before$$$ 2022/12/25 last year." + }, + { + "role": "user", + "content": "We planned a big family reunion for $$$Easter$$$, but it got postponed." + }, + { + "role": "assistant", + "content": "We planned a big family reunion for 2023/04/09, but it got postponed." + }, + { + "role": "user", + "content": "My annual check-up is always scheduled for $$$mid June$$$, just after $$$Flag Day$$$." + }, + { + "role": "assistant", + "content": "My annual check-up is always scheduled for $$$mid June$$$, just after 2023/06/14." + } + ] + } +}, +{ + "model": "extraction.prompt", + "pk": 18, + "fields": { + "name": "PREPROCESSING_TIME_PROPAGATE", + "category": "few-shot", + "text": [ + { + "role": "system", + "content": "You are an expert in text understanding and information extraction. Your task is to propagate temporal information throughout a given text. For each sentence, append the latest known timestamp in the format --YYYY/MM/DD-- before the period. Instructions: 1. Identify the most recent date mentioned up to and including the current sentence. This includes relative dates like 'after two weeks' or 'in the following days'. Translate these specifications into concrete dates, if you encounter them. 2. Only if no new temporal information is given, reuse the added timestamp from the previous sentence. 3. Append this date at the end of the current sentence before the period." + }, + { + "role": "user", + "content": "0: After experiencing the first symptoms of Covid-19 on 2020/09/13, I isolated myself at home.\n1: My symptoms started with a mild fever and worsened over the following days.\n2: By 2020/09/15, I developed difficulty breathing and consulted a doctor via telemedicine.\n3: The doctor advised me to monitor my symptoms closely and prescribed medications." 
+ }, + { + "role": "assistant", + "content": "0: After experiencing the first symptoms of Covid-19 on 2020/09/13, I isolated myself at home--2020/09/13--.1: My symptoms started with a mild fever and worsened over the following days--2020/09/13--.2: By 2020/09/15, I developed difficulty breathing and consulted a doctor via telemedicine--2020/09/15--.3: The doctor advised me to monitor my symptoms closely and prescribed medications--2020/09/15--." + }, + { + "role": "user", + "content": "15: After a week in the hospital, my breathing got better, and they let me go home on 2022/01/25. 16: I was super happy to be back in my own bed, but I still felt exhausted." + }, + { + "role": "assistant", + "content": "15: After a week in the hospital, my breathing got better, and they let me go home on 2022/01/25--2022/01/25--.16: I was super happy to be back in my own bed, but I still felt exhausted--2022/01/25--." + } + ] } - } +} ] diff --git a/tracex_project/extraction/forms.py b/tracex_project/extraction/forms.py index cdc656f4..7b3cd92e 100644 --- a/tracex_project/extraction/forms.py +++ b/tracex_project/extraction/forms.py @@ -1,4 +1,6 @@ """Implementation of forms for the extraction app.""" +from typing import List, Tuple + from django import forms from extraction.models import PatientJourney @@ -10,7 +12,18 @@ class JourneyUploadForm(forms.ModelForm): """Form for uploading your own patient journey.""" class Meta: - """Metaclass for JourneyForm, provides additional parameters for the form.""" + """ + Configuration class for the JourneyUploadForm. + + This class specifies that the form is associated with the PatientJourney model and that it has a field for the + 'name' attribute of the PatientJourney model. It also sets the help text and widget for the 'name' field. + + Attributes: + model: The model that this form is associated with. + fields: The fields that this form includes. + help_texts: Help texts for the form fields. + widgets: Widgets to use for the form fields. 
+ """ model = PatientJourney fields = ["name"] @@ -33,7 +46,7 @@ class Meta: class JourneySelectForm(forms.Form): - """Form for selecting ground truth patient journey.""" + """Django form for selecting a patient journey from available choices in the database.""" selected_patient_journey = forms.ChoiceField( choices=[], @@ -41,26 +54,29 @@ class JourneySelectForm(forms.Form): ) def __init__(self, *args, **kwargs): - """Initializes the PatientJourneySelectForm.""" + """Initializes the form and sets the choices for the 'selected_patient_journey' field.""" super().__init__(*args, **kwargs) self.fields[ "selected_patient_journey" ].choices = self.get_patient_journey_choices() @staticmethod - def get_patient_journey_choices(): - """Retrieves the available patient journey choices from the database.""" + def get_patient_journey_choices() -> List[Tuple[str, str]]: + """Returns a list of tuples containing the names of all patient journeys from the database.""" patient_journeys = PatientJourney.manager.all() - choices = [(pj.name, pj.name) for pj in patient_journeys] + choices = [ + (patient_journey.name, patient_journey.name) + for patient_journey in patient_journeys + ] return choices class FilterForm(BaseEventForm): - """Form for selecting filter for extraction result""" + """Django form for selecting and validating extraction result filters.""" def clean(self): - """Validate form data.""" + """Validates the form data and checks module compatibility with the chosen activity key.""" cleaned_data = super().clean() modules = cleaned_data.get("modules_optional") + cleaned_data.get( "modules_required" @@ -83,13 +99,13 @@ def clean(self): f"For the chosen activity key the module {error_module} has to run.\ Select this module or change the activity key.", ) - self.__validate_modules_optional(modules) + self.__validate_metrics_analyzer_dependency(modules) return cleaned_data @staticmethod - def __validate_modules_optional(modules): - """Validate optional modules""" + def 
__validate_metrics_analyzer_dependency(modules): + """Checks if 'metrics_analyzer' is selected without 'time_extraction' and raises a validation error if so.""" if "metrics_analyzer" in modules and "time_extraction" not in modules: raise forms.ValidationError( "Metrics Analyzer depends on Time Extractor. Please select both or deselect Metrics Analyzer.", @@ -98,9 +114,10 @@ def __validate_modules_optional(modules): class ResultForm(BaseEventForm): - """Form for displaying results of event extraction.""" + """Django form for initializing and displaying extraction results.""" def __init__(self, *args, **kwargs): + """Initializes the form with event types, locations, and selected modules, and disables module selection.""" super().__init__(*args, **kwargs) initial = kwargs.pop("initial", None) diff --git a/tracex_project/extraction/logic/module.py b/tracex_project/extraction/logic/module.py index 11249403..f739c81c 100644 --- a/tracex_project/extraction/logic/module.py +++ b/tracex_project/extraction/logic/module.py @@ -1,5 +1,6 @@ """Module providing the abstract base class for all modules.""" from abc import ABC +from typing import Dict, List, Optional import pandas as pd @@ -27,16 +28,18 @@ def execute( self, _input, *, - patient_journey=None, - patient_journey_sentences=None, - cohort=None, + patient_journey: Optional[str] = None, + patient_journey_sentences: Optional[List[str]] = None, + cohort: Optional[Dict[str, str]] = None, ) -> pd.DataFrame: """ Executes the logic of the module. Override this to define your own module. - Every module receives the patient journey as parameter which is set to the instance variable of each module. - This method should always return a dataframe, so other modules can use the result. + + Keyword arguments: + _input -- Any additional input to the module. + patient_journey -- The patient journey as text. + patient_journey_sentences -- The same patient journey as a list of sentences. 
""" - print(f"Starting Module {self.name}.") self.patient_journey = patient_journey self.patient_journey_sentences = patient_journey_sentences self.cohort = cohort @@ -44,14 +47,19 @@ def execute( return pd.DataFrame() def execute_and_save( - self, _input, *, patient_journey=None, patient_journey_sentences=None + self, + _input, + *, + patient_journey: Optional[str] = None, + patient_journey_sentences: Optional[list[str]] = None, ) -> int: """ Executes the logic of the module and saves the result to the database. Override this to define your own module. - Every module receives the patient journey as parameter which is set to the instance variable of each module. - This method should always save the result to the database, and return the id. + + Keyword arguments: + patient_journey -- The patient journey as text. + patient_journey_sentences -- The same patient journey as a list of sentences. """ - print(f"Starting Module {self.name}.") self.patient_journey = patient_journey self.patient_journey_sentences = patient_journey_sentences diff --git a/tracex_project/extraction/logic/modules/module_activity_labeler.py b/tracex_project/extraction/logic/modules/module_activity_labeler.py index 43ef163a..405fdf00 100644 --- a/tracex_project/extraction/logic/modules/module_activity_labeler.py +++ b/tracex_project/extraction/logic/modules/module_activity_labeler.py @@ -1,5 +1,6 @@ """This is the module that extracts the activity labels from the patient journey.""" from pathlib import Path +from typing import List, Optional import pandas as pd from django.conf import settings @@ -11,7 +12,7 @@ class ActivityLabeler(Module): """ - This is the module that extracts the activity labels from the patient journey. + This is the module that starts the pipeline with structuring the patient journey in activities. 
""" def __init__(self): @@ -23,10 +24,10 @@ def __init__(self): def execute( self, _input=None, - patient_journey=None, - patient_journey_sentences=None, + patient_journey: str = None, + patient_journey_sentences: List[str] = None, cohort=None, - ): + ) -> pd.DataFrame: """ Extracts the activity labels from the patient journey with the following steps: 1. Number the patient journey sentences to enable selecting a specific range of sentences. @@ -39,19 +40,19 @@ def execute( cohort=cohort, ) - condition = cohort["condition"] if cohort is not None else None + condition = getattr(cohort, "condition", None) - patient_journey_numbered = self.__number_patient_journey_sentences( + patient_journey_numbered: str = self.__number_patient_journey_sentences( patient_journey_sentences ) - activity_labels = self.__extract_activities(patient_journey_numbered, condition) + activity_labels: pd.DataFrame = self.__extract_activities(patient_journey_numbered, condition) return activity_labels @staticmethod - def __number_patient_journey_sentences(patient_journey_sentences): + def __number_patient_journey_sentences(patient_journey_sentences: List[str]) -> str: """ - Number the patient journey sentences in the format: + Number the patient journey sentences as one String in the format: 1: ... 2: ... And so on. @@ -64,7 +65,7 @@ def __number_patient_journey_sentences(patient_journey_sentences): return patient_journey_numbered @staticmethod - def __extract_activities(patient_journey_numbered, condition): + def __extract_activities(patient_journey_numbered: str, condition: Optional[str]) -> pd.DataFrame: """ Converts a patient journey, where every sentence is numbered, to a DataFrame with the activity labels by extracting the activity labels from the patient journey. 
@@ -72,16 +73,11 @@ def __extract_activities(patient_journey_numbered, condition): column_name = "activity" messages = Prompt.objects.get(name="TEXT_TO_ACTIVITY_MESSAGES").text + user_message: List[str] = patient_journey_numbered if condition is not None: - messages.append( - { - "role": "user", - "content": f"Focus on those events that are related to the course of the disease of {condition}." - f"\n\n{patient_journey_numbered}", - } - ) - else: - messages.append({"role": "user", "content": patient_journey_numbered}) + user_message = f"Focus on those events that are related to the course of the disease of {condition}.\n\n\ + {user_message}" + messages.append({"role": "user", "content": user_message}) activity_labels = u.query_gpt(messages).split("\n") df = pd.DataFrame(activity_labels, columns=[column_name]) df[["activity", "sentence_id"]] = df["activity"].str.split(" #", expand=True) diff --git a/tracex_project/extraction/logic/modules/module_cohort_tagger.py b/tracex_project/extraction/logic/modules/module_cohort_tagger.py index e86ae26c..db730491 100644 --- a/tracex_project/extraction/logic/modules/module_cohort_tagger.py +++ b/tracex_project/extraction/logic/modules/module_cohort_tagger.py @@ -1,5 +1,6 @@ """This is the module that cohort tags from the patient journey.""" from pathlib import Path +from typing import Dict, List, Optional from django.conf import settings from extraction.models import Prompt @@ -10,7 +11,8 @@ class CohortTagger(Module): """ - This is the module that extracts the cohort tags from the patient journey. + This is the module that extracts the cohort information from the patient journey. + The cohort tags are condition, age, biological sex, origin and preexisting condition. 
""" def __init__(self): @@ -20,25 +22,26 @@ def __init__(self): @log_execution_time(Path(settings.BASE_DIR / "tracex/logs/execution_time.log")) def execute_and_save( - self, df, patient_journey=None, patient_journey_sentences=None - ): - """ - Extracts the cohort from the patient journey and saves the result in the database. - """ + self, + df=None, + patient_journey: str = None, + patient_journey_sentences: List[str] = None, + ) -> Optional[Dict[str, str]]: + """Extracts the cohort information from the patient journey and saves the result in the database.""" super().execute_and_save( df, patient_journey=patient_journey, - patient_journey_sentences=patient_journey_sentences + patient_journey_sentences=patient_journey_sentences, ) - cohort_tags = self.__extract_cohort_tags(patient_journey) - cohort_dict = self.__prepare_cohort_dict(cohort_tags) + cohort_dict = self.__extract_cohort_tags(patient_journey) + cohort_dict = self.__remove_placeholder(cohort_dict) return cohort_dict @staticmethod - def __extract_cohort_tags(patient_journey): - """Extracts information about condition, gender, age, origin and preexisting condition.""" + def __extract_cohort_tags(patient_journey) -> Dict[str, str]: + """Extracts information about condition, sex, age, origin and preexisting condition.""" cohort_data = {} for message_list in Prompt.objects.get(name="COHORT_TAG_MESSAGES").text: messages = message_list[1:] @@ -51,7 +54,7 @@ def __extract_cohort_tags(patient_journey): return cohort_data @staticmethod - def __prepare_cohort_dict(cohort_data): + def __remove_placeholder(cohort_data) -> Optional[Dict[str, str]]: """Prepares the cohort tags dictionary for saving into database.""" cohort_dict = { key: value for key, value in cohort_data.items() if value != "N/A" diff --git a/tracex_project/extraction/logic/modules/module_event_type_classifier.py b/tracex_project/extraction/logic/modules/module_event_type_classifier.py index 189dc091..b0b1ccab 100644 --- 
a/tracex_project/extraction/logic/modules/module_event_type_classifier.py +++ b/tracex_project/extraction/logic/modules/module_event_type_classifier.py @@ -1,6 +1,7 @@ """This module classifies the event types of the activities.""" from pathlib import Path from django.conf import settings +import pandas as pd from extraction.logic.module import Module from extraction.models import Prompt @@ -23,9 +24,13 @@ def __init__(self): @log_execution_time(Path(settings.BASE_DIR / "tracex/logs/execution_time.log")) def execute( - self, df, patient_journey=None, patient_journey_sentences=None, cohort=None - ): - """Classifies the event types for the corresponding activity labels from a patient journey.""" + self, + df: pd.DataFrame, + patient_journey=None, + patient_journey_sentences=None, + cohort=None, + ) -> pd.DataFrame: + """Classifies corresponding event types for all activity labels in a dataframe.""" super().execute( df, patient_journey=patient_journey, @@ -33,8 +38,7 @@ def execute( cohort=cohort, ) - column_name = "event_type" - df[column_name] = df["activity"].apply(self.__classify_event_type) + df["event_type"] = df["activity"].apply(self.__classify_event_type) return df diff --git a/tracex_project/extraction/logic/modules/module_location_extractor.py b/tracex_project/extraction/logic/modules/module_location_extractor.py index 13e1e7d9..da100c8d 100644 --- a/tracex_project/extraction/logic/modules/module_location_extractor.py +++ b/tracex_project/extraction/logic/modules/module_location_extractor.py @@ -1,6 +1,7 @@ """This module that extracts the location information for each activity.""" from pathlib import Path from django.conf import settings +import pandas as pd from extraction.logic.module import Module from extraction.models import Prompt @@ -21,9 +22,13 @@ def __init__(self): @log_execution_time(Path(settings.BASE_DIR / "tracex/logs/execution_time.log")) def execute( - self, df, patient_journey=None, patient_journey_sentences=None, cohort=None - ): - 
"""Extracts the location information for each activity.""" + self, + df: pd.DataFrame, + patient_journey=None, + patient_journey_sentences=None, + cohort=None, + ) -> pd.DataFrame: + """Extracts the location information for each activity in a dataframe.""" super().execute( df, patient_journey=patient_journey, @@ -31,13 +36,12 @@ def execute( cohort=cohort, ) - column_name = "attribute_location" - df[column_name] = df["activity"].apply(self.__classify_location) + df["attribute_location"] = df["activity"].apply(self.__classify_location) return df @staticmethod - def __classify_location(activity_label): + def __classify_location(activity_label: str) -> str: """Classify the location for a given activity.""" messages = Prompt.objects.get(name="LOCATION_MESSAGES").text messages.append({"role": "user", "content": activity_label}) diff --git a/tracex_project/extraction/logic/modules/module_metrics_analyzer.py b/tracex_project/extraction/logic/modules/module_metrics_analyzer.py index df293255..b75ba701 100644 --- a/tracex_project/extraction/logic/modules/module_metrics_analyzer.py +++ b/tracex_project/extraction/logic/modules/module_metrics_analyzer.py @@ -1,5 +1,6 @@ """This module measures the outpupt of the pipeline based on specified metrics.""" from pathlib import Path +from typing import Tuple import pandas as pd from django.conf import settings @@ -26,10 +27,14 @@ def __init__(self): @log_execution_time(Path(settings.BASE_DIR / "tracex/logs/execution_time.log")) def execute( - self, df, patient_journey=None, patient_journey_sentences=None, cohort=None - ): - """Executing the measurement of metrics. The metrics output will be written on disk as a csv file. - The dataframe without the metrics is returned for visualization.""" + self, + df: pd.DataFrame, + patient_journey=None, + patient_journey_sentences=None, + cohort=None, + ) -> pd.DataFrame: + """Measures the output of the pipeline based on specified metrics. 
+ These metrics are 'activity relevance' and 'timestamp correctness'.""" super().execute( df, patient_journey=patient_journey, @@ -56,7 +61,7 @@ def execute( return metrics_df @staticmethod - def __rate_activity_relevance(activity, condition): + def __rate_activity_relevance(activity: str, condition: str | None) -> str: category_mapping = { "No Relevance": 0, "Low Relevance": 1, @@ -86,7 +91,9 @@ def __rate_activity_relevance(activity, condition): return category - def __rate_timestamps_correctness(self, activity, start, end): + def __rate_timestamps_correctness( + self, activity: str, start: pd.DateTime, end: pd.DateTime + ) -> Tuple[str, float]: messages = Prompt.objects.get(name="METRIC_TIMESTAMP_MESSAGES").text messages.append( { diff --git a/tracex_project/extraction/logic/modules/module_patient_journey_preprocessor.py b/tracex_project/extraction/logic/modules/module_patient_journey_preprocessor.py index 8a3d870d..5dd0dffc 100644 --- a/tracex_project/extraction/logic/modules/module_patient_journey_preprocessor.py +++ b/tracex_project/extraction/logic/modules/module_patient_journey_preprocessor.py @@ -22,7 +22,7 @@ def __init__(self): @log_execution_time(Path(settings.BASE_DIR / "tracex/logs/execution_time.log")) def execute( self, _input=None, patient_journey=None, patient_journey_sentences=None - ): + ) -> str: """Preprocesses the patient input for better data quality.""" super().execute( _input, @@ -54,12 +54,10 @@ def execute( preprocessed_text, "TIME_PROPAGATE" ) - patient_journey_sentences = self.__make_sentences(preprocessed_text) - - return patient_journey_sentences + return preprocessed_text @staticmethod - def __apply_preprocessing_step(text, prompt_name): + def __apply_preprocessing_step(text: str, prompt_name: str) -> str: """Applies a preprocessing step based on the step name.""" messages = Prompt.objects.get(name=f"PREPROCESSING_{prompt_name}").text new_user_message = {"role": "user", "content": text} @@ -67,11 +65,3 @@ def 
__apply_preprocessing_step(text, prompt_name): preprocessed_text = u.query_gpt(messages) return preprocessed_text - - @staticmethod - def __make_sentences(text): - """Splits the input into a list of its sentences.""" - text = text.replace("\n", " ") - text = text.split(". ") - - return text diff --git a/tracex_project/extraction/logic/modules/module_time_extractor.py b/tracex_project/extraction/logic/modules/module_time_extractor.py index cba73f6a..91a73bbc 100644 --- a/tracex_project/extraction/logic/modules/module_time_extractor.py +++ b/tracex_project/extraction/logic/modules/module_time_extractor.py @@ -1,5 +1,6 @@ """This module extracts the time information from the patient journey.""" from pathlib import Path +from typing import List from django.conf import settings import pandas as pd @@ -22,9 +23,14 @@ def __init__(self): @log_execution_time(Path(settings.BASE_DIR / "tracex/logs/execution_time.log")) def execute( - self, df, patient_journey=None, patient_journey_sentences=None, cohort=None - ): - """This function extracts the time information from the patient journey.""" + self, + df: pd.DataFrame, + patient_journey=None, + patient_journey_sentences: List[str] = None, + cohort=None, + ) -> pd.DataFrame: + """This function extracts the time information from the patient journey. 
+ For each activity label, the start date, end date and duration are extracted.""" super().execute( df, patient_journey=patient_journey, @@ -39,7 +45,7 @@ def execute( return df - def __extract_start_date(self, row): + def __extract_start_date(self, row: pd.Series) -> str: """Extract the start date for a given activity.""" lower, upper = u.get_snippet_bounds( index=(int(row["sentence_id"])), length=len(self.patient_journey_sentences) @@ -59,7 +65,7 @@ def __extract_start_date(self, row): return start - def __extract_end_date(self, row): + def __extract_end_date(self, row: pd.Series) -> str: """Extract the end date for a given activity.""" lower, upper = u.get_snippet_bounds( index=(int(row["sentence_id"])), length=len(self.patient_journey_sentences) @@ -82,7 +88,7 @@ def __extract_end_date(self, row): return end @staticmethod - def __calculate_duration(row): + def __calculate_duration(row: pd.Series) -> str: """Calculate the duration of an activity.""" duration = row["time:end_timestamp"] - row["time:timestamp"] hours, remainder = divmod(duration.total_seconds(), 3600) @@ -91,10 +97,28 @@ def __calculate_duration(row): return f"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d}" @staticmethod - def __post_processing(df): + def __post_processing(df: pd.DataFrame) -> pd.DataFrame: """Fill missing values for dates with default values.""" - def fix_end_dates(row): + def convert_to_datetime(df: pd.DataFrame, column: pd.Series) -> pd.DataFrame: + df[column] = pd.to_datetime( + df[column], format="%Y%m%dT%H%M", errors="coerce" + ) + + return df + + def set_default_date_if_na(df: pd.DataFrame, column: pd.Series) -> pd.DataFrame: + if df[column].isna().all(): + df[column] = df[column].fillna(pd.Timestamp("2020-01-01 00:00")) + + return df + + def fill_missing_values(df: pd.DataFrame, column: pd.Series) -> pd.DataFrame: + df[column] = df[column].ffill().bfill() + + return df + + def fix_end_dates(row: pd.Series) -> pd.Series: if ( row["time:end_timestamp"] is pd.NaT and 
row["time:timestamp"] is not pd.NaT @@ -103,26 +127,16 @@ def fix_end_dates(row): return row - df["time:timestamp"] = pd.to_datetime( - df["time:timestamp"], format="%Y%m%dT%H%M", errors="coerce" - ) - df["time:end_timestamp"] = pd.to_datetime( - df["time:end_timestamp"], format="%Y%m%dT%H%M", errors="coerce" - ) + df = convert_to_datetime(df, "time:timestamp") + df = convert_to_datetime(df, "time:end_timestamp") - if df["time:timestamp"].isna().all(): - df["time:timestamp"] = df["time:timestamp"].fillna( - pd.Timestamp("2020-01-01 00:00") - ) + df = set_default_date_if_na(df, "time:timestamp") - if df["time:end_timestamp"].isna().all(): - df["time:end_timestamp"] = df["time:end_timestamp"].fillna( - pd.Timestamp("2020-01-01 00:00") - ) + df = df.apply(fix_end_dates, axis=1) - df["time:timestamp"] = df["time:timestamp"].ffill().bfill() - df["time:end_timestamp"] = df["time:end_timestamp"].ffill().bfill() + df = set_default_date_if_na(df, "time:end_timestamp") - df = df.apply(fix_end_dates, axis=1) + df = fill_missing_values(df, "time:timestamp") + df = fill_missing_values(df, "time:end_timestamp") return df diff --git a/tracex_project/extraction/logic/orchestrator.py b/tracex_project/extraction/logic/orchestrator.py index 1cb1ab2f..fd59ed6a 100644 --- a/tracex_project/extraction/logic/orchestrator.py +++ b/tracex_project/extraction/logic/orchestrator.py @@ -1,8 +1,15 @@ -"""Module providing the orchestrator and corresponding configuration, that manages the modules.""" +""" +Module providing the orchestrator and corresponding configuration, that manages the modules. + +Classes: +ExtractionConfiguration -- Dataclass for the configuration of the orchestrator. +Orchestrator -- Singleton class for managing the modules. 
+""" from dataclasses import dataclass -from typing import Optional, List, Dict +from typing import Any, List, Optional, Dict from django.utils.dateparse import parse_duration from django.core.exceptions import ObjectDoesNotExist +import pandas as pd from extraction.logic.modules import ( Preprocessor, @@ -14,7 +21,7 @@ MetricsAnalyzer, ) from extraction.models import Trace, PatientJourney, Event, Cohort, Metric -from tracex.logic.utils import DataFrameUtilities +from tracex.logic.utils import DataFrameUtilities, Conversion @dataclass @@ -23,6 +30,9 @@ class ExtractionConfiguration: Dataclass for the configuration of the orchestrator. This specifies all modules that can be executed, what event types are used to classify the activity labels, what locations are used to classify the activity labels and what the patient journey is, on which the pipeline is executed. + + Public Methods: + update -- Update the configuration with a dictionary mapping its attributes to new values. """ def __init__( @@ -46,18 +56,36 @@ def __init__( "metrics_analyzer": MetricsAnalyzer, } - def update(self, **kwargs): + def update(self, **kwargs) -> None: """Update the configuration with a dictionary.""" valid_keys = set(vars(self).keys()) for key, value in kwargs.items(): if key in valid_keys: setattr(self, key, value) - else: - print(f"Ignoring unknown key: {key}") class Orchestrator: - """Singleton class for managing the modules.""" + """ + Singleton class for managing the modules. + + Public Methods: + get_instance -- Return the singleton instance of the orchestrator. + reset_instance -- Reset the singleton instance of the orchestrator. + set_configuration -- Set the configuration for the orchestrator instance. + get_configuration -- Return the configuration for the orchestrator instance. + set_data -- Set the data for the orchestrator instance. + get_data -- Return the data for the orchestrator instance. + set_cohort -- Set the cohort for the orchestrator instance. 
+ get_cohort -- Return the cohort for the orchestrator instance. + set_db_objects_id -- Set the database id objects for the orchestrator instance. + get_db_objects_id -- Return the database id objects for the orchestrator instance. + reduce_modules -- Update the modules of the orchestrator instance. + initialize_modules -- Bring the modules into the right order and initialize them. + run -- Run the modules and set default values for modules not executed. + save_results_to_db -- Save the trace to the database. + set_default_values -- Set default values if a specific module was deselected. + update_progress -- Update the progress of the extraction. + """ _instance = None @@ -67,7 +95,7 @@ def __new__(cls, configuration: ExtractionConfiguration = None): return cls._instance - def __init__(self, configuration=None): + def __init__(self, configuration: Optional[ExtractionConfiguration] = None): if configuration is not None: self.configuration = configuration self.data = None @@ -92,7 +120,7 @@ def get_configuration(self): """Return the configuration for the orchestrator instance.""" return self.configuration - def set_data(self, data): + def set_data(self, data: pd.DataFrame) -> None: """Set the data for the orchestrator instance.""" self.data = data @@ -100,7 +128,7 @@ def get_data(self): """Return the data for the orchestrator instance.""" return self.data - def set_cohort(self, cohort): + def set_cohort(self, cohort: Dict[str, str] | None): """Set the cohort for the orchestrator instance.""" self.cohort = cohort @@ -112,17 +140,15 @@ def set_db_objects_id(self, object_name: str, object_id: int): """Set the database id objects for the orchestrator instance.""" self.db_objects_id[object_name] = object_id - def get_db_objects_id(self, object_name): + def get_db_objects_id(self, object_name: str) -> int: """Return the database id objects for the orchestrator instance.""" return self.db_objects_id[object_name] - def update_modules(self, modules_list): - """Update the 
modules of the orchestrator instance.""" - modules_dictionary = self.get_configuration().modules - updated_modules = { - key: modules_dictionary[key] - for key in modules_dictionary - if key in modules_list + def reduce_modules_to(self, modules: List) -> ExtractionConfiguration: + """Reduce the modules of the orchestrator instance to the modules in the keyword argument.""" + old_modules: Dict[str, Any] = self.get_configuration().modules + updated_modules: Dict[str, Any] = { + key: old_modules[key] for key in old_modules if key in modules } self.get_configuration().update(modules=updated_modules) @@ -134,39 +160,39 @@ def initialize_modules(self): key: self.get_configuration().modules[key]() for key in self.get_configuration().modules } - print("Initialization of modules successful.") return modules - def run(self, view=None): - """Run the modules.""" + def run(self, view=None) -> pd.DataFrame: + """Run the modules and set default values for modules not executed.""" modules = self.initialize_modules() - current_step = 1 + execution_step: int = 1 - patient_journey_sentences = self.get_configuration().patient_journey.split(". ") + patient_journey = self.get_configuration().patient_journey if "preprocessing" in modules: - self.update_progress(view, current_step, "Preprocessing") - patient_journey_sentences = modules["preprocessing"].execute( + self.update_progress(view, execution_step, "Preprocessing") + patient_journey = modules["preprocessing"].execute( patient_journey=self.get_configuration().patient_journey ) - current_step += 1 - patient_journey = ". 
".join(patient_journey_sentences) + execution_step += 1 + patient_journey_sentences: List[str] = Conversion.text_to_sentence_list( + patient_journey + ) - if "cohort_tagging" in modules: - self.update_progress(view, current_step, "Cohort Tagger") - self.set_cohort( - modules["cohort_tagging"].execute_and_save( - self.get_data(), - patient_journey=patient_journey, - patient_journey_sentences=patient_journey_sentences, - ) + self.update_progress(view, execution_step, "Cohort Tagger") + self.set_cohort( + modules["cohort_tagging"].execute_and_save( + self.get_data(), + patient_journey=patient_journey, + patient_journey_sentences=patient_journey_sentences, ) - current_step += 1 + ) + execution_step += 1 - for module_name in [ - name for name in modules if name not in ("cohort_tagging", "preprocessing") + for remaining_module_key in [ + key for key in modules if key not in ("cohort_tagging", "preprocessing") ]: - module = modules[module_name] - self.update_progress(view, current_step, module.name) + module = modules[remaining_module_key] + self.update_progress(view, execution_step, module.name) self.set_data( module.execute( self.get_data(), @@ -175,7 +201,7 @@ def run(self, view=None): cohort=self.get_cohort(), ) ) - current_step += 1 + execution_step += 1 if self.get_data() is not None: try: @@ -186,7 +212,7 @@ def run(self, view=None): self.get_data().insert(0, "case:concept:name", latest_id + 1) self.set_default_values() - def save_results_to_db(self): + def save_results_to_db(self) -> None: """Save the trace to the database.""" patient_journey: PatientJourney = PatientJourney.manager.get( pk=self.get_db_objects_id("patient_journey") @@ -225,8 +251,8 @@ def save_results_to_db(self): patient_journey.trace.add(trace) patient_journey.save() - def set_default_values(self): - """Set default values if a specific module was deselected.""" + def set_default_values(self) -> None: + """Set default values for all modules not executed.""" config_modules = 
self.get_configuration().modules data = self.get_data() @@ -240,21 +266,12 @@ def set_default_values(self): data["activity_relevance"] = None data["timestamp_correctness"] = None data["correctness_confidence"] = None - if "cohort_tagging" not in config_modules: - cohort_default_values = { - "age": None, - "sex": None, - "origin": None, - "condition": None, - "preexisting_condition": None, - } - self.set_cohort(cohort_default_values) - - def update_progress(self, view, current_step, module_name): + + def update_progress(self, view, execution_step: int, module_name: str) -> None: """Update the progress of the extraction.""" if view is not None: percentage = round( - (current_step / (len(self.get_configuration().modules) + 1)) * 100 + (execution_step / (len(self.get_configuration().modules) + 1)) * 100 ) view.request.session["progress"] = percentage view.request.session["status"] = module_name diff --git a/tracex_project/extraction/migrations/0022_remove_cohort_gender_cohort_sex_alter_cohort_age_and_more.py b/tracex_project/extraction/migrations/0022_remove_cohort_gender_cohort_sex_alter_cohort_age_and_more.py deleted file mode 100644 index 0d55171b..00000000 --- a/tracex_project/extraction/migrations/0022_remove_cohort_gender_cohort_sex_alter_cohort_age_and_more.py +++ /dev/null @@ -1,41 +0,0 @@ -# Generated by Django 4.2.7 on 2024-05-17 15:37 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ("extraction", "0021_alter_event_event_type_alter_event_location_and_more"), - ] - - operations = [ - migrations.RemoveField( - model_name="cohort", - name="gender", - ), - migrations.AddField( - model_name="cohort", - name="sex", - field=models.CharField(blank=True, max_length=25, null=True), - ), - migrations.AlterField( - model_name="cohort", - name="age", - field=models.IntegerField(blank=True, null=True), - ), - migrations.AlterField( - model_name="cohort", - name="condition", - field=models.CharField(blank=True, 
max_length=50, null=True), - ), - migrations.AlterField( - model_name="cohort", - name="origin", - field=models.CharField(blank=True, max_length=50, null=True), - ), - migrations.AlterField( - model_name="cohort", - name="preexisting_condition", - field=models.CharField(blank=True, max_length=100, null=True), - ), - ] diff --git a/tracex_project/extraction/migrations/0023_remove_trace_cohort_cohort_trace.py b/tracex_project/extraction/migrations/0023_remove_trace_cohort_cohort_trace.py deleted file mode 100644 index 56804b57..00000000 --- a/tracex_project/extraction/migrations/0023_remove_trace_cohort_cohort_trace.py +++ /dev/null @@ -1,23 +0,0 @@ -# Generated by Django 5.0.2 on 2024-05-17 17:07 - -import django.db.models.deletion -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('extraction', '0022_remove_cohort_gender_cohort_sex_alter_cohort_age_and_more'), - ] - - operations = [ - migrations.RemoveField( - model_name='trace', - name='cohort', - ), - migrations.AddField( - model_name='cohort', - name='trace', - field=models.OneToOneField(null=True, on_delete=django.db.models.deletion.CASCADE, related_name='cohort', to='extraction.trace'), - ), - ] diff --git a/tracex_project/extraction/models.py b/tracex_project/extraction/models.py index e0e5b76c..0ea01a7f 100644 --- a/tracex_project/extraction/models.py +++ b/tracex_project/extraction/models.py @@ -1,4 +1,4 @@ -"""This module contains the models for the extraction app.""" +"""This module contains the models for the tracex database.""" from django.db import models from tracex.logic.constants import EVENT_TYPES, LOCATIONS @@ -18,8 +18,9 @@ class PatientJourney(models.Model): def __str__(self): return f"{self.name} (id: {self.id})" # pylint: disable=no-member + class Trace(models.Model): - """Model for a single trace, belonging to a patient journey.""" + """Model for the trace of a patient journey.""" patient_journey = models.ForeignKey( PatientJourney, 
on_delete=models.CASCADE, related_name="trace" @@ -32,7 +33,7 @@ def __str__(self): class Cohort(models.Model): - """Model for the Cohort of a patient journey.""" + """Model for the cohort of a patient journey.""" trace = models.OneToOneField( Trace, on_delete=models.CASCADE, related_name="cohort", null=True @@ -49,7 +50,7 @@ def __str__(self): class Event(models.Model): - """Model for a single event, only relevant in context with other events belonging to the same trace.""" + """Django model representing a single event in a trace.""" trace = models.ForeignKey(Trace, on_delete=models.CASCADE, related_name="events") activity = models.TextField() @@ -66,7 +67,7 @@ def __str__(self): class Prompt(models.Model): - """Model for the prompt to be used in the GPT query.""" + """Django model representing a prompt for a GPT query.""" DEFAULT_NAME = "" DEFAULT_CATEGORY = "zero-shot" @@ -80,7 +81,7 @@ def __str__(self): class Metric(models.Model): - """Model for metrics which are being tracked by the metrics analyzer""" + """Django model representing metrics tracked by the metrics analyzer.""" event = models.OneToOneField( Event, on_delete=models.CASCADE, related_name="metrics" diff --git a/tracex_project/extraction/templates/choose_input_method.html b/tracex_project/extraction/templates/choose_input_method.html index 06477b76..8947848e 100644 --- a/tracex_project/extraction/templates/choose_input_method.html +++ b/tracex_project/extraction/templates/choose_input_method.html @@ -4,27 +4,32 @@ Choose Input Method {% load static %} - + - - -

Choose Input Method

-

You can either upload a patient journey or select an existing patient journey from the database.

-
-
- - - -
-
- - - -
-
- - - +
+ +

Choose Input Method

+
+
+

You can either upload a patient journey or select an existing patient journey from the database.

+
+ + +
+
+ diff --git a/tracex_project/extraction/templates/filter_journey.html b/tracex_project/extraction/templates/filter_journey.html index 7cbf8250..d596dc00 100644 --- a/tracex_project/extraction/templates/filter_journey.html +++ b/tracex_project/extraction/templates/filter_journey.html @@ -24,7 +24,7 @@

Execute Extraction Pipeline

{% else %}

Prepare Extraction Pipeline

-

Here you can set what parts of the pipeline should be run, what should be visibile in the output and how the +

Here you can set what parts of the pipeline should be run, what should be visible in the output and how the output is shown.

{% endif %} @@ -37,10 +37,15 @@

Pipeline modules to run

- +
+
+
@@ -51,8 +56,8 @@

Pipeline modules to run

The Preprocessor brings the Patient Journey in a form that is better understandable for the pipeline. If it is not run, errors are more likely to @@ -64,21 +69,8 @@

Pipeline modules to run

- The Cohort Tagger extracts the disease the patient journey is - about alongside useful case attributes for further filter options. If it is not run, - no case attributes will be extracted. -
-
- -
- -
- The Time Extractor finds Timestamps for the extracted activities. If it's not run, the timestamps will be set to default values. @@ -88,9 +80,9 @@

Pipeline modules to run

-
@@ -187,9 +179,9 @@

Output filters

@@ -198,9 +190,9 @@

Output filters

@@ -209,8 +201,8 @@

Output filters

@@ -219,9 +211,9 @@

Output filters

@@ -230,8 +222,8 @@

Output filters

@@ -240,8 +232,8 @@

Output filters

'N/A' only occurs, if 'Event Type Classifier' is not selected.
@@ -257,7 +249,7 @@

Output filters

@@ -266,8 +258,8 @@

Output filters

@@ -276,8 +268,8 @@

Output filters

@@ -286,7 +278,7 @@

Output filters

'N/A' only occurs, if 'Location Extractor' is not selected.
@@ -326,18 +318,23 @@

DFG activity key

-
+
-
- + + - - + + diff --git a/tracex_project/extraction/templates/journey_details.html b/tracex_project/extraction/templates/journey_details.html index 6c0e334a..078aad53 100644 --- a/tracex_project/extraction/templates/journey_details.html +++ b/tracex_project/extraction/templates/journey_details.html @@ -16,7 +16,7 @@

Content of "{{ patient_journey.name }}"

{% csrf_token %} - + diff --git a/tracex_project/extraction/templates/result.html b/tracex_project/extraction/templates/result.html index 5679c435..6b8bd974 100644 --- a/tracex_project/extraction/templates/result.html +++ b/tracex_project/extraction/templates/result.html @@ -37,8 +37,6 @@

Success! The TracEX pipeline produced the following results:

Something went wrong during the event log generation: No buffer content available.

{% endif %} -

- {% if event_log_table %}