diff --git a/Dockerfile b/Dockerfile index b4b4fb4a..a49eb92d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,17 +3,12 @@ # A container for the core semantic-search capability. # ###################################################### -FROM python:3.12.1-alpine3.19 +FROM python:alpine3.20 # Install required packages RUN apk update && \ - apk add g++ make libexpat=2.6.2-r0 libssl3=3.1.4-r6 libcrypto3=3.1.4-r6 - - -#upgrade openssl \ - -#RUN apk add openssl=3.1.4-r5 + apk add g++ make RUN pip install --upgrade pip diff --git a/requirements.txt b/requirements.txt index 2bbadabe..694cf680 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,7 +21,7 @@ requests redis requests-cache six - +retrying # Click for command line arguments # We use Click 7.0 because that's what one of the pinned packages above use. click diff --git a/src/dug/config.py b/src/dug/config.py index 3014e96f..24aef3dc 100644 --- a/src/dug/config.py +++ b/src/dug/config.py @@ -28,7 +28,7 @@ class Config: nboost_port: int = 8000 program_sort_list: str = "" - program_name_mappings: dict=field( + program_description: dict=field( default_factory=lambda:{}) # Preprocessor config that will be passed to annotate.Preprocessor constructor @@ -141,8 +141,7 @@ def from_env(cls): "redis_host": "REDIS_HOST", "redis_port": "REDIS_PORT", "redis_password": "REDIS_PASSWORD", - "program_sort_list": "PROGRAM_SORT_LIST", - "program_name_mappings" : "PROGRAM_NAME_MAPPINGS" + "program_description": "PROGRAM_DESCRIPTION", } kwargs = {} diff --git a/src/dug/core/async_search.py b/src/dug/core/async_search.py index 8f62c75b..9fdb65d9 100644 --- a/src/dug/core/async_search.py +++ b/src/dug/core/async_search.py @@ -2,10 +2,10 @@ import logging from elasticsearch import AsyncElasticsearch from elasticsearch.helpers import async_scan -import ssl,os,json - +import ssl,json from dug.config import Config + logger = logging.getLogger('dug') @@ -528,7 +528,6 @@ async def search_program(self, program_name=None, offset=0, 
size=None): async def search_program_list(self): - query_body = { "size": 0, # We don't need the documents themselves, so set the size to 0 "aggs": { @@ -554,15 +553,22 @@ async def search_program_list(self): # The unique data_types and their counts of unique collection_ids will be in the 'aggregations' field of the response unique_data_types = search_results['aggregations']['unique_program_names']['buckets'] data=unique_data_types - program_keys =self._cfg.program_sort_list.split(',') - #key_mapping = self._cfg.program_name_mappings - #key_mapping = json.loads(key_mapping) - key_index_map = {key: index for index, key in enumerate(program_keys)} - unique_data_types = sorted(data, key=lambda x: key_index_map.get(x['key'], len(program_keys))) - #for item in unique_data_types: - # if item['key'] in key_mapping: - # item['key'] = key_mapping[item['key']] - return unique_data_types + print(data) + # Sorting the data alphabetically based on 'key' + sorted_data = sorted(data, key=lambda x: x['key']) + + #Add description as another field in existing data based on the program name + descriptions_json = self._cfg.program_description + descriptions = json.loads(descriptions_json) + description_dict = {item['key']: {'description': item['description'], 'parent_program': item['parent_program']} for item in descriptions} + + # Add descriptions and parent programs to the sorted data + for item in sorted_data: + desc_info = description_dict.get(item['key'], {'description': '', 'parent_program': []}) + item['description'] = desc_info['description'] + item['parent_program'] = desc_info['parent_program'] + + return sorted_data def _get_var_query(self, concept, fuzziness, prefix_length, query): diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 50f57877..a23ad51c 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -129,13 +129,13 @@ def sapbert_annotator_api(): "name": "attack; cardiovascular", "curie": "UBERON:0007100", 
"category": "biolink:Disease", - "score": "0.15857231617", + "score": 0.85857231617, }, { "name": "Angina attack", "curie": "XAO:0000336", "category": "biolink:Disease", - "score": "0.206502258778", + "score": 0.806502258778, }, ] ),