Skip to content

Commit

Permalink
Merge pull request #360 from helxplatform/ProgramName_mappings
Browse files Browse the repository at this point in the history
added program description and sorted the program names alphabetically
  • Loading branch information
YaphetKG authored Jul 18, 2024
2 parents 70ae8f7 + fad5059 commit 82bb5e6
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 25 deletions.
9 changes: 2 additions & 7 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,12 @@
# A container for the core semantic-search capability.
#
######################################################
FROM python:3.12.1-alpine3.19
FROM python:alpine3.20


# Install required packages
RUN apk update && \
apk add g++ make libexpat=2.6.2-r0 libssl3=3.1.4-r6 libcrypto3=3.1.4-r6


#upgrade openssl \

#RUN apk add openssl=3.1.4-r5
apk add g++ make


RUN pip install --upgrade pip
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ requests
redis
requests-cache
six

retrying
# Click for command line arguments
# We use Click 7.0 because that's what one of the pinned packages above uses.
click
Expand Down
5 changes: 2 additions & 3 deletions src/dug/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class Config:
nboost_port: int = 8000

program_sort_list: str = ""
program_name_mappings: dict=field(
program_description: dict=field(
default_factory=lambda:{})

# Preprocessor config that will be passed to annotate.Preprocessor constructor
Expand Down Expand Up @@ -141,8 +141,7 @@ def from_env(cls):
"redis_host": "REDIS_HOST",
"redis_port": "REDIS_PORT",
"redis_password": "REDIS_PASSWORD",
"program_sort_list": "PROGRAM_SORT_LIST",
"program_name_mappings" : "PROGRAM_NAME_MAPPINGS"
"program_description": "PROGRAM_DESCRIPTION",
}

kwargs = {}
Expand Down
30 changes: 18 additions & 12 deletions src/dug/core/async_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
import logging
from elasticsearch import AsyncElasticsearch
from elasticsearch.helpers import async_scan
import ssl,os,json

import ssl,json
from dug.config import Config


logger = logging.getLogger('dug')


Expand Down Expand Up @@ -528,7 +528,6 @@ async def search_program(self, program_name=None, offset=0, size=None):


async def search_program_list(self):

query_body = {
"size": 0, # We don't need the documents themselves, so set the size to 0
"aggs": {
Expand All @@ -554,15 +553,22 @@ async def search_program_list(self):
# The unique data_types and their counts of unique collection_ids will be in the 'aggregations' field of the response
unique_data_types = search_results['aggregations']['unique_program_names']['buckets']
data=unique_data_types
program_keys =self._cfg.program_sort_list.split(',')
#key_mapping = self._cfg.program_name_mappings
#key_mapping = json.loads(key_mapping)
key_index_map = {key: index for index, key in enumerate(program_keys)}
unique_data_types = sorted(data, key=lambda x: key_index_map.get(x['key'], len(program_keys)))
#for item in unique_data_types:
# if item['key'] in key_mapping:
# item['key'] = key_mapping[item['key']]
return unique_data_types
print(data)
# Sorting the data alphabetically based on 'key'
sorted_data = sorted(data, key=lambda x: x['key'])

# Add the description as another field in the existing data, based on the program name
descriptions_json = self._cfg.program_description
descriptions = json.loads(descriptions_json)
description_dict = {item['key']: {'description': item['description'], 'parent_program': item['parent_program']} for item in descriptions}

# Add descriptions and parent programs to the sorted data
for item in sorted_data:
desc_info = description_dict.get(item['key'], {'description': '', 'parent_program': []})
item['description'] = desc_info['description']
item['parent_program'] = desc_info['parent_program']

return sorted_data


def _get_var_query(self, concept, fuzziness, prefix_length, query):
Expand Down
4 changes: 2 additions & 2 deletions tests/integration/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,13 +129,13 @@ def sapbert_annotator_api():
"name": "attack; cardiovascular",
"curie": "UBERON:0007100",
"category": "biolink:Disease",
"score": "0.15857231617",
"score": 0.85857231617,
},
{
"name": "Angina attack",
"curie": "XAO:0000336",
"category": "biolink:Disease",
"score": "0.206502258778",
"score": 0.806502258778,
},
]
),
Expand Down

0 comments on commit 82bb5e6

Please sign in to comment.