From 2a1aa935cf3c3efe899e73050a33e3063ef0d926 Mon Sep 17 00:00:00 2001 From: YaphetKG Date: Mon, 5 Feb 2024 15:39:30 -0500 Subject: [PATCH 1/5] fixes for local dev --- docker-compose.yaml | 8 +++----- src/dug/server.py | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 8e8d27d..664524d 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -40,11 +40,9 @@ services: REDIS_PASSWORD: "$REDIS_PASSWORD" FLASK_ENV: "development" PYTHONUNBUFFERED: "TRUE" - entrypoint: [ "gunicorn", - "--workers=$API_WORKERS", "--name=dug", - "--bind=0.0.0.0:$API_PORT", "--timeout=$API_TIMEOUT", - "--log-level=DEBUG", "--enable-stdio-inheritance", - "-k", "uvicorn.workers.UvicornWorker", "--reload", "dug.server:APP" ] + entrypoint: [ "uvicorn", + "--host", "0.0.0.0" , "--port" , "$API_PORT", + "--log-level=debug", "--reload-dir", "/home/dug/dug/", "--reload", "dug.server:APP" ] volumes: - ./src:/home/dug/dug/ ports: diff --git a/src/dug/server.py b/src/dug/server.py index f7a8466..7b7753e 100644 --- a/src/dug/server.py +++ b/src/dug/server.py @@ -13,7 +13,7 @@ APP = FastAPI( title="Dug Search API", - root_path=os.environ.get("ROOT_PATH", "/"), + # root_path=os.environ.get("ROOT_PATH", "/"), ) APP.add_middleware( From deaf4cf32feeef5e538ecdf7b1d0a3ffb18df262 Mon Sep 17 00:00:00 2001 From: YSK Date: Mon, 12 Feb 2024 09:33:24 -0500 Subject: [PATCH 2/5] end-point for study-id --- Dockerfile | 2 +- src/dug/core/async_search.py | 29 +++++++++++++++++++++++++++++ src/dug/server.py | 18 ++++++++++++++++++ 3 files changed, 48 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 3980ddf..301ca34 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,7 +11,7 @@ RUN apk update && \ apk add g++ make #upgrade openssl \ -RUN apk add openssl=3.1.4-r4 +RUN apk add openssl=3.1.4-r5 RUN pip install --upgrade pip # Create a non-root user. diff --git a/src/dug/core/async_search.py b/src/dug/core/async_search.py index b39e6a9..554d222 100644 --- a/src/dug/core/async_search.py +++ b/src/dug/core/async_search.py @@ -690,3 +690,32 @@ async def search_kg(self, unique_id, query, offset=0, size=None, ) search_results.update({'total_items': total_items['count']}) return search_results + + async def search_study(self, unique_id, query="", offset=0, size=None): + """ + In knowledge graph search the concept MUST match the unique ID + The query MUST match search_targets. The updated query allows for + fuzzy matching and for the default OR behavior for the query. + """ + query = { + "bool": { + "must": [ + {"term": { + "collection_id.keyword": unique_id + } + } + ] + } + } + body = {'query': query} + total_items = await self.es.count(body=body, index="variables_index") + search_results = await self.es.search( + index="variables_index", + body=body, + filter_path=['hits.hits._id', 'hits.hits._type', + 'hits.hits._source'], + from_=offset, + size=size + ) + search_results.update({'total_items': total_items['count']}) + return search_results diff --git a/src/dug/server.py b/src/dug/server.py index 7b7753e..79c2c47 100644 --- a/src/dug/server.py +++ b/src/dug/server.py @@ -49,6 +49,12 @@ class SearchKgQuery(BaseModel): index: str = "kg_index" size:int = 100 +class SearchStudyQuery(BaseModel): + #query: str + unique_id: str + #index: str = "variables_index" + size:int = 100 + search = Search(Config.from_env()) @APP.on_event("shutdown") @@ -106,6 +112,18 @@ async def search_var(search_query: SearchVariablesQuery): "status": "success" } +@APP.get('/search_study') +async def search_study(unique_id: str): + return { + "message": "Search result", + # Although index in provided by the query we will keep it around for backward compatibility, but + # search concepts should always search against "variables_index" + "result": await search.search_study(unique_id=unique_id), + "status": "success" + } + + + if __name__ == '__main__': uvicorn.run(APP) From dba5babbe42c368910167b98a8359f528797096a Mon Sep 17 00:00:00 2001 From: YSK Date: Mon, 12 Feb 2024 15:35:35 -0500 Subject: [PATCH 3/5] end-point for study-id and study-name --- src/dug/core/async_search.py | 35 +++++++++++++++++++++-------------- src/dug/server.py | 22 +++++++++++++++++++--- 2 files changed, 40 insertions(+), 17 deletions(-) diff --git a/src/dug/core/async_search.py b/src/dug/core/async_search.py index 554d222..639361f 100644 --- a/src/dug/core/async_search.py +++ b/src/dug/core/async_search.py @@ -691,29 +691,36 @@ async def search_kg(self, unique_id, query, offset=0, size=None, search_results.update({'total_items': total_items['count']}) return search_results - async def search_study(self, unique_id, query="", offset=0, size=None): + async def search_study(self, study_id=None, study_name=None, offset=0, size=None): """ - In knowledge graph search the concept MUST match the unique ID - The query MUST match search_targets. The updated query allows for - fuzzy matching and for the default OR behavior for the query. + Search for studies by unique_id (ID or name) and/or study_name. """ - query = { + # Define the base query + # Define the base query + query_body = { "bool": { - "must": [ - {"term": { - "collection_id.keyword": unique_id - } - } - ] + "must": [] } } - body = {'query': query} + + # Add conditions based on user input + if study_id: + query_body["bool"]["must"].append({ + "match": {"collection_id": study_id} + }) + + if study_name: + query_body["bool"]["must"].append({ + "match": {"collection_name": study_name} + }) + + print("query_body",query_body) + body = {'query': query_body} total_items = await self.es.count(body=body, index="variables_index") search_results = await self.es.search( index="variables_index", body=body, - filter_path=['hits.hits._id', 'hits.hits._type', - 'hits.hits._source'], + filter_path=['hits.hits._id', 'hits.hits._type', 'hits.hits._source'], from_=offset, size=size ) diff --git a/src/dug/server.py b/src/dug/server.py index 79c2c47..7ccf614 100644 --- a/src/dug/server.py +++ b/src/dug/server.py @@ -8,6 +8,7 @@ from dug.core.async_search import Search from pydantic import BaseModel import asyncio +from typing import Optional logger = logging.getLogger (__name__) @@ -51,9 +52,11 @@ class SearchKgQuery(BaseModel): class SearchStudyQuery(BaseModel): #query: str - unique_id: str + study_id: Optional[str] = None + study_name: Optional[str] = None #index: str = "variables_index" size:int = 100 + search = Search(Config.from_env()) @@ -112,7 +115,7 @@ async def search_var(search_query: SearchVariablesQuery): "status": "success" } -@APP.get('/search_study') +'''@APP.get('/search_study') async def search_study(unique_id: str): return { "message": "Search result", @@ -120,8 +123,21 @@ async def search_study(unique_id: str): # search concepts should always search against "variables_index" "result": await search.search_study(unique_id=unique_id), "status": "success" - } + }''' +@APP.get('/search_study') +async def search_study(study_id: Optional[str] = None, study_name: Optional[str] = None): + """ + Search for studies by unique_id (ID or name) and/or study_name. + """ + result = await search.search_study(study_id=study_id, study_name=study_name) + return { + "message": "Search result", + # Although index in provided by the query we will keep it around for backward compatibility, but + # search concepts should always search against "variables_index" + "result": result, + "status": "success" + } From 176f58496be62b23192515388ead2465b26f291d Mon Sep 17 00:00:00 2001 From: YSK Date: Mon, 12 Feb 2024 15:52:47 -0500 Subject: [PATCH 4/5] end-point for study-id and study-name --- src/dug/server.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/dug/server.py b/src/dug/server.py index 7ccf614..a0cfa6c 100644 --- a/src/dug/server.py +++ b/src/dug/server.py @@ -115,15 +115,7 @@ async def search_var(search_query: SearchVariablesQuery): "status": "success" } -'''@APP.get('/search_study') -async def search_study(unique_id: str): - return { - "message": "Search result", - # Although index in provided by the query we will keep it around for backward compatibility, but - # search concepts should always search against "variables_index" - "result": await search.search_study(unique_id=unique_id), - "status": "success" - }''' + @APP.get('/search_study') async def search_study(study_id: Optional[str] = None, study_name: Optional[str] = None): From 4d0a7fcc1665dfe7de1f12b6dca07b4f47290aae Mon Sep 17 00:00:00 2001 From: YSK Date: Wed, 10 Apr 2024 15:33:26 -0400 Subject: [PATCH 5/5] Added program name search funtionality --- src/dug/core/async_search.py | 54 ++++++++++++++++++++++++++++++++++++ src/dug/server.py | 21 +++++++++++++- 2 files changed, 74 insertions(+), 1 deletion(-) diff --git a/src/dug/core/async_search.py b/src/dug/core/async_search.py index 639361f..7308354 100644 --- a/src/dug/core/async_search.py +++ b/src/dug/core/async_search.py @@ -726,3 +726,57 @@ async def search_study(self, study_id=None, study_name=None, offset=0, size=None ) search_results.update({'total_items': total_items['count']}) return search_results + + + async def search_program(self, program_name=None, offset=0, size=None): + """ + Search for studies by unique_id (ID or name) and/or study_name. + """ + + query_body = { + "query": { + "bool": { + "must": [] + } + }, + "aggs": { + "unique_collection_ids": { + "terms": { + "field": "collection_id.keyword" + } + } + } + } + + # specify the fields to be returned + query_body["_source"] = ["collection_id", "collection_name", "collection_action"] + + # search for program_name based on uses input + if program_name: + query_body["query"]["bool"]["must"].append({ + "match": {"data_type": program_name} + }) + + print("query_body", query_body) + + # Prepare the query body for execution + body = query_body + #print(body) + + # Execute the search query + search_results = await self.es.search( + index="variables_index", + body=body, + filter_path=['hits.hits._id', 'hits.hits._type', 'hits.hits._source', 'aggregations.unique_collection_ids.buckets'], + from_=offset, + size=size + ) + + # The unique collection_ids will be in the 'aggregations' field of the response + unique_collection_ids = search_results['aggregations']['unique_collection_ids']['buckets'] + + #print("Unique collection_ids:", unique_collection_ids) + + + #print(search_results) + return search_results \ No newline at end of file diff --git a/src/dug/server.py b/src/dug/server.py index a0cfa6c..7d3fc25 100644 --- a/src/dug/server.py +++ b/src/dug/server.py @@ -56,7 +56,12 @@ class SearchStudyQuery(BaseModel): study_name: Optional[str] = None #index: str = "variables_index" size:int = 100 - +class SearchProgramQuery(BaseModel): + #query: str + program_id: Optional[str] = None + program_name: Optional[str] = None + #index: str = "variables_index" + size:int = 100 search = Search(Config.from_env()) @@ -132,6 +137,20 @@ async def search_study(study_id: Optional[str] = None, study_name: Optional[str] } +@APP.get('/search_program') +async def search_program( program_name: Optional[str] = None): + """ + Search for studies by unique_id (ID or name) and/or study_name. + """ + result = await search.search_program(program_name=program_name) + return { + "message": "Search result", + # Although index in provided by the query we will keep it around for backward compatibility, but + # search concepts should always search against "variables_index" + "result": result, + "status": "success" + } + if __name__ == '__main__': uvicorn.run(APP)