From afe8ccaaa627a40593679374c3832e640d2cb02f Mon Sep 17 00:00:00 2001 From: ccurme Date: Wed, 28 Aug 2024 17:56:50 -0400 Subject: [PATCH] community[patch]: Add ID field back to Azure AI Search results (#25828) Commandeering https://github.com/langchain-ai/langchain/pull/23243 as maintainers don't have the ability to modify that PR. Fixes https://github.com/langchain-ai/langchain/issues/22827 --------- Co-authored-by: Ming Quah --- .../vectorstores/azuresearch.py | 36 ++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/libs/community/langchain_community/vectorstores/azuresearch.py b/libs/community/langchain_community/vectorstores/azuresearch.py index 45d5566fb56e0..e972c3a9bd839 100644 --- a/libs/community/langchain_community/vectorstores/azuresearch.py +++ b/libs/community/langchain_community/vectorstores/azuresearch.py @@ -1283,6 +1283,11 @@ def semantic_hybrid_search_with_score_and_rerank( Document( page_content=result.pop(FIELDS_CONTENT), metadata={ + **( + {FIELDS_ID: result.pop(FIELDS_ID)} + if FIELDS_ID in result + else {} + ), **( json.loads(result[FIELDS_METADATA]) if FIELDS_METADATA in result @@ -1362,6 +1367,11 @@ async def asemantic_hybrid_search_with_score_and_rerank( Document( page_content=result.pop(FIELDS_CONTENT), metadata={ + **( + {FIELDS_ID: result.pop(FIELDS_ID)} + if FIELDS_ID in result + else {} + ), **( json.loads(result[FIELDS_METADATA]) if FIELDS_METADATA in result @@ -1752,16 +1762,26 @@ def _reorder_results_with_maximal_marginal_relevance( def _result_to_document(result: Dict) -> Document: + # Fields metadata + if FIELDS_METADATA in result: + if isinstance(result[FIELDS_METADATA], dict): + fields_metadata = result[FIELDS_METADATA] + else: + fields_metadata = json.loads(result[FIELDS_METADATA]) + else: + fields_metadata = { + key: value for key, value in result.items() if key != FIELDS_CONTENT_VECTOR + } + # IDs + if FIELDS_ID in result: + fields_id = {FIELDS_ID: result.pop(FIELDS_ID)} + else: + fields_id = {} 
return Document( page_content=result.pop(FIELDS_CONTENT), - metadata=( - result[FIELDS_METADATA] - if isinstance(result[FIELDS_METADATA], dict) - else json.loads(result[FIELDS_METADATA]) - ) - if FIELDS_METADATA in result - else { - key: value for key, value in result.items() if key != FIELDS_CONTENT_VECTOR + metadata={ + **fields_id, + **fields_metadata, }, )