From ed1e7bc182a4838984b3269f448fd90889d87ef6 Mon Sep 17 00:00:00 2001 From: Jehangir Amjad Date: Mon, 6 Mar 2023 20:18:26 +0000 Subject: [PATCH 1/3] adding some detection logs to debugInfo on the frontend --- server/integration_tests/nlnext_test.py | 2 - server/routes/nl.py | 80 ++++++++++++++++------ server/services/nl.py | 9 ++- static/js/apps/nl_interface/debug_info.tsx | 11 +++ 4 files changed, 76 insertions(+), 26 deletions(-) diff --git a/server/integration_tests/nlnext_test.py b/server/integration_tests/nlnext_test.py index 05e669cdaf..2c9c50c808 100644 --- a/server/integration_tests/nlnext_test.py +++ b/server/integration_tests/nlnext_test.py @@ -16,7 +16,6 @@ import multiprocessing import os import sys -import time from flask_testing import LiveServerTestCase import requests @@ -74,7 +73,6 @@ def run_sequence(self, check_debug_info=True): ctx = {} for i, q in enumerate(queries): - time.sleep(5) print('Issuing ', test_dir, f'query[{i}]', q) resp = requests.post(self.get_server_url() + f'/nl/data?q={q}', json={ diff --git a/server/routes/nl.py b/server/routes/nl.py index ca74ec63c1..b4fce8a3ac 100644 --- a/server/routes/nl.py +++ b/server/routes/nl.py @@ -17,7 +17,7 @@ import json import logging import os -from typing import Dict, List +from typing import Dict, List, Tuple import flask from flask import Blueprint @@ -108,11 +108,13 @@ def _remove_places(query, places_str_found: List[str]): return ' '.join(query.split()) -def _get_place_from_dcids(place_dcids: List[str]) -> List[Place]: +def _get_place_from_dcids(place_dcids: List[str]) -> Tuple[List[Place], Dict]: places = [] place_types_dict = dc.property_values(place_dcids, 'typeOf') place_names_dict = dc.property_values(place_dcids, 'name') + debug_logs = {} + dc_resolve_failures = [] # Iterate in the same order as place_dcids. 
for p_dcid in place_dcids: @@ -126,16 +128,24 @@ def _get_place_from_dcids(place_dcids: List[str]) -> List[Place]: logging.info( f"Place DCID ({p_dcid}) did not correspond to a place_type and/or place name." ) - return places + dc_resolve_failures.append(p_dcid) + debug_logs["dc_resolution_failure"] = dc_resolve_failures + debug_logs["dc_resolved_places"] = places + return (places, debug_logs) -def _infer_place_dcids(places_str_found: List[str]) -> List[str]: + +def _infer_place_dcids(places_str_found: List[str]) -> Tuple[List[str], Dict]: + debug_logs = {} # TODO: propagate several of the logging errors in this function to place detection # state displayed in debugInfo. if not places_str_found: logging.info("places_found is empty. Nothing to retrieve from Maps API.") - return [] + return ([], debug_logs) + override_places = [] + maps_api_failures = [] + no_dcids_found = [] place_dcids = [] # Iterate over all the places until a valid place DCID is found. for p_str in places_str_found: @@ -147,6 +157,7 @@ def _infer_place_dcids(places_str_found: List[str]) -> List[str]: f"{p_str} was found in OVERRIDE_PLACE_TO_DCID_FOR_MAPS_API. Recording its DCID {place_dcid} without querying Maps API." ) place_dcids.append(place_dcid) + override_places.append((p_str.lower(), place_dcid)) continue logging.info(f"Searching Maps API with: {p_str}") @@ -168,13 +179,20 @@ def _infer_place_dcids(places_str_found: List[str]) -> List[str]: logging.info( f"Maps API found a place {place_id} but no DCID match found for place string: {p_str}." ) + no_dcids_found.append(place_id) else: - logging.info("Maps API did not find a place for place string: {p_str}.") + logging.info(f"Maps API did not find a place for place string: {p_str}.") + maps_api_failures.append(p_str) if not place_dcids: logging.info( - f"No place DCIDs were found. Using places_found = {places_str_found}") - return place_dcids + f"No place DCIDs were found. 
Using places_found = {places_str_found}.") + + debug_logs["dcids_resolved"] = place_dcids + debug_logs["dcid_overrides_found"] = override_places + debug_logs["maps_api_failures"] = maps_api_failures + debug_logs["dcid_not_found_for_place_ids"] = no_dcids_found + return (place_dcids, debug_logs) def _empty_svs_score_dict(): @@ -183,8 +201,8 @@ def _empty_svs_score_dict(): def _result_with_debug_info(data_dict: Dict, status: str, query_detection: Detection, - uttr_history: List[Dict], - debug_counters: Dict) -> Dict: + uttr_history: List[Dict], debug_counters: Dict, + query_detection_debug_logs: str) -> Dict: """Using data_dict and query_detection, format the dictionary response.""" svs_dict = { 'SV': query_detection.svs_detected.sv_dcids, @@ -262,6 +280,8 @@ def _result_with_debug_info(data_dict: Dict, status: str, places_found_formatted, 'query_with_places_removed': query_detection.places_detected.query_without_place_substr, + 'query_detection_debug_logs': + query_detection_debug_logs, }) if query_detection.places_detected.main_place: @@ -278,7 +298,7 @@ def _result_with_debug_info(data_dict: Dict, status: str, return data_dict -def _detection(orig_query, cleaned_query) -> Detection: +def _detection(orig_query, cleaned_query) -> Tuple[Detection, Dict]: model = current_app.config['NL_MODEL'] # Step 1: find all relevant places and the name/type of the main place found. @@ -294,16 +314,18 @@ def _detection(orig_query, cleaned_query) -> Detection: main_place = None resolved_places = [] + infer_dcids_debug = "No place inference (no places found)" + place_dcid_debug = "No place resolution (no place dcids found)" # Look to find place DCIDs. if places_str_found: - place_dcids = _infer_place_dcids(places_str_found) + (place_dcids, infer_dcids_debug) = _infer_place_dcids(places_str_found) logging.info(f"Found {len(place_dcids)} place dcids: {place_dcids}.") # Step 2: replace the places in the query sentence with "". 
query = _remove_places(cleaned_query.lower(), places_str_found) if place_dcids: - resolved_places = _get_place_from_dcids(place_dcids) + (resolved_places, place_dcid_debug) = _get_place_from_dcids(place_dcids) logging.info( f"Resolved {len(resolved_places)} place dcids: {resolved_places}.") @@ -319,13 +341,26 @@ def _detection(orig_query, cleaned_query) -> Detection: main_place=main_place) # Step 3: Identify the SV matched based on the query. + sv_debug_logs = {} svs_scores_dict = _empty_svs_score_dict() try: - svs_scores_dict = model.detect_svs(query) + (svs_scores_dict, sv_debug_logs) = model.detect_svs(query) except ValueError as e: logging.info(e) logging.info("Using an empty svs_scores_dict") + # Update the various place detection and query transformation debug logs dict. + query_detection_debug_logs = {} + query_detection_debug_logs["place_dcid_inference"] = infer_dcids_debug + query_detection_debug_logs["place_resolution"] = place_dcid_debug + query_detection_debug_logs["places_found_str"] = places_str_found + query_detection_debug_logs["main_place_inferred"] = main_place + query_detection_debug_logs["query_transformations"] = { + "place_detection_input": cleaned_query.lower(), + "place_detection_with_places_removed": query, + } + query_detection_debug_logs["query_transformations"].update(sv_debug_logs) + # Set the SVDetection. 
sv_detection = SVDetection( query=query, @@ -379,11 +414,12 @@ def _detection(orig_query, cleaned_query) -> Detection: NLClassifier(type=ClassificationType.UNKNOWN, attributes=SimpleClassificationAttributes())) - return Detection(original_query=orig_query, - cleaned_query=cleaned_query, - places_detected=place_detection, - svs_detected=sv_detection, - classifications=classifications) + return (Detection(original_query=orig_query, + cleaned_query=cleaned_query, + places_detected=place_detection, + svs_detected=sv_detection, + classifications=classifications), + query_detection_debug_logs) @bp.route('/') @@ -448,7 +484,8 @@ def data(): # Query detection routine: # Returns detection for Place, SVs and Query Classifications. - query_detection = _detection(str(escape(original_query)), query) + (query_detection, + query_detection_debug_logs) = _detection(str(escape(original_query)), query) # Generate new utterance. prev_utterance = nl_utterance.load_utterance(context_history) @@ -503,7 +540,8 @@ def data(): loop.run_until_complete(bt.write_row(session_info)) data_dict = _result_with_debug_info(data_dict, status_str, query_detection, - context_history, dbg_counters) + context_history, dbg_counters, + query_detection_debug_logs) return data_dict diff --git a/server/services/nl.py b/server/services/nl.py index d2b036e50f..73c4eb158c 100644 --- a/server/services/nl.py +++ b/server/services/nl.py @@ -16,7 +16,7 @@ from collections import OrderedDict import logging import re -from typing import Dict, List, Union +from typing import Dict, List, Tuple, Union import numpy as np import pandas as pd @@ -398,18 +398,21 @@ def heuristic_correlation_classification( return NLClassifier(type=ClassificationType.CORRELATION, attributes=attributes) - def detect_svs(self, query) -> Dict[str, Union[Dict, List]]: + def detect_svs(self, query) -> Tuple[Dict[str, Union[Dict, List]], Dict]: + debug_logs = {} # Remove stop words. 
# Check comment at the top of this file above `ALL_STOP_WORDS` to understand # the potential areas for improvement. For now, this removal blanket removes # any words in ALL_STOP_WORDS which includes contained_in places and their # plurals and any other query attribution/classification trigger words. logging.info(f"SV Detection: Query provided to SV Detection: {query}") + debug_logs["sv_detection_query_input"] = query query = utils.remove_stop_words(query, ALL_STOP_WORDS) + debug_logs["sv_detection_query_stop_words_removal"] = query logging.info(f"SV Detection: Query used after removing stop words: {query}") # Make API call to the NL models/embeddings server. - return dc.nl_search_sv(query) + return (dc.nl_search_sv(query), debug_logs) def detect_place(self, query): return self.place_detector.detect_places_heuristics(query) diff --git a/static/js/apps/nl_interface/debug_info.tsx b/static/js/apps/nl_interface/debug_info.tsx index 7de5819dbe..7780add52a 100644 --- a/static/js/apps/nl_interface/debug_info.tsx +++ b/static/js/apps/nl_interface/debug_info.tsx @@ -106,6 +106,7 @@ export function DebugInfo(props: DebugInfoProps): JSX.Element { mainPlaceDCID: props.debugData["main_place_dcid"], mainPlaceName: props.debugData["main_place_name"], queryWithoutPlaces: props.debugData["query_with_places_removed"], + queryDetectionDebugLogs: props.debugData["query_detection_debug_logs"], svScores: props.debugData["sv_matching"], svSentences: props.debugData["svs_to_sentences"], rankingClassification: props.debugData["ranking_classification"], @@ -205,6 +206,16 @@ export function DebugInfo(props: DebugInfoProps): JSX.Element { Overview classification: {debugInfo.overviewClassification} + + Query Detection Debug Logs: + + + +
+                {JSON.stringify(debugInfo.queryDetectionDebugLogs, null, 2)}
+              
+ +
All Variables Matched (with scores): From 8e35d345fdca1b7862069fc6b84b06486b82195d Mon Sep 17 00:00:00 2001 From: Jehangir Amjad Date: Tue, 7 Mar 2023 16:28:43 +0000 Subject: [PATCH 2/3] responding to comments and fixing a pending bug --- server/routes/nl.py | 99 ++++++++++++---------- server/services/nl.py | 6 +- static/js/apps/nl_interface/debug_info.tsx | 20 ++--- 3 files changed, 68 insertions(+), 57 deletions(-) diff --git a/server/routes/nl.py b/server/routes/nl.py index b4fce8a3ac..d128f6fa74 100644 --- a/server/routes/nl.py +++ b/server/routes/nl.py @@ -108,12 +108,12 @@ def _remove_places(query, places_str_found: List[str]): return ' '.join(query.split()) -def _get_place_from_dcids(place_dcids: List[str]) -> Tuple[List[Place], Dict]: +def _get_place_from_dcids(place_dcids: List[str], + debug_logs: Dict) -> List[Place]: places = [] place_types_dict = dc.property_values(place_dcids, 'typeOf') place_names_dict = dc.property_values(place_dcids, 'name') - debug_logs = {} dc_resolve_failures = [] # Iterate in the same order as place_dcids. for p_dcid in place_dcids: @@ -130,18 +130,17 @@ def _get_place_from_dcids(place_dcids: List[str]) -> Tuple[List[Place], Dict]: ) dc_resolve_failures.append(p_dcid) - debug_logs["dc_resolution_failure"] = dc_resolve_failures - debug_logs["dc_resolved_places"] = places - return (places, debug_logs) + debug_logs.update({ + "dc_resolution_failure": dc_resolve_failures, + "dc_resolved_places": places, + }) + return places -def _infer_place_dcids(places_str_found: List[str]) -> Tuple[List[str], Dict]: - debug_logs = {} - # TODO: propagate several of the logging errors in this function to place detection - # state displayed in debugInfo. +def _infer_place_dcids(places_str_found: List[str], + debug_logs: Dict) -> List[str]: if not places_str_found: logging.info("places_found is empty. 
Nothing to retrieve from Maps API.") - return ([], debug_logs) override_places = [] maps_api_failures = [] @@ -188,11 +187,14 @@ def _infer_place_dcids(places_str_found: List[str]) -> Tuple[List[str], Dict]: logging.info( f"No place DCIDs were found. Using places_found = {places_str_found}.") - debug_logs["dcids_resolved"] = place_dcids - debug_logs["dcid_overrides_found"] = override_places - debug_logs["maps_api_failures"] = maps_api_failures - debug_logs["dcid_not_found_for_place_ids"] = no_dcids_found - return (place_dcids, debug_logs) + # Update the debug_logs dict. + debug_logs.update({ + "dcids_resolved": place_dcids, + "dcid_overrides_found": override_places, + "maps_api_failures": maps_api_failures, + "dcid_not_found_for_place_ids": no_dcids_found + }) + return place_dcids def _empty_svs_score_dict(): @@ -298,7 +300,8 @@ def _result_with_debug_info(data_dict: Dict, status: str, return data_dict -def _detection(orig_query, cleaned_query) -> Tuple[Detection, Dict]: +def _detection(orig_query: str, cleaned_query: str, + query_detection_debug_logs: Dict) -> Detection: model = current_app.config['NL_MODEL'] # Step 1: find all relevant places and the name/type of the main place found. @@ -314,18 +317,19 @@ def _detection(orig_query, cleaned_query) -> Tuple[Detection, Dict]: main_place = None resolved_places = [] - infer_dcids_debug = "No place inference (no places found)" - place_dcid_debug = "No place resolution (no place dcids found)" + # Start updating the query_detection_debug_logs. Create space for place dcid inference + # and place resolution. If they remain empty, the function belows were never triggered. + query_detection_debug_logs["place_dcid_inference"] = {} + query_detection_debug_logs["place_resolution"] = {} # Look to find place DCIDs. 
if places_str_found: - (place_dcids, infer_dcids_debug) = _infer_place_dcids(places_str_found) + place_dcids = _infer_place_dcids( + places_str_found, query_detection_debug_logs["place_dcid_inference"]) logging.info(f"Found {len(place_dcids)} place dcids: {place_dcids}.") - # Step 2: replace the places in the query sentence with "". - query = _remove_places(cleaned_query.lower(), places_str_found) - if place_dcids: - (resolved_places, place_dcid_debug) = _get_place_from_dcids(place_dcids) + resolved_places = _get_place_from_dcids( + place_dcids, query_detection_debug_logs["place_resolution"]) logging.info( f"Resolved {len(resolved_places)} place dcids: {resolved_places}.") @@ -333,6 +337,9 @@ def _detection(orig_query, cleaned_query) -> Tuple[Detection, Dict]: main_place = resolved_places[0] logging.info(f"Using main_place as: {main_place}") + # Step 2: replace the resolved places in the query sentence with "". + query = _remove_places(cleaned_query.lower(), places_str_found) + # Set PlaceDetection. place_detection = PlaceDetection(query_original=orig_query, query_without_place_substr=query, @@ -340,26 +347,29 @@ def _detection(orig_query, cleaned_query) -> Tuple[Detection, Dict]: places_found=resolved_places, main_place=main_place) - # Step 3: Identify the SV matched based on the query. - sv_debug_logs = {} - svs_scores_dict = _empty_svs_score_dict() - try: - (svs_scores_dict, sv_debug_logs) = model.detect_svs(query) - except ValueError as e: - logging.info(e) - logging.info("Using an empty svs_scores_dict") - # Update the various place detection and query transformation debug logs dict. 
- query_detection_debug_logs = {} - query_detection_debug_logs["place_dcid_inference"] = infer_dcids_debug - query_detection_debug_logs["place_resolution"] = place_dcid_debug query_detection_debug_logs["places_found_str"] = places_str_found query_detection_debug_logs["main_place_inferred"] = main_place query_detection_debug_logs["query_transformations"] = { "place_detection_input": cleaned_query.lower(), "place_detection_with_places_removed": query, } - query_detection_debug_logs["query_transformations"].update(sv_debug_logs) + if not query_detection_debug_logs["place_dcid_inference"]: + query_detection_debug_logs[ + "place_dcid_inference"] = "Place DCID Inference did not trigger (no place strings found)." + if not query_detection_debug_logs["place_resolution"]: + query_detection_debug_logs[ + "place_resolution"] = "Place resolution did not trigger (no place dcids found)." + + # Step 3: Identify the SV matched based on the query. + sv_debug_logs = {} + svs_scores_dict = _empty_svs_score_dict() + try: + svs_scores_dict = model.detect_svs( + query, query_detection_debug_logs["query_transformations"]) + except ValueError as e: + logging.info(e) + logging.info("Using an empty svs_scores_dict") # Set the SVDetection. 
sv_detection = SVDetection( @@ -414,12 +424,11 @@ def _detection(orig_query, cleaned_query) -> Tuple[Detection, Dict]: NLClassifier(type=ClassificationType.UNKNOWN, attributes=SimpleClassificationAttributes())) - return (Detection(original_query=orig_query, - cleaned_query=cleaned_query, - places_detected=place_detection, - svs_detected=sv_detection, - classifications=classifications), - query_detection_debug_logs) + return Detection(original_query=orig_query, + cleaned_query=cleaned_query, + places_detected=place_detection, + svs_detected=sv_detection, + classifications=classifications) @bp.route('/') @@ -482,10 +491,12 @@ def data(): _detection("", ""), escaped_context_history, {}) + query_detection_debug_logs = {} + query_detection_debug_logs["original_query"] = query # Query detection routine: # Returns detection for Place, SVs and Query Classifications. - (query_detection, - query_detection_debug_logs) = _detection(str(escape(original_query)), query) + query_detection = _detection(str(escape(original_query)), query, + query_detection_debug_logs) # Generate new utterance. prev_utterance = nl_utterance.load_utterance(context_history) diff --git a/server/services/nl.py b/server/services/nl.py index 73c4eb158c..e73bb3bbe3 100644 --- a/server/services/nl.py +++ b/server/services/nl.py @@ -398,8 +398,8 @@ def heuristic_correlation_classification( return NLClassifier(type=ClassificationType.CORRELATION, attributes=attributes) - def detect_svs(self, query) -> Tuple[Dict[str, Union[Dict, List]], Dict]: - debug_logs = {} + def detect_svs(self, query: str, + debug_logs: Dict) -> Dict[str, Union[Dict, List]]: # Remove stop words. # Check comment at the top of this file above `ALL_STOP_WORDS` to understand # the potential areas for improvement. 
For now, this removal blanket removes @@ -412,7 +412,7 @@ def detect_svs(self, query) -> Tuple[Dict[str, Union[Dict, List]], Dict]: logging.info(f"SV Detection: Query used after removing stop words: {query}") # Make API call to the NL models/embeddings server. - return (dc.nl_search_sv(query), debug_logs) + return dc.nl_search_sv(query) def detect_place(self, query): return self.place_detector.detect_places_heuristics(query) diff --git a/static/js/apps/nl_interface/debug_info.tsx b/static/js/apps/nl_interface/debug_info.tsx index 7780add52a..3e5ccb61fc 100644 --- a/static/js/apps/nl_interface/debug_info.tsx +++ b/static/js/apps/nl_interface/debug_info.tsx @@ -206,16 +206,6 @@ export function DebugInfo(props: DebugInfoProps): JSX.Element { Overview classification: {debugInfo.overviewClassification} - - Query Detection Debug Logs: - - - -
-                {JSON.stringify(debugInfo.queryDetectionDebugLogs, null, 2)}
-              
- -
All Variables Matched (with scores): @@ -231,6 +221,16 @@ export function DebugInfo(props: DebugInfoProps): JSX.Element { {svToSentences(debugInfo.svScores, debugInfo.svSentences)} + + Query Detection Debug Logs: + + + +
+                {JSON.stringify(debugInfo.queryDetectionDebugLogs, null, 2)}
+              
+ +
Debug Counters From dde6c9b7e2012db7e3d81133642c93789687e4e5 Mon Sep 17 00:00:00 2001 From: Jehangir Amjad Date: Tue, 7 Mar 2023 21:42:32 +0000 Subject: [PATCH 3/3] responding to comments --- server/routes/nl.py | 19 +++++++++++++++---- server/services/nl.py | 2 +- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/server/routes/nl.py b/server/routes/nl.py index 67f64efad0..80623e9e26 100644 --- a/server/routes/nl.py +++ b/server/routes/nl.py @@ -17,7 +17,7 @@ import json import logging import os -from typing import Dict, List, Tuple +from typing import Dict, List import flask from flask import Blueprint @@ -333,13 +333,24 @@ def _detection(orig_query: str, cleaned_query: str, logging.info( f"Resolved {len(resolved_places)} place dcids: {resolved_places}.") + # Step 2: replace the place strings with "" if place_dcids were found. + # Typically, this could also be done under the check for resolved_places + # but we don't expect the resolution from place dcids to fail (typically). + # Also, even if the resolution fails, if there is a place dcid found, it should + # be considered good enough to remove the place strings. + # TODO: investigate whether it is better to only remove place strings for which + # a DCID is found and leave the others in the query string. This is now relevant + # because we support multiple place detection+resolution. Previously, even if + # just one place was used (resolved), it made sense to remove all place strings. + # But now that multiple place strings are being resolved, if there is a failure + # in resolving a place, perhaps it should not be removed? This would be a change + # and would need to be validated first. + query = _remove_places(cleaned_query.lower(), places_str_found) + if resolved_places: main_place = resolved_places[0] logging.info(f"Using main_place as: {main_place}") - # Step 2: replace the resolved places in the query sentence with "". 
- query = _remove_places(cleaned_query.lower(), places_str_found) - # Set PlaceDetection. place_detection = PlaceDetection(query_original=orig_query, query_without_place_substr=query, diff --git a/server/services/nl.py b/server/services/nl.py index e73bb3bbe3..0b22f38cab 100644 --- a/server/services/nl.py +++ b/server/services/nl.py @@ -16,7 +16,7 @@ from collections import OrderedDict import logging import re -from typing import Dict, List, Tuple, Union +from typing import Dict, List, Union import numpy as np import pandas as pd