From bddec6f206156b6351d19acb6b33ac149e4be83a Mon Sep 17 00:00:00 2001 From: Prashanth R Date: Fri, 14 Jul 2023 10:04:43 -0700 Subject: [PATCH] NL: fix a few bugs (#2945) 1. Bug in extend_svs where we don't always check SV compatibility caused https://github.com/datacommonsorg/website/issues/2938 Screenshot 2023-07-13 at 7 54 21 PM 3. Fix https://github.com/datacommonsorg/website/issues/2932 by limiting the number of extended SVs we plot in a bar chart to 16, and also keep the "main SV" pinned as the first entry so it's clear. Screenshot 2023-07-13 at 7 52 53 PM 5. Fix https://github.com/datacommonsorg/website/issues/2925 by ensuring that contained-in doesn't match substrings --- .../query_2/chart_config.json | 6 +- .../demo_fallback/query_3/chart_config.json | 4 +- .../demo_fallback/query_4/chart_config.json | 4 +- .../demo_fallback/query_5/chart_config.json | 5 +- .../nodejs_query/scatter/screenshot.json | 54 +-------------- .../usa_map_types/query_2/chart_config.json | 29 ++++++-- .../usa_map_types/query_3/chart_config.json | 31 +++++++++ server/lib/nl/common/variable.py | 66 ++++++++++++++----- .../lib/nl/detection/heuristic_classifiers.py | 8 ++- server/lib/nl/fulfillment/base.py | 3 + server/tests/lib/nl/heuristics_test.py | 13 ++++ shared/lib/constants.py | 3 + static/js/apps/nl_interface/nl_commentary.tsx | 4 +- 13 files changed, 140 insertions(+), 90 deletions(-) diff --git a/server/integration_tests/test_data/demo2_cities_feb2023/query_2/chart_config.json b/server/integration_tests/test_data/demo2_cities_feb2023/query_2/chart_config.json index 0a1e969028..bce4b5ad86 100644 --- a/server/integration_tests/test_data/demo2_cities_feb2023/query_2/chart_config.json +++ b/server/integration_tests/test_data/demo2_cities_feb2023/query_2/chart_config.json @@ -72,9 +72,9 @@ "geoId/0677000" ], "statVarKey": [ + "Percent_Person_WithAsthma_multiple_place_bar_block", "Percent_Person_WithAllTeethLoss_multiple_place_bar_block", 
"Percent_Person_WithArthritis_multiple_place_bar_block", - "Percent_Person_WithAsthma_multiple_place_bar_block", "Percent_Person_WithCancerExcludingSkinCancer_multiple_place_bar_block", "Percent_Person_WithChronicKidneyDisease_multiple_place_bar_block", "Percent_Person_WithChronicObstructivePulmonaryDisease_multiple_place_bar_block", @@ -99,11 +99,11 @@ "tiles": [ { "statVarKey": [ + "Percent_Person_Smoking", "Percent_Person_BingeDrinking", "Percent_Person_Obesity", "Percent_Person_PhysicalInactivity", - "Percent_Person_SleepLessThan7Hours", - "Percent_Person_Smoking" + "Percent_Person_SleepLessThan7Hours" ], "title": "Percentage of Population That Smokes compared with other variables in Sunnyvale", "type": "LINE" diff --git a/server/integration_tests/test_data/demo_fallback/query_3/chart_config.json b/server/integration_tests/test_data/demo_fallback/query_3/chart_config.json index 1d56a0abc8..e3a37fab62 100644 --- a/server/integration_tests/test_data/demo_fallback/query_3/chart_config.json +++ b/server/integration_tests/test_data/demo_fallback/query_3/chart_config.json @@ -210,12 +210,12 @@ "geoId/06" ], "statVarKey": [ + "Count_CriminalActivities_PropertyCrime_multiple_place_bar_block", "Count_CriminalActivities_AggravatedAssault_multiple_place_bar_block", "Count_CriminalActivities_Burglary_multiple_place_bar_block", "Count_CriminalActivities_ForcibleRape_multiple_place_bar_block", "Count_CriminalActivities_LarcenyTheft_multiple_place_bar_block", "Count_CriminalActivities_MotorVehicleTheft_multiple_place_bar_block", - "Count_CriminalActivities_PropertyCrime_multiple_place_bar_block", "Count_CriminalActivities_Robbery_multiple_place_bar_block", "Count_CriminalActivities_ViolentCrime_multiple_place_bar_block" ], @@ -227,12 +227,12 @@ "geoId/06" ], "statVarKey": [ + "Count_CriminalActivities_PropertyCrime_multiple_place_bar_block_pc", "Count_CriminalActivities_AggravatedAssault_multiple_place_bar_block_pc", 
"Count_CriminalActivities_Burglary_multiple_place_bar_block_pc", "Count_CriminalActivities_ForcibleRape_multiple_place_bar_block_pc", "Count_CriminalActivities_LarcenyTheft_multiple_place_bar_block_pc", "Count_CriminalActivities_MotorVehicleTheft_multiple_place_bar_block_pc", - "Count_CriminalActivities_PropertyCrime_multiple_place_bar_block_pc", "Count_CriminalActivities_Robbery_multiple_place_bar_block_pc", "Count_CriminalActivities_ViolentCrime_multiple_place_bar_block_pc" ], diff --git a/server/integration_tests/test_data/demo_fallback/query_4/chart_config.json b/server/integration_tests/test_data/demo_fallback/query_4/chart_config.json index dcb1e83914..efb70bee7f 100644 --- a/server/integration_tests/test_data/demo_fallback/query_4/chart_config.json +++ b/server/integration_tests/test_data/demo_fallback/query_4/chart_config.json @@ -48,8 +48,8 @@ "country/USA" ], "statVarKey": [ - "Percent_Person_BingeDrinking_multiple_place_bar_block", "Percent_Person_Obesity_multiple_place_bar_block", + "Percent_Person_BingeDrinking_multiple_place_bar_block", "Percent_Person_PhysicalInactivity_multiple_place_bar_block", "Percent_Person_SleepLessThan7Hours_multiple_place_bar_block", "Percent_Person_Smoking_multiple_place_bar_block" @@ -70,6 +70,7 @@ "country/USA" ], "statVarKey": [ + "Percent_Person_WithPhysicalHealthNotGood_multiple_place_bar_block", "Percent_Person_WithArthritis_multiple_place_bar_block", "Percent_Person_WithAsthma_multiple_place_bar_block", "Percent_Person_WithCancerExcludingSkinCancer_multiple_place_bar_block", @@ -80,7 +81,6 @@ "Percent_Person_WithHighBloodPressure_multiple_place_bar_block", "Percent_Person_WithHighCholesterol_multiple_place_bar_block", "Percent_Person_WithMentalHealthNotGood_multiple_place_bar_block", - "Percent_Person_WithPhysicalHealthNotGood_multiple_place_bar_block", "Percent_Person_WithStroke_multiple_place_bar_block" ], "title": "Physical Health Issues compared with other variables in United States (${date})", diff --git 
a/server/integration_tests/test_data/demo_fallback/query_5/chart_config.json b/server/integration_tests/test_data/demo_fallback/query_5/chart_config.json index 896b0979dc..5c26a2a57b 100644 --- a/server/integration_tests/test_data/demo_fallback/query_5/chart_config.json +++ b/server/integration_tests/test_data/demo_fallback/query_5/chart_config.json @@ -245,9 +245,7 @@ { "statVarKey": [ "Amount_EconomicActivity_GrossDomesticProduction_Nominal", - "Amount_EconomicActivity_GrossDomesticProduction_Nominal_PerCapita", - "Amount_EconomicActivity_GrossDomesticProduction_RealValue", - "GrowthRate_Amount_EconomicActivity_GrossDomesticProduction" + "Amount_EconomicActivity_GrossDomesticProduction_RealValue" ], "title": "GDP (Nominal Value) compared with other variables in United States", "type": "LINE" @@ -255,7 +253,6 @@ { "statVarKey": [ "Amount_EconomicActivity_GrossDomesticProduction_Nominal_pc", - "Amount_EconomicActivity_GrossDomesticProduction_Nominal_PerCapita_pc", "Amount_EconomicActivity_GrossDomesticProduction_RealValue_pc" ], "title": "Per Capita GDP (Nominal Value) compared with other variables in United States", diff --git a/server/integration_tests/test_data/nodejs_query/scatter/screenshot.json b/server/integration_tests/test_data/nodejs_query/scatter/screenshot.json index 086af50f32..7110ad7614 100644 --- a/server/integration_tests/test_data/nodejs_query/scatter/screenshot.json +++ b/server/integration_tests/test_data/nodejs_query/scatter/screenshot.json @@ -12,57 +12,9 @@ "url": "https://www.cdc.gov/places/index.html" } ], - "svg": "", "title": "Percentage of Population That Is Obese (2018) vs. 
Population Below Poverty Level Status in Past Year Per Capita (2020) in Counties of California", - "type": "SCATTER" - }, - { - "data_csv": "placeName,placeDcid,xDate,xValue-Count_Person_AbovePovertyLevelInThePast12Months,yDate,yValue-Percent_Person_Obesity,xPopulation-Count_Person\r\n\"Alameda County, CA\",geoId/06001,2020,0.8922327369590627,2018,23.2,1662323\r\n\"Alpine County, CA\",geoId/06003,2020,0.9115281501340483,2018,29.4,1119\r\n\"Amador County, CA\",geoId/06005,2020,0.8119402240351271,2018,28.9,40083\r\n\"Butte County, CA\",geoId/06007,2020,0.8370247809573949,2018,32.9,212744\r\n\"Calaveras County, CA\",geoId/06009,2020,0.8689643258184331,2018,26.4,46308\r\n\"Colusa County, CA\",geoId/06011,2020,0.8703033676593376,2018,30.8,21558\r\n\"Contra Costa County, CA\",geoId/06013,2020,0.9071119199050969,2018,22.8,1152333\r\n\"Del Norte County, CA\",geoId/06015,2020,0.7309067505720824,2018,31.7,27968\r\n\"El Dorado County, CA\",geoId/06017,2020,0.8947414798496826,2018,26.2,192925\r\n\"Fresno County, CA\",geoId/06019,2020,0.7701619912919939,2018,34,1000918\r\n\"Glenn County, CA\",geoId/06021,2020,0.831877806456175,2018,31,28283\r\n\"Humboldt County, CA\",geoId/06023,2020,0.7900012594738363,2018,26.9,134977\r\n\"Imperial County, CA\",geoId/06025,2020,0.7325633643428914,2018,37,180267\r\n\"Inyo County, CA\",geoId/06027,2020,0.8750415604566109,2018,27.5,18046\r\n\"Kern County, CA\",geoId/06029,2020,0.7601396553216133,2018,33.9,901362\r\n\"Kings County, CA\",geoId/06031,2020,0.7524690226076022,2018,34.6,152692\r\n\"Lake County, CA\",geoId/06033,2020,0.8059678344887483,2018,29.8,64479\r\n\"Lassen County, CA\",geoId/06035,2020,0.6021788379530917,2018,28.9,30016\r\n\"Los Angeles County, CA\",geoId/06037,2020,0.8531069855253611,2018,26.6,9943046\r\n\"Madera County, CA\",geoId/06039,2020,0.7564797383383726,2018,33.1,157761\r\n\"Marin County, CA\",geoId/06041,2020,0.9182340323006855,2018,21,257332\r\n\"Mariposa County, 
CA\",geoId/06043,2020,0.8417249417249417,2018,26.3,17160\r\n\"Mendocino County, CA\",geoId/06045,2020,0.833013792542499,2018,28.4,86061\r\n\"Merced County, CA\",geoId/06047,2020,0.7776596049446378,2018,37,279252\r\n\"Modoc County, CA\",geoId/06049,2020,0.8052036973639165,2018,29.9,8763\r\n\"Mono County, CA\",geoId/06051,2020,0.8620476124948396,2018,27.4,14534\r\n\"Monterey County, CA\",geoId/06053,2020,0.8490390015455807,2018,28.4,430906\r\n\"Napa County, CA\",geoId/06055,2020,0.9201412128121208,2018,25.3,135965\r\n\"Nevada County, CA\",geoId/06057,2020,0.8927675039656245,2018,21.9,99606\r\n\"Orange County, CA\",geoId/06059,2020,0.8890243544309073,2018,21.6,3166857\r\n\"Placer County, CA\",geoId/06061,2020,0.8954138230549696,2018,21.6,402950\r\n\"Plumas County, CA\",geoId/06063,2020,0.8535350872568145,2018,25.6,18967\r\n\"Riverside County, CA\",geoId/06065,2020,0.842817015026587,2018,29,2489188\r\n\"Sacramento County, CA\",geoId/06067,2020,0.8383057135123971,2018,28.2,1559146\r\n\"San Benito County, CA\",geoId/06069,2020,0.8642260557333542,2018,28.3,64055\r\n\"San Bernardino County, CA\",geoId/06071,2020,0.8182970541978446,2018,34.9,2189183\r\n\"San Diego County, CA\",geoId/06073,2020,0.8660816876108614,2018,24.4,3332427\r\n\"San Francisco County, CA\",geoId/06075,2020,0.8930194344373337,2018,16.8,866606\r\n\"San Joaquin County, CA\",geoId/06077,2020,0.8254456246166827,2018,33.4,767967\r\n\"San Luis Obispo County, CA\",geoId/06079,2020,0.8395671906720662,2018,28.4,282249\r\n\"San Mateo County, CA\",geoId/06081,2020,0.9394322623525005,2018,21.3,758308\r\n\"Santa Barbara County, CA\",geoId/06083,2020,0.8317744611773382,2018,26.8,444766\r\n\"Santa Clara County, CA\",geoId/06085,2020,0.9217027903550146,2018,19,1907105\r\n\"Santa Cruz County, CA\",geoId/06087,2020,0.8524627211262388,2018,21.7,269925\r\n\"Shasta County, CA\",geoId/06089,2020,0.8354494014869266,2018,26.6,179027\r\n\"Sierra County, CA\",geoId/06091,2020,0.8900684931506849,2018,27.6,2920\r\n\"Siskiyou 
County, CA\",geoId/06093,2020,0.8279569892473119,2018,28.4,43245\r\n\"Solano County, CA\",geoId/06095,2020,0.8821573606900333,2018,27.1,446935\r\n\"Sonoma County, CA\",geoId/06097,2020,0.9119899391407846,2018,25.3,489819\r\n\"Stanislaus County, CA\",geoId/06099,2020,0.8520654230922355,2018,33.1,550081\r\n\"Sutter County, CA\",geoId/06101,2020,0.8572599470872023,2018,27.8,96385\r\n\"Tehama County, CA\",geoId/06103,2020,0.7974540267311688,2018,34.3,64494\r\n\"Trinity County, CA\",geoId/06105,2020,0.7749672560576293,2018,29.6,12216\r\n\"Tulare County, CA\",geoId/06107,2020,0.7641205086626269,2018,35.4,468680\r\n\"Tuolumne County, CA\",geoId/06109,2020,0.8269833990644777,2018,27,54515\r\n\"Ventura County, CA\",geoId/06111,2020,0.9017895451201409,2018,24,841387\r\n\"Yolo County, CA\",geoId/06113,2020,0.781092987693876,2018,26.6,219728\r\n\"Yuba County, CA\",geoId/06115,2020,0.802881736526946,2018,28.3,80160", - "srcs": [ - { - "name": "census.gov", - "url": "https://www.census.gov/programs-surveys/acs/data/data-via-ftp.html" - }, - { - "name": "cdc.gov", - "url": "https://www.cdc.gov/places/index.html" - } - ], - "svg": "", - "title": "Percentage of Population That Is Obese (2018) vs. 
Population Above Poverty Level Status in Past Year Per Capita (2020) in Counties of California", - "type": "SCATTER" - }, - { - "data_csv": "placeName,placeDcid,xDate,xValue-Count_Person_BelowPovertyLevelInThePast12Months_WhiteAlone,yDate,yValue-Percent_Person_Obesity,xPopulation-Count_Person\r\n\"Alameda County, CA\",geoId/06001,2020,0.02704287915164502,2018,23.2,1662323\r\n\"Alpine County, CA\",geoId/06003,2020,0.03485254691689008,2018,29.4,1119\r\n\"Amador County, CA\",geoId/06005,2020,0.05882793204101489,2018,28.9,40083\r\n\"Butte County, CA\",geoId/06007,2020,0.13738577821231152,2018,32.9,212744\r\n\"Calaveras County, CA\",geoId/06009,2020,0.09803921568627451,2018,26.4,46308\r\n\"Colusa County, CA\",geoId/06011,2020,0.09550978754986549,2018,30.8,21558\r\n\"Contra Costa County, CA\",geoId/06013,2020,0.03444316877152698,2018,22.8,1152333\r\n\"Del Norte County, CA\",geoId/06015,2020,0.09214101830663615,2018,31.7,27968\r\n\"El Dorado County, CA\",geoId/06017,2020,0.07238045872748478,2018,26.2,192925\r\n\"Fresno County, CA\",geoId/06019,2020,0.11413122753312459,2018,34,1000918\r\n\"Glenn County, CA\",geoId/06021,2020,0.0992822543577414,2018,31,28283\r\n\"Humboldt County, CA\",geoId/06023,2020,0.13483037850893115,2018,26.9,134977\r\n\"Imperial County, CA\",geoId/06025,2020,0.12452639695562694,2018,37,180267\r\n\"Inyo County, CA\",geoId/06027,2020,0.07508589161032916,2018,27.5,18046\r\n\"Kern County, CA\",geoId/06029,2020,0.12571863468839378,2018,33.9,901362\r\n\"Kings County, CA\",geoId/06031,2020,0.09379666256254421,2018,34.6,152692\r\n\"Lake County, CA\",geoId/06033,2020,0.11164875385784519,2018,29.8,64479\r\n\"Lassen County, CA\",geoId/06035,2020,0.07592617270788912,2018,28.9,30016\r\n\"Los Angeles County, CA\",geoId/06037,2020,0.06102586672132463,2018,26.6,9943046\r\n\"Madera County, CA\",geoId/06039,2020,0.0913216827986638,2018,33.1,157761\r\n\"Marin County, CA\",geoId/06041,2020,0.03844061368193617,2018,21,257332\r\n\"Mariposa County, 
CA\",geoId/06043,2020,0.12977855477855477,2018,26.3,17160\r\n\"Mendocino County, CA\",geoId/06045,2020,0.11688221145466587,2018,28.4,86061\r\n\"Merced County, CA\",geoId/06047,2020,0.07973085242003639,2018,37,279252\r\n\"Modoc County, CA\",geoId/06049,2020,0.13214652516261555,2018,29.9,8763\r\n\"Mono County, CA\",geoId/06051,2020,0.07217558827576717,2018,27.4,14534\r\n\"Monterey County, CA\",geoId/06053,2020,0.038379600191225,2018,28.4,430906\r\n\"Napa County, CA\",geoId/06055,2020,0.04665171183760527,2018,25.3,135965\r\n\"Nevada County, CA\",geoId/06057,2020,0.08496476115896633,2018,21.9,99606\r\n\"Orange County, CA\",geoId/06059,2020,0.049727537429066104,2018,21.6,3166857\r\n\"Placer County, CA\",geoId/06061,2020,0.05408115150763122,2018,21.6,402950\r\n\"Plumas County, CA\",geoId/06063,2020,0.0910001581694522,2018,25.6,18967\r\n\"Riverside County, CA\",geoId/06065,2020,0.059290017467543636,2018,29,2489188\r\n\"Sacramento County, CA\",geoId/06067,2020,0.059031033655603776,2018,28.2,1559146\r\n\"San Benito County, CA\",geoId/06069,2020,0.06394504722504098,2018,28.3,64055\r\n\"San Bernardino County, CA\",geoId/06071,2020,0.07530800303126783,2018,34.9,2189183\r\n\"San Diego County, CA\",geoId/06073,2020,0.06533136359776223,2018,24.4,3332427\r\n\"San Francisco County, CA\",geoId/06075,2020,0.03414354389422644,2018,16.8,866606\r\n\"San Joaquin County, CA\",geoId/06077,2020,0.060986995534964394,2018,33.4,767967\r\n\"San Luis Obispo County, CA\",geoId/06079,2020,0.08361057080804538,2018,28.4,282249\r\n\"San Mateo County, CA\",geoId/06081,2020,0.027069475727540787,2018,21.3,758308\r\n\"Santa Barbara County, CA\",geoId/06083,2020,0.08026917525170539,2018,26.8,444766\r\n\"Santa Clara County, CA\",geoId/06085,2020,0.02511660343819559,2018,19,1907105\r\n\"Santa Cruz County, CA\",geoId/06087,2020,0.07375011577289987,2018,21.7,269925\r\n\"Shasta County, CA\",geoId/06089,2020,0.12006568841571383,2018,26.6,179027\r\n\"Sierra County, 
CA\",geoId/06091,2020,0.08664383561643836,2018,27.6,2920\r\n\"Siskiyou County, CA\",geoId/06093,2020,0.1293097467915366,2018,28.4,43245\r\n\"Solano County, CA\",geoId/06095,2020,0.03889603633638001,2018,27.1,446935\r\n\"Sonoma County, CA\",geoId/06097,2020,0.0551591506250268,2018,25.3,489819\r\n\"Stanislaus County, CA\",geoId/06099,2020,0.09296267277001023,2018,33.1,550081\r\n\"Sutter County, CA\",geoId/06101,2020,0.08888312496757794,2018,27.8,96385\r\n\"Tehama County, CA\",geoId/06103,2020,0.1459825720222036,2018,34.3,64494\r\n\"Trinity County, CA\",geoId/06105,2020,0.2066142763588736,2018,29.6,12216\r\n\"Tulare County, CA\",geoId/06107,2020,0.13835239395749765,2018,35.4,468680\r\n\"Tuolumne County, CA\",geoId/06109,2020,0.0883243144088783,2018,27,54515\r\n\"Ventura County, CA\",geoId/06111,2020,0.06652943294821527,2018,24,841387\r\n\"Yolo County, CA\",geoId/06113,2020,0.09770261414111993,2018,26.6,219728\r\n\"Yuba County, CA\",geoId/06115,2020,0.09366267465069861,2018,28.3,80160", - "srcs": [ - { - "name": "census.gov", - "url": "https://www.census.gov/programs-surveys/acs/data/data-via-ftp.html" - }, - { - "name": "cdc.gov", - "url": "https://www.cdc.gov/places/index.html" - } - ], - "svg": "", - "title": "Percentage of Population That Is Obese (2018) vs. 
Population Below Poverty Level Status in Past Year and White Alone Per Capita (2020) in Counties of California", - "type": "SCATTER" - }, - { - "data_csv": "placeName,placeDcid,xDate,xValue-Count_Person_BelowPovertyLevelInThePast12Months_TwoOrMoreRaces,yDate,yValue-Percent_Person_Obesity,xPopulation-Count_Person\r\n\"Alameda County, CA\",geoId/06001,2020,0.006745379808857845,2018,23.2,1662323\r\n\"Alpine County, CA\",geoId/06003,2020,0.002680965147453083,2018,29.4,1119\r\n\"Amador County, CA\",geoId/06005,2020,0.008432502557193822,2018,28.9,40083\r\n\"Butte County, CA\",geoId/06007,2020,0.015995750761478585,2018,32.9,212744\r\n\"Calaveras County, CA\",geoId/06009,2020,0.008551438196423945,2018,26.4,46308\r\n\"Colusa County, CA\",geoId/06011,2020,0.009416457927451527,2018,30.8,21558\r\n\"Contra Costa County, CA\",geoId/06013,2020,0.008903676281075001,2018,22.8,1152333\r\n\"Del Norte County, CA\",geoId/06015,2020,0.012621567505720823,2018,31.7,27968\r\n\"El Dorado County, CA\",geoId/06017,2020,0.003975638201373591,2018,26.2,192925\r\n\"Fresno County, CA\",geoId/06019,2020,0.015532740943813579,2018,34,1000918\r\n\"Glenn County, CA\",geoId/06021,2020,0.008167450411908214,2018,31,28283\r\n\"Humboldt County, CA\",geoId/06023,2020,0.017721537743467405,2018,26.9,134977\r\n\"Imperial County, CA\",geoId/06025,2020,0.01994818796562876,2018,37,180267\r\n\"Inyo County, CA\",geoId/06027,2020,0.0058184639255236615,2018,27.5,18046\r\n\"Kern County, CA\",geoId/06029,2020,0.015345665781339795,2018,33.9,901362\r\n\"Kings County, CA\",geoId/06031,2020,0.009830246509312865,2018,34.6,152692\r\n\"Lake County, CA\",geoId/06033,2020,0.010809720994432295,2018,29.8,64479\r\n\"Lassen County, CA\",geoId/06035,2020,0.004231076759061834,2018,28.9,30016\r\n\"Los Angeles County, CA\",geoId/06037,2020,0.008745308027338906,2018,26.6,9943046\r\n\"Madera County, CA\",geoId/06039,2020,0.005254784135496099,2018,33.1,157761\r\n\"Marin County, 
CA\",geoId/06041,2020,0.0049741190368862015,2018,21,257332\r\n\"Mariposa County, CA\",geoId/06043,2020,0.008508158508158508,2018,26.3,17160\r\n\"Mendocino County, CA\",geoId/06045,2020,0.017894284286726857,2018,28.4,86061\r\n\"Merced County, CA\",geoId/06047,2020,0.009657943362983971,2018,37,279252\r\n\"Modoc County, CA\",geoId/06049,2020,0.0050211114914983455,2018,29.9,8763\r\n\"Mono County, CA\",geoId/06051,2020,0.023668639053254437,2018,27.4,14534\r\n\"Monterey County, CA\",geoId/06053,2020,0.007147730595535917,2018,28.4,430906\r\n\"Napa County, CA\",geoId/06055,2020,0.004817416246828228,2018,25.3,135965\r\n\"Nevada County, CA\",geoId/06057,2020,0.005592032608477401,2018,21.9,99606\r\n\"Orange County, CA\",geoId/06059,2020,0.005547771812873142,2018,21.6,3166857\r\n\"Placer County, CA\",geoId/06061,2020,0.004732597096413947,2018,21.6,402950\r\n\"Plumas County, CA\",geoId/06063,2020,0.01286444877945906,2018,25.6,18967\r\n\"Riverside County, CA\",geoId/06065,2020,0.007604889626657368,2018,29,2489188\r\n\"Sacramento County, CA\",geoId/06067,2020,0.013114230482584697,2018,28.2,1559146\r\n\"San Benito County, CA\",geoId/06069,2020,0.013004449301381626,2018,28.3,64055\r\n\"San Bernardino County, CA\",geoId/06071,2020,0.013268877019417747,2018,34.9,2189183\r\n\"San Diego County, CA\",geoId/06073,2020,0.009435465503070286,2018,24.4,3332427\r\n\"San Francisco County, CA\",geoId/06075,2020,0.006235821122863216,2018,16.8,866606\r\n\"San Joaquin County, CA\",geoId/06077,2020,0.02055166432932665,2018,33.4,767967\r\n\"San Luis Obispo County, CA\",geoId/06079,2020,0.006543867294481114,2018,28.4,282249\r\n\"San Mateo County, CA\",geoId/06081,2020,0.004115741888520232,2018,21.3,758308\r\n\"Santa Barbara County, CA\",geoId/06083,2020,0.013973640071408337,2018,26.8,444766\r\n\"Santa Clara County, CA\",geoId/06085,2020,0.004744363839432018,2018,19,1907105\r\n\"Santa Cruz County, CA\",geoId/06087,2020,0.006075761785681207,2018,21.7,269925\r\n\"Shasta County, 
CA\",geoId/06089,2020,0.010350394074636787,2018,26.6,179027\r\n\"Sierra County, CA\",geoId/06091,2020,0,2018,27.6,2920\r\n\"Siskiyou County, CA\",geoId/06093,2020,0.01808301537750029,2018,28.4,43245\r\n\"Solano County, CA\",geoId/06095,2020,0.00792508977815566,2018,27.1,446935\r\n\"Sonoma County, CA\",geoId/06097,2020,0.0056980231473258485,2018,25.3,489819\r\n\"Stanislaus County, CA\",geoId/06099,2020,0.011830984891316006,2018,33.1,550081\r\n\"Sutter County, CA\",geoId/06101,2020,0.009088551123100067,2018,27.8,96385\r\n\"Tehama County, CA\",geoId/06103,2020,0.014497472633113158,2018,34.3,64494\r\n\"Trinity County, CA\",geoId/06105,2020,0,2018,29.6,12216\r\n\"Tulare County, CA\",geoId/06107,2020,0.011730818468891355,2018,35.4,468680\r\n\"Tuolumne County, CA\",geoId/06109,2020,0.006732092084747317,2018,27,54515\r\n\"Ventura County, CA\",geoId/06111,2020,0.006050723388880503,2018,24,841387\r\n\"Yolo County, CA\",geoId/06113,2020,0.01465903298623753,2018,26.6,219728\r\n\"Yuba County, CA\",geoId/06115,2020,0.01843812375249501,2018,28.3,80160", - "srcs": [ - { - "name": "census.gov", - "url": "https://www.census.gov/programs-surveys/acs/data/data-via-ftp.html" - }, - { - "name": "cdc.gov", - "url": "https://www.cdc.gov/places/index.html" - } - ], - "svg": "", - "title": "Percentage of Population That Is Obese (2018) vs. 
Population Below Poverty Level Status in Past Year and Two or More Races Per Capita (2020) in Counties of California", - "type": "SCATTER" + "type": "SCATTER", + "svg": "" } ] -} +} \ No newline at end of file diff --git a/server/integration_tests/test_data/usa_map_types/query_2/chart_config.json b/server/integration_tests/test_data/usa_map_types/query_2/chart_config.json index 8a49532aad..6cc3341900 100644 --- a/server/integration_tests/test_data/usa_map_types/query_2/chart_config.json +++ b/server/integration_tests/test_data/usa_map_types/query_2/chart_config.json @@ -39,9 +39,24 @@ "tiles": [ { "statVarKey": [ - "Median_Income_Household_HouseholderRaceWhiteAlone" + "Median_Income_Household_HouseholderRaceAsianAlone" ], - "title": "Median Income of White Households in Census Tracts of Placer County (${date})", + "title": "Median Income of Asian Households in Census Tracts of Placer County (${date})", + "type": "MAP" + } + ] + } + ] + }, + { + "columns": [ + { + "tiles": [ + { + "statVarKey": [ + "Median_Income_Household_HouseholderRaceHispanicOrLatino" + ], + "title": "Median Income of Hispanic or Latino Households in Census Tracts of Placer County (${date})", "type": "MAP" } ] @@ -54,9 +69,13 @@ "name": "Household Median Income", "statVar": "Median_Income_Household" }, - "Median_Income_Household_HouseholderRaceWhiteAlone": { - "name": "Median Income of White Households", - "statVar": "Median_Income_Household_HouseholderRaceWhiteAlone" + "Median_Income_Household_HouseholderRaceAsianAlone": { + "name": "Median Income of Asian Households", + "statVar": "Median_Income_Household_HouseholderRaceAsianAlone" + }, + "Median_Income_Household_HouseholderRaceHispanicOrLatino": { + "name": "Median Income of Hispanic or Latino Households", + "statVar": "Median_Income_Household_HouseholderRaceHispanicOrLatino" }, "Median_Income_Person": { "name": "Individual Median Income", diff --git a/server/integration_tests/test_data/usa_map_types/query_3/chart_config.json 
b/server/integration_tests/test_data/usa_map_types/query_3/chart_config.json index 5709084c06..27b62d9c82 100644 --- a/server/integration_tests/test_data/usa_map_types/query_3/chart_config.json +++ b/server/integration_tests/test_data/usa_map_types/query_3/chart_config.json @@ -24,6 +24,28 @@ ] } ] + }, + { + "columns": [ + { + "tiles": [ + { + "statVarKey": [ + "Count_Person_AbovePovertyLevelInThePast12Months" + ], + "title": "Population Above Poverty Level Status in Past Year in Census Zip Code Tabulation Areas of Washington (${date})", + "type": "MAP" + }, + { + "statVarKey": [ + "Count_Person_AbovePovertyLevelInThePast12Months_pc" + ], + "title": "Per Capita Population Above Poverty Level Status in Past Year in Census Zip Code Tabulation Areas of Washington (${date})", + "type": "MAP" + } + ] + } + ] } ], "statVarSpec": { @@ -35,6 +57,15 @@ "denom": "Count_Person", "name": "Population of Working Age With No Income", "statVar": "Count_Person_15OrMoreYears_NoIncome" + }, + "Count_Person_AbovePovertyLevelInThePast12Months": { + "name": "Population Above Poverty Level Status in Past Year", + "statVar": "Count_Person_AbovePovertyLevelInThePast12Months" + }, + "Count_Person_AbovePovertyLevelInThePast12Months_pc": { + "denom": "Count_Person", + "name": "Population Above Poverty Level Status in Past Year", + "statVar": "Count_Person_AbovePovertyLevelInThePast12Months" } } } diff --git a/server/lib/nl/common/variable.py b/server/lib/nl/common/variable.py index 654ff03a24..8530c54deb 100644 --- a/server/lib/nl/common/variable.py +++ b/server/lib/nl/common/variable.py @@ -17,7 +17,7 @@ from dataclasses import field import json import os -from typing import Dict, List +from typing import Dict, List, Set import server.lib.fetch as fetch import server.lib.nl.common.constants as constants @@ -27,6 +27,11 @@ # TODO: This is reading the file on every call. Improve it! 
_CHART_TITLE_CONFIG_RELATIVE_PATH = "../../../config/nl_page/chart_titles_by_sv.json" +# Have an upper limit so we don't do too many existence checks. +EXTENSION_SV_PRE_EXISTENCE_CHECK_LIMIT = 50 +# This is the number that we want to fit in a bar chart. +EXTENSION_SV_POST_EXISTENCE_CHECK_LIMIT = 15 + @dataclass class SV: @@ -79,6 +84,35 @@ def parse_svg(svg_dcid: str) -> SVG: return res +# Given an SV object and SV info from DC API, check whether +# their definitions are compatible. +def _is_compatible(sv_obj: SV, new_sv: Dict) -> bool: + if 'definition' not in new_sv: + return False + new_sv_obj = parse_sv(new_sv['definition']) + if new_sv_obj.mp != sv_obj.mp: + return False + if new_sv_obj.st != sv_obj.st: + return False + if new_sv_obj.pt != sv_obj.pt: + return False + if new_sv_obj.md != sv_obj.md: + return False + if len(new_sv_obj.pvs) != len(sv_obj.pvs): + return False + return True + + +# Limit to up to `limit` extended SVs. +def limit_extended_svs(sv: str, ext_svs: Set[str], limit: int) -> Dict: + # Put the main SV first. + res = [sv] + if sv in ext_svs: + ext_svs.remove(sv) + res.extend(sorted(ext_svs)[:limit]) + return res + + def extend_svs(svs: List[str]): """Extend stat vars by finding siblings. 
@@ -142,28 +176,24 @@ def extend_svs(svs: List[str]): svg_siblings_info = dc.get_variable_group_info(svg_siblings, []) for item in svg_siblings_info['data']: for sv_info in item['info'].get('childStatVars', []): - if 'definition' not in sv_info: - continue - curr_sv_obj = parse_sv(sv_info['definition']) - if curr_sv_obj.mp != sv_obj.mp: - continue - if curr_sv_obj.st != sv_obj.st: - continue - if curr_sv_obj.pt != sv_obj.pt: - continue - if curr_sv_obj.md != sv_obj.md: - continue - if len(curr_sv_obj.pvs) != len(sv_obj.pvs): - continue - res[sv].append(sv_info['id']) + if _is_compatible(sv_obj, sv_info): + res[sv].append(sv_info['id']) else: - # Can use the direct siblings of this sv - res[sv] = list(map(lambda x: x['id'], svg2childsvs[svg])) + # Can use the direct siblings of this sv, nevertheless perform + # SV compatibility check! + for new_sv_info in svg2childsvs[svg]: + if _is_compatible(sv_obj, new_sv_info): + res[sv].append(new_sv_info['id']) for sv2 in res[sv]: if sv2 == sv: continue reverse_map[sv2] = res[sv] - res_ordered = {sv: sorted(set(ext_svs)) for sv, ext_svs in res.items()} + + # Limit the number of extended SVs. + res_ordered = {} + for sv, ext_svs in res.items(): + res_ordered[sv] = limit_extended_svs( + sv, set(ext_svs), EXTENSION_SV_PRE_EXISTENCE_CHECK_LIMIT) return res_ordered diff --git a/server/lib/nl/detection/heuristic_classifiers.py b/server/lib/nl/detection/heuristic_classifiers.py index 650353a7af..6c6534bc62 100644 --- a/server/lib/nl/detection/heuristic_classifiers.py +++ b/server/lib/nl/detection/heuristic_classifiers.py @@ -305,13 +305,15 @@ def containedin(query: str) -> Union[NLClassifier, None]: query = query.lower() # Note again that place_type_to_enum is an OrderedDict. for place_type, place_enum in place_type_to_enum.items(): - if place_type in query: + # Match as a word so city won't match electricity. 
+ if re.search(rf"\b{place_type}\b", query): + # if place_type in query: contained_in_place_type = place_enum break nospace_place_type = place_type.replace(' ', '') - if nospace_place_type in constants.PLACE_TYPE_TO_PLURALS and \ - constants.PLACE_TYPE_TO_PLURALS[nospace_place_type] in query: + plural = constants.PLACE_TYPE_TO_PLURALS.get(nospace_place_type) + if plural and re.search(rf"\b{plural}\b", query): contained_in_place_type = place_enum break diff --git a/server/lib/nl/fulfillment/base.py b/server/lib/nl/fulfillment/base.py index fabc04c1a1..16a5023feb 100644 --- a/server/lib/nl/fulfillment/base.py +++ b/server/lib/nl/fulfillment/base.py @@ -370,6 +370,9 @@ def _add_charts_for_extended_svs(state: PopulateState, places: List[Place], assert len(exist_state.chart_vars_list) == 1, f'{exist_state}' chart_vars = tracker.get_chart_vars(exist_state.chart_vars_list[0]) if len(chart_vars.svs) > 1: + chart_vars.svs = variable.limit_extended_svs( + exist_state.sv, set(chart_vars.svs), + variable.EXTENSION_SV_POST_EXISTENCE_CHECK_LIMIT) exist_svs_key = ''.join(sorted(chart_vars.svs)) if exist_svs_key in printed_sv_extensions: continue diff --git a/server/tests/lib/nl/heuristics_test.py b/server/tests/lib/nl/heuristics_test.py index b9ff4de274..3762e1f271 100644 --- a/server/tests/lib/nl/heuristics_test.py +++ b/server/tests/lib/nl/heuristics_test.py @@ -361,3 +361,16 @@ def test_no_false_positives(self, query): # If no matches, classifier returns None result = heuristic_classifiers.time_delta(query) self.assertIsNone(result) + + +# TODO: Need unit-tests for containedin. 
+class TestHeuristicContainedInClassifier(unittest.TestCase): + + @parameterized.expand([ + ("electricity in california"), + ("how tractable in poverty in santa clara?"), + ("how many housing contracts in san jose?"), + ("corporate publicities in utah"), + ]) + def test_no_false_positives(self, query): + self.assertIsNone(heuristic_classifiers.containedin(query)) diff --git a/shared/lib/constants.py b/shared/lib/constants.py index f2b81ef556..e7d21d6c26 100644 --- a/shared/lib/constants.py +++ b/shared/lib/constants.py @@ -349,7 +349,10 @@ "parish": "parishes", "city": "cities", "censustract": "census tracts", + "tract": "tracts", "censuszipcodetabulationarea": "census zip code tabulation areas", + "zip": "zips", + "zipcode": "zip codes", "town": "towns", "village": "villages", "censusdivision": "census divisions", diff --git a/static/js/apps/nl_interface/nl_commentary.tsx b/static/js/apps/nl_interface/nl_commentary.tsx index ff5c3cbd4f..3a65b1c23c 100644 --- a/static/js/apps/nl_interface/nl_commentary.tsx +++ b/static/js/apps/nl_interface/nl_commentary.tsx @@ -34,8 +34,8 @@ export function shouldHideCharts(respData: any): boolean { // we should hide. return ( ("origStr" in fb && "newStr" in fb) || - (respData["placeSource"] == "DEFAULT" && - respData["pastSourceContext"] != "Earth") + (respData["placeSource"] === "DEFAULT" && + respData["pastSourceContext"] !== "Earth") ); }