-
Notifications
You must be signed in to change notification settings - Fork 64
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Feature/fix enrichment new catalogue #1083
Changes from 18 commits
81a8491
9c822d7
825bba1
acaea5a
8bb6da9
9e6491a
2137acb
1a30f24
55f392d
782ea31
c127691
ab9c05e
17f79a5
3796850
f5c71bc
ab02cdd
58df0b5
f7d54d0
b7da8ba
32b1545
23e50ea
fec43d9
b645461
752847d
57aee4b
41372bf
3155cf8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -86,7 +86,8 @@ def enrich_points(data, variables, data_geom_column='geometry', filters=dict(), | |
|
||
|
||
def _prepare_sql(enrichment_id, filters_processed, table_to_geotable, table_to_variables, | ||
table_to_project, table_to_dataset, user_dataset, working_project, data_table, **kwargs): | ||
table_to_project, table_to_dataset, user_dataset, working_project, | ||
data_table, **kwargs): | ||
|
||
sqls = list() | ||
|
||
|
@@ -95,20 +96,19 @@ def _prepare_sql(enrichment_id, filters_processed, table_to_geotable, table_to_v | |
sql = ''' | ||
SELECT data_table.{enrichment_id}, | ||
{variables}, | ||
ST_Area(enrichment_geo_table.geom) AS {variables_underscored}_area, | ||
NULL AS {variables_underscored}_population | ||
ST_Area(enrichment_geo_table.geom) AS {enrichment_table}_area | ||
FROM `{project}.{dataset}.{enrichment_table}` enrichment_table | ||
JOIN `{project}.{dataset}.{enrichment_geo_table}` enrichment_geo_table | ||
ON enrichment_table.geoid = enrichment_geo_table.geoid | ||
JOIN `{working_project}.{user_dataset}.{data_table}` data_table | ||
ON ST_Within(data_table.{data_geom_column}, enrichment_geo_table.geom) | ||
{filters}; | ||
'''.format(enrichment_id=enrichment_id, variables=', '.join(variables), | ||
variables_underscored='_'.join(variables), enrichment_table=table, | ||
enrichment_geo_table=table_to_geotable[table], user_dataset=user_dataset, | ||
working_project=working_project, data_table=data_table, | ||
'''.format(enrichment_id=enrichment_id, variables_underscored='_'.join(variables), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Removed the unused variable from the format method! |
||
enrichment_table=table, enrichment_geo_table=table_to_geotable[table], | ||
user_dataset=user_dataset, working_project=working_project, data_table=data_table, | ||
data_geom_column=kwargs['data_geom_column'], filters=filters_processed, | ||
project=table_to_project[table], dataset=table_to_dataset[table]) | ||
project=table_to_project[table], dataset=table_to_dataset[table], | ||
variables=', '.join(['enrichment_table.{}'.format(variable) for variable in variables])) | ||
|
||
sqls.append(sql) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -76,13 +76,12 @@ def test_enrichment_query_by_points_one_variable(self): | |
queries = _enrichment_queries(user_dataset, tablename, query_function, **kwargs) | ||
|
||
expected_queries = ['''SELECT data_table.enrichment_id, | ||
CRMCYBURG, | ||
ST_Area(enrichment_geo_table.geom) AS CRMCYBURG_area, | ||
NULL AS CRMCYBURG_population | ||
enrichment_table.CRMCYBURG, | ||
ST_Area(enrichment_geo_table.geom) AS view_ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018_area | ||
FROM `carto-do-customers.{user_dataset}\ | ||
.ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018` enrichment_table | ||
.view_ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018` enrichment_table | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we are trying to avoid using real names. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We need real names because the functions use the real catalog, so we need real examples of table names. Also, I cannot see any problem, because we are offering this dataset publicly through the website and the catalog. |
||
JOIN `carto-do-customers.{user_dataset}\ | ||
.ags_geography_usa_blockgroup_2015` enrichment_geo_table | ||
.view_ags_geography_usa_blockgroup_2015` enrichment_geo_table | ||
ON enrichment_table.geoid = enrichment_geo_table.geoid | ||
JOIN `carto-do-customers.{user_dataset}.{tablename}` data_table | ||
ON ST_Within(data_table.{geometry_column}, enrichment_geo_table.geom) | ||
|
@@ -111,25 +110,23 @@ def test_enrichment_query_by_points_two_variables(self): | |
queries = _enrichment_queries(user_dataset, tablename, query_function, **kwargs) | ||
|
||
expected_queries = ['''SELECT data_table.enrichment_id, | ||
CRMCYBURG, | ||
ST_Area(enrichment_geo_table.geom) AS CRMCYBURG_area, | ||
NULL AS CRMCYBURG_population | ||
enrichment_table.CRMCYBURG, | ||
ST_Area(enrichment_geo_table.geom) AS view_ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018_area | ||
FROM `carto-do-customers.{user_dataset}\ | ||
.ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018` enrichment_table | ||
.view_ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018` enrichment_table | ||
JOIN `carto-do-customers.{user_dataset}\ | ||
.ags_geography_usa_blockgroup_2015` enrichment_geo_table | ||
.view_ags_geography_usa_blockgroup_2015` enrichment_geo_table | ||
ON enrichment_table.geoid = enrichment_geo_table.geoid | ||
JOIN `carto-do-customers.{user_dataset}.{tablename}` data_table | ||
ON ST_Within(data_table.{geometry_column}, enrichment_geo_table.geom) | ||
WHERE a='b';''', ''' | ||
SELECT data_table.enrichment_id, | ||
ticket_size_score, | ||
ST_Area(enrichment_geo_table.geom) AS ticket_size_score_area, | ||
NULL AS ticket_size_score_population | ||
enrichment_table.ticket_size_score, | ||
ST_Area(enrichment_geo_table.geom) AS view_mastercard_financial_mrli_usa_blockgroup_2019_monthly_2019_area | ||
FROM `carto-do-customers.{user_dataset}\ | ||
.mastercard_financial_mrli_usa_blockgroup_2019_monthly_2019` enrichment_table | ||
.view_mastercard_financial_mrli_usa_blockgroup_2019_monthly_2019` enrichment_table | ||
JOIN `carto-do-customers.{user_dataset}\ | ||
.mastercard_geography_usa_blockgroup_2019` enrichment_geo_table | ||
.view_mastercard_geography_usa_blockgroup_2019` enrichment_geo_table | ||
ON enrichment_table.geoid = enrichment_geo_table.geoid | ||
JOIN `carto-do-customers.{user_dataset}.{tablename}` data_table | ||
ON ST_Within(data_table.{geometry_column}, enrichment_geo_table.geom) | ||
|
@@ -158,12 +155,12 @@ def test_enrichment_query_by_polygons_one_variable(self): | |
|
||
queries = _enrichment_queries(user_dataset, tablename, query_function, **kwargs) | ||
|
||
expected_queries = ['''SELECT data_table.enrichment_id, avg(CRMCYBURG *\ | ||
expected_queries = ['''SELECT data_table.enrichment_id, avg(enrichment_table.CRMCYBURG *\ | ||
(ST_Area(ST_Intersection(enrichment_geo_table.geom, data_table.{geometry_column}))\ | ||
/ ST_area(data_table.{geometry_column}))) as CRMCYBURG | ||
FROM `carto-do-customers.{user_dataset}.ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018`\ | ||
FROM `carto-do-customers.{user_dataset}.view_ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018`\ | ||
enrichment_table | ||
JOIN `carto-do-customers.{user_dataset}.ags_geography_usa_blockgroup_2015` enrichment_geo_table | ||
JOIN `carto-do-customers.{user_dataset}.view_ags_geography_usa_blockgroup_2015` enrichment_geo_table | ||
ON enrichment_table.geoid = enrichment_geo_table.geoid | ||
JOIN `carto-do-customers.{user_dataset}.{tablename}` data_table | ||
ON ST_Intersects(data_table.{geometry_column}, enrichment_geo_table.geom) | ||
|
@@ -194,23 +191,23 @@ def test_enrichment_query_by_polygons_two_variables(self): | |
|
||
queries = _enrichment_queries(user_dataset, tablename, query_function, **kwargs) | ||
|
||
expected_queries = ['''SELECT data_table.enrichment_id, avg(CRMCYBURG *\ | ||
expected_queries = ['''SELECT data_table.enrichment_id, avg(enrichment_table.CRMCYBURG *\ | ||
(ST_Area(ST_Intersection(enrichment_geo_table.geom, data_table.{geometry_column}))\ | ||
/ ST_area(data_table.{geometry_column}))) as CRMCYBURG | ||
FROM `carto-do-customers.{user_dataset}.ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018`\ | ||
FROM `carto-do-customers.{user_dataset}.view_ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018`\ | ||
enrichment_table | ||
JOIN `carto-do-customers.{user_dataset}.ags_geography_usa_blockgroup_2015` enrichment_geo_table | ||
JOIN `carto-do-customers.{user_dataset}.view_ags_geography_usa_blockgroup_2015` enrichment_geo_table | ||
ON enrichment_table.geoid = enrichment_geo_table.geoid | ||
JOIN `carto-do-customers.{user_dataset}.{tablename}` data_table | ||
ON ST_Intersects(data_table.{geometry_column}, enrichment_geo_table.geom) | ||
WHERE a='b' | ||
group by data_table.enrichment_id;''', ''' | ||
SELECT data_table.enrichment_id, avg(ticket_size_score *\ | ||
SELECT data_table.enrichment_id, avg(enrichment_table.ticket_size_score *\ | ||
(ST_Area(ST_Intersection(enrichment_geo_table.geom, data_table.{geometry_column}))\ | ||
/ ST_area(data_table.{geometry_column}))) as ticket_size_score | ||
FROM `carto-do-customers.{user_dataset}.mastercard_financial_mrli_usa_blockgroup_2019_monthly_2019`\ | ||
FROM `carto-do-customers.{user_dataset}.view_mastercard_financial_mrli_usa_blockgroup_2019_monthly_2019`\ | ||
enrichment_table | ||
JOIN `carto-do-customers.{user_dataset}.mastercard_geography_usa_blockgroup_2019` enrichment_geo_table | ||
JOIN `carto-do-customers.{user_dataset}.view_mastercard_geography_usa_blockgroup_2019` enrichment_geo_table | ||
ON enrichment_table.geoid = enrichment_geo_table.geoid | ||
JOIN `carto-do-customers.{user_dataset}.{tablename}` data_table | ||
ON ST_Intersects(data_table.{geometry_column}, enrichment_geo_table.geom) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `__process_agg_operators` method should also take into account what happens if this argument is a string, as we're doing in the `enrich_polygons` method. If it's a string, it throws `'str' object has no attribute 'copy'`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed!