Skip to content

Commit

Permalink
[explore] proper filtering of NULLs and '' (apache#4651)
Browse files Browse the repository at this point in the history
* [WiP] [explore] proper filtering of NULLs and ''

TODO: handling of Druid equivalents

* Unit tests

* Some refactoring

* [druid] fix 'Unorderable types' when col has nuls

Error "unorderable types: str() < int()" occurs when grouping by a
numerical Druid colummn that contains null values.

* druid/pydruid returns strings in the datafram with NAs for nulls
* Superset has custom logic around get_fillna_for_col that fills in the
NULLs based on declared column type (FLOAT here), so now we have a mixed
bag of type in the series
* pandas chokes on pivot_table or groupby operations as it cannot sorts
mixed types

The approach here is to stringify and fillna('<NULL>') to get a
consistent series.

* typo

* Fix druid_func tests

* Addressing more comments

* last touches
  • Loading branch information
mistercrunch authored and timifasubaa committed May 31, 2018
1 parent e61e115 commit 274fc0b
Show file tree
Hide file tree
Showing 15 changed files with 153 additions and 57 deletions.
1 change: 1 addition & 0 deletions superset/assets/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
"moment": "^2.20.1",
"mousetrap": "^1.6.1",
"mustache": "^2.2.1",
"npm": "^5.7.1",
"nvd3": "1.8.6",
"object.entries": "^1.0.4",
"object.keys": "^0.1.0",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -225,12 +225,13 @@ describe('FilterControl', () => {
wrapper.instance().fetchFilterValues(0, 'col1');
expect(wrapper.state().activeRequest).to.equal(spyReq);
// Sets active to null after success
$.ajax.getCall(0).args[0].success('choices');
$.ajax.getCall(0).args[0].success(['opt1', 'opt2', null, '']);
expect(wrapper.state().filters[0].valuesLoading).to.equal(false);
expect(wrapper.state().filters[0].valueChoices).to.equal('choices');
expect(wrapper.state().filters[0].valueChoices).to.deep.equal(['opt1', 'opt2', null, '']);
expect(wrapper.state().activeRequest).to.equal(null);
});


it('cancels active request if another is submitted', () => {
const spyReq = sinon.spy();
spyReq.abort = sinon.spy();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ describe('Filter', () => {
expect(wrapper.find(Select)).to.have.lengthOf(2);
expect(wrapper.find(Button)).to.have.lengthOf(1);
expect(wrapper.find(SelectControl)).to.have.lengthOf(1);
expect(wrapper.find('#select-op').prop('options')).to.have.lengthOf(8);
expect(wrapper.find('#select-op').prop('options')).to.have.lengthOf(10);
});

it('renders five op choices for table datasource', () => {
Expand All @@ -58,7 +58,7 @@ describe('Filter', () => {
filterable_cols: ['country_name'],
};
const druidWrapper = shallow(<Filter {...props} />);
expect(druidWrapper.find('#select-op').prop('options')).to.have.lengthOf(9);
expect(druidWrapper.find('#select-op').prop('options')).to.have.lengthOf(11);
});

it('renders six op choices for having filter', () => {
Expand Down
12 changes: 11 additions & 1 deletion superset/assets/spec/javascripts/utils/common_spec.jsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { it, describe } from 'mocha';
import { expect } from 'chai';
import { isTruthy } from '../../../src/utils/common';
import { isTruthy, optionFromValue } from '../../../src/utils/common';

describe('utils/common', () => {
describe('isTruthy', () => {
Expand Down Expand Up @@ -40,4 +40,14 @@ describe('utils/common', () => {
expect(isTruthy('false')).to.equal(false);
});
});
describe('optionFromValue', () => {
it('converts values as expected', () => {
expect(optionFromValue(false)).to.deep.equal({ value: false, label: '<false>' });
expect(optionFromValue(true)).to.deep.equal({ value: true, label: '<true>' });
expect(optionFromValue(null)).to.deep.equal({ value: '<NULL>', label: '<NULL>' });
expect(optionFromValue('')).to.deep.equal({ value: '', label: '<empty string>' });
expect(optionFromValue('foo')).to.deep.equal({ value: 'foo', label: 'foo' });
expect(optionFromValue(5)).to.deep.equal({ value: 5, label: '5' });
});
});
});
3 changes: 2 additions & 1 deletion superset/assets/src/SqlLab/actions.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import shortid from 'shortid';
import { now } from '../modules/dates';
import { t } from '../locales';
import { COMMON_ERR_MESSAGES } from '../common';

const $ = require('jquery');

Expand Down Expand Up @@ -163,7 +164,7 @@ export function runQuery(query) {
}
}
if (msg.indexOf('CSRF token') > 0) {
msg = t('Your session timed out, please refresh your page and try again.');
msg = COMMON_ERR_MESSAGES.SESSION_TIMED_OUT;
}
dispatch(queryFailed(query, msg));
},
Expand Down
9 changes: 7 additions & 2 deletions superset/assets/src/chart/chartAction.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { getExploreUrlAndPayload, getAnnotationJsonUrl } from '../explore/exploreUtils';
import { requiresQuery, ANNOTATION_SOURCE_TYPES } from '../modules/AnnotationTypes';
import { Logger, LOG_ACTIONS_LOAD_EVENT } from '../logger';
import { COMMON_ERR_MESSAGES } from '../common';

const $ = window.$ = require('jquery');

Expand Down Expand Up @@ -160,12 +161,16 @@ export function runQuery(formData, force = false, timeout = 60, key) {
errObject = err.responseJSON;
} else if (err.stack) {
errObject = {
error: 'Unexpected error: ' + err.description,
error: t('Unexpected error: ') + err.description,
stacktrace: err.stack,
};
} else if (err.responseText && err.responseText.indexOf('CSRF') >= 0) {
errObject = {
error: COMMON_ERR_MESSAGES.SESSION_TIMED_OUT,
};
} else {
errObject = {
error: 'Unexpected error.',
error: t('Unexpected error.'),
};
}
dispatch(chartUpdateFailed(errObject, key));
Expand Down
6 changes: 6 additions & 0 deletions superset/assets/src/common.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/* eslint-disable global-require */
import $ from 'jquery';
import { t } from './locales';

const utils = require('./modules/utils');

Expand Down Expand Up @@ -30,3 +31,8 @@ export function appSetup() {
window.jQuery = $;
require('bootstrap');
}

// Error messages used in many places across applications
export const COMMON_ERR_MESSAGES = {
SESSION_TIMED_OUT: t('Your session timed out, please refresh your page and try again.'),
};
2 changes: 1 addition & 1 deletion superset/assets/src/components/AlteredSliceTag.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ export default class AlteredSliceTag extends React.Component {
return '[]';
}
return value.map((v) => {
const filterVal = v.val.constructor === Array ? `[${v.val.join(', ')}]` : v.val;
const filterVal = v.val && v.val.constructor === Array ? `[${v.val.join(', ')}]` : v.val;
return `${v.col} ${v.op} ${filterVal}`;
}).join(', ');
} else if (controls[key] && controls[key].type === 'BoundsControl') {
Expand Down
8 changes: 7 additions & 1 deletion superset/assets/src/explore/components/controls/Filter.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ import React from 'react';
import PropTypes from 'prop-types';
import Select from 'react-select';
import { Button, Row, Col } from 'react-bootstrap';
import SelectControl from './SelectControl';
import { t } from '../../../locales';
import SelectControl from './SelectControl';

const operatorsArr = [
{ val: 'in', type: 'array', useSelect: true, multi: true },
Expand All @@ -16,6 +16,8 @@ const operatorsArr = [
{ val: '<', type: 'string', havingOnly: true },
{ val: 'regex', type: 'string', datasourceTypes: ['druid'] },
{ val: 'LIKE', type: 'string', datasourceTypes: ['table'] },
{ val: 'IS NULL', type: null },
{ val: 'IS NOT NULL', type: null },
];
const operators = {};
operatorsArr.forEach((op) => {
Expand Down Expand Up @@ -90,6 +92,10 @@ export default class Filter extends React.Component {

renderFilterFormControl(filter) {
const operator = operators[filter.op];
if (operator.type === null) {
// IS NULL or IS NOT NULL
return null;
}
if (operator.useSelect && !this.props.having) {
// TODO should use a simple Select, not a control here...
return (
Expand Down
27 changes: 27 additions & 0 deletions superset/assets/src/utils/common.js
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,30 @@ export function isTruthy(obj) {
}
return !!obj;
}

export function optionLabel(opt) {
if (opt === null) {
return '<NULL>';
} else if (opt === '') {
return '<empty string>';
} else if (opt === true) {
return '<true>';
} else if (opt === false) {
return '<false>';
} else if (typeof opt !== 'string' && opt.toString) {
return opt.toString();
}
return opt;
}

export function optionValue(opt) {
if (opt === null) {
return '<NULL>';
}
return opt;
}

export function optionFromValue(opt) {
// From a list of options, handles special values & labels
return { value: optionValue(opt), label: optionLabel(opt) };
}
30 changes: 30 additions & 0 deletions superset/connectors/base/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import json

from past.builtins import basestring
from sqlalchemy import (
and_, Boolean, Column, Integer, String, Text,
)
Expand Down Expand Up @@ -185,6 +186,35 @@ def data(self):
'verbose_map': verbose_map,
}

@staticmethod
def filter_values_handler(
values, target_column_is_numeric=False, is_list_target=False):
def handle_single_value(v):
# backward compatibility with previous <select> components
if isinstance(v, basestring):
v = v.strip('\t\n \'"')
if target_column_is_numeric:
# For backwards compatibility and edge cases
# where a column data type might have changed
v = utils.string_to_num(v)
if v == '<NULL>':
return None
elif v == '<empty string>':
return ''
return v
if isinstance(values, (list, tuple)):
values = [handle_single_value(v) for v in values]
else:
values = handle_single_value(values)
if is_list_target and not isinstance(values, (tuple, list)):
values = [values]
elif not is_list_target and isinstance(values, (tuple, list)):
if len(values) > 0:
values = values[0]
else:
values = None
return values

def get_query_str(self, query_obj):
"""Returns a query as a string
Expand Down
60 changes: 35 additions & 25 deletions superset/connectors/druid/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1277,13 +1277,30 @@ def run_query( # noqa / druid
client.query_builder.last_query.query_dict, indent=2)
return query_str

@staticmethod
def homogenize_types(df, groupby_cols):
"""Converting all GROUPBY columns to strings
When grouping by a numeric (say FLOAT) column, pydruid returns
strings in the dataframe. This creates issues downstream related
to having mixed types in the dataframe
Here we replace None with <NULL> and make the whole series a
str instead of an object.
"""
for col in groupby_cols:
df[col] = df[col].fillna('<NULL>').astype(str)
return df

def query(self, query_obj):
qry_start_dttm = datetime.now()
client = self.cluster.get_pydruid_client()
query_str = self.get_query_str(
client=client, query_obj=query_obj, phase=2)
df = client.export_pandas()

df = self.homogenize_types(df, query_obj.get('groupby', []))

if df is None or df.size == 0:
raise Exception(_('No data was returned.'))
df.columns = [
Expand Down Expand Up @@ -1322,40 +1339,31 @@ def increment_timestamp(ts):
query=query_str,
duration=datetime.now() - qry_start_dttm)

@staticmethod
def get_filters(raw_filters, num_cols): # noqa
@classmethod
def get_filters(cls, raw_filters, num_cols): # noqa
"""Given Superset filter data structure, returns pydruid Filter(s)"""
filters = None
for flt in raw_filters:
if not all(f in flt for f in ['col', 'op', 'val']):
col = flt.get('col')
op = flt.get('op')
eq = flt.get('val')
if (
not col or
not op or
(eq is None and op not in ('IS NULL', 'IS NOT NULL'))):
continue

col = flt['col']
op = flt['op']
eq = flt['val']
cond = None
if op in ('in', 'not in'):
eq = [
types.replace('"', '').strip()
if isinstance(types, string_types)
else types
for types in eq]
elif not isinstance(flt['val'], string_types):
eq = eq[0] if eq and len(eq) > 0 else ''

is_numeric_col = col in num_cols
if is_numeric_col:
if op in ('in', 'not in'):
eq = [utils.string_to_num(v) for v in eq]
else:
eq = utils.string_to_num(eq)

is_list_target = op in ('in', 'not in')
eq = cls.filter_values_handler(
eq, is_list_target=is_list_target,
target_column_is_numeric=is_numeric_col)
if op == '==':
cond = Dimension(col) == eq
elif op == '!=':
cond = Dimension(col) != eq
elif op in ('in', 'not in'):
fields = []

# ignore the filter if it has no value
if not len(eq):
continue
Expand All @@ -1365,10 +1373,8 @@ def get_filters(raw_filters, num_cols): # noqa
for s in eq:
fields.append(Dimension(col) == s)
cond = Filter(type='or', fields=fields)

if op == 'not in':
cond = ~cond

elif op == 'regex':
cond = Filter(type='regex', pattern=eq, dimension=col)
elif op == '>=':
Expand All @@ -1385,6 +1391,10 @@ def get_filters(raw_filters, num_cols): # noqa
col, None, eq,
upperStrict=True, alphaNumeric=is_numeric_col,
)
elif op == 'IS NULL':
cond = Dimension(col) == None # NOQA
elif op == 'IS NOT NULL':
cond = Dimension(col) != None # NOQA

if filters:
filters = Filter(type='and', fields=[
Expand Down
Loading

0 comments on commit 274fc0b

Please sign in to comment.