Skip to content

Commit

Permalink
SQL: Enable aggregations to create a separate bucket for missing valu…
Browse files Browse the repository at this point in the history
…es (#32832)

Enable aggregations to create a separate bucket for missing values.
  • Loading branch information
astefan committed Aug 27, 2018
1 parent c41c614 commit 3d9ca4b
Show file tree
Hide file tree
Showing 9 changed files with 154 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ public GroupByColumnKey(String id, String fieldName, Direction direction) {
public TermsValuesSourceBuilder asValueSource() {
return new TermsValuesSourceBuilder(id())
.field(fieldName())
.order(direction().asOrder());
.order(direction().asOrder())
.missingBucket(true);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ public DateHistogramValuesSourceBuilder asValueSource() {
return new DateHistogramValuesSourceBuilder(id())
.field(fieldName())
.dateHistogramInterval(new DateHistogramInterval(interval))
.timeZone(DateTimeZone.forTimeZone(timeZone));
.timeZone(DateTimeZone.forTimeZone(timeZone))
.missingBucket(true);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ public ScriptTemplate script() {
public TermsValuesSourceBuilder asValueSource() {
TermsValuesSourceBuilder builder = new TermsValuesSourceBuilder(id())
.script(script.toPainless())
.order(direction().asOrder());
.order(direction().asOrder())
.missingBucket(true);

if (script.outputType().isNumeric()) {
builder.valueType(ValueType.NUMBER);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,15 @@ protected static void loadDatasetIntoEs(RestClient client) throws Exception {
}

protected static void loadEmpDatasetIntoEs(RestClient client) throws Exception {
loadEmpDatasetIntoEs(client, "test_emp");
loadEmpDatasetIntoEs(client, "test_emp_copy");
loadEmpDatasetIntoEs(client, "test_emp", "employees");
loadEmpDatasetIntoEs(client, "test_emp_copy", "employees");
loadEmpDatasetIntoEs(client, "test_emp_with_nulls", "employees_with_nulls");
makeAlias(client, "test_alias", "test_emp", "test_emp_copy");
makeAlias(client, "test_alias_emp", "test_emp", "test_emp_copy");
}

public static void loadDocsDatasetIntoEs(RestClient client) throws Exception {
loadEmpDatasetIntoEs(client, "emp");
loadEmpDatasetIntoEs(client, "emp", "employees");
loadLibDatasetIntoEs(client, "library");
makeAlias(client, "employees", "emp");
}
Expand All @@ -62,7 +63,7 @@ private static void createString(String name, XContentBuilder builder) throws Ex
.endObject();
}

protected static void loadEmpDatasetIntoEs(RestClient client, String index) throws Exception {
protected static void loadEmpDatasetIntoEs(RestClient client, String index, String fileName) throws Exception {
Request request = new Request("PUT", "/" + index);
XContentBuilder createIndex = JsonXContent.contentBuilder().startObject();
createIndex.startObject("settings");
Expand Down Expand Up @@ -129,15 +130,18 @@ protected static void loadEmpDatasetIntoEs(RestClient client, String index) thro
request = new Request("POST", "/" + index + "/emp/_bulk");
request.addParameter("refresh", "true");
StringBuilder bulk = new StringBuilder();
csvToLines("employees", (titles, fields) -> {
csvToLines(fileName, (titles, fields) -> {
bulk.append("{\"index\":{}}\n");
bulk.append('{');
String emp_no = fields.get(1);
for (int f = 0; f < fields.size(); f++) {
if (f != 0) {
bulk.append(',');
// an empty value in the csv file is treated as 'null', thus skipping it in the bulk request
if (fields.get(f).trim().length() > 0) {
if (f != 0) {
bulk.append(',');
}
bulk.append('"').append(titles.get(f)).append("\":\"").append(fields.get(f)).append('"');
}
bulk.append('"').append(titles.get(f)).append("\":\"").append(fields.get(f)).append('"');
}
// append department
List<List<String>> list = dep_emp.get(emp_no);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ public abstract class SqlSpecTestCase extends SpecBaseIntegrationTestCase {
private String query;

@ClassRule
public static LocalH2 H2 = new LocalH2((c) -> c.createStatement().execute("RUNSCRIPT FROM 'classpath:/setup_test_emp.sql'"));
public static LocalH2 H2 = new LocalH2((c) -> {
c.createStatement().execute("RUNSCRIPT FROM 'classpath:/setup_test_emp.sql'");
c.createStatement().execute("RUNSCRIPT FROM 'classpath:/setup_test_emp_with_nulls.sql'");
});

@ParametersFactory(argumentFormatting = PARAM_FORMATTING)
public static List<Object[]> readScriptSpec() throws Exception {
Expand All @@ -39,6 +42,7 @@ public static List<Object[]> readScriptSpec() throws Exception {
tests.addAll(readScriptSpec("/arithmetic.sql-spec", parser));
tests.addAll(readScriptSpec("/string-functions.sql-spec", parser));
tests.addAll(readScriptSpec("/case-functions.sql-spec", parser));
tests.addAll(readScriptSpec("/agg_nulls.sql-spec", parser));
return tests;
}

Expand Down
14 changes: 14 additions & 0 deletions x-pack/qa/sql/src/main/resources/agg_nulls.sql-spec
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
selectGenderWithNullsAndGroupByGender
SELECT gender, COUNT(*) count FROM test_emp_with_nulls GROUP BY gender ORDER BY gender;
selectFirstNameWithNullsAndGroupByFirstName
SELECT first_name FROM test_emp_with_nulls GROUP BY first_name ORDER BY first_name;
selectCountWhereIsNull
SELECT COUNT(*) count FROM test_emp_with_nulls WHERE first_name IS NULL;
selectLanguagesCountWithNullsAndGroupByLanguage
SELECT languages l, COUNT(*) c FROM test_emp_with_nulls GROUP BY languages ORDER BY languages;
selectHireDateGroupByHireDate
SELECT hire_date HD, COUNT(*) c FROM test_emp_with_nulls GROUP BY hire_date ORDER BY hire_date DESC;
selectHireDateGroupByHireDate
SELECT hire_date HD, COUNT(*) c FROM test_emp_with_nulls GROUP BY hire_date ORDER BY hire_date DESC;
selectSalaryGroupBySalary
SELECT salary, COUNT(*) c FROM test_emp_with_nulls GROUP BY salary ORDER BY salary DESC;
7 changes: 4 additions & 3 deletions x-pack/qa/sql/src/main/resources/alias.csv-spec
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ test_alias | ALIAS
test_alias_emp | ALIAS
test_emp | BASE TABLE
test_emp_copy | BASE TABLE
test_emp_with_nulls | BASE TABLE
;

testGroupByOnAlias
Expand All @@ -98,10 +99,10 @@ F | 10099.28
;

testGroupByOnPattern
SELECT gender, PERCENTILE(emp_no, 97) p1 FROM test_* GROUP BY gender;
SELECT gender, PERCENTILE(emp_no, 97) p1 FROM test_* WHERE gender is NOT NULL GROUP BY gender;

gender:s | p1:d

F | 10099.28
M | 10095.75
F | 10099.32
M | 10095.98
;
101 changes: 101 additions & 0 deletions x-pack/qa/sql/src/main/resources/employees_with_nulls.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
birth_date,emp_no,first_name,gender,hire_date,languages,last_name,salary
1953-09-02T00:00:00Z,10001,Georgi,,1986-06-26T00:00:00Z,2,Facello,57305
1964-06-02T00:00:00Z,10002,Bezalel,,1985-11-21T00:00:00Z,5,Simmel,56371
1959-12-03T00:00:00Z,10003,Parto,,1986-08-28T00:00:00Z,4,Bamford,61805
1954-05-01T00:00:00Z,10004,Chirstian,,1986-12-01T00:00:00Z,5,Koblick,36174
1955-01-21T00:00:00Z,10005,Kyoichi,,1989-09-12T00:00:00Z,1,Maliniak,63528
1953-04-20T00:00:00Z,10006,Anneke,,1989-06-02T00:00:00Z,3,Preusig,60335
1957-05-23T00:00:00Z,10007,Tzvetan,,1989-02-10T00:00:00Z,4,Zielinski,74572
1958-02-19T00:00:00Z,10008,Saniya,,1994-09-15T00:00:00Z,2,Kalloufi,43906
1952-04-19T00:00:00Z,10009,Sumant,,1985-02-18T00:00:00Z,1,Peac,66174
1963-06-01T00:00:00Z,10010,Duangkaew,,1989-08-24T00:00:00Z,4,Piveteau,45797
1953-11-07T00:00:00Z,10011,Mary,F,1990-01-22T00:00:00Z,5,Sluis,31120
1960-10-04T00:00:00Z,10012,Patricio,M,1992-12-18T00:00:00Z,5,Bridgland,48942
1963-06-07T00:00:00Z,10013,Eberhardt,M,1985-10-20T00:00:00Z,1,Terkki,48735
1956-02-12T00:00:00Z,10014,Berni,M,1987-03-11T00:00:00Z,5,Genin,37137
1959-08-19T00:00:00Z,10015,Guoxiang,M,1987-07-02T00:00:00Z,5,Nooteboom,25324
1961-05-02T00:00:00Z,10016,Kazuhito,M,1995-01-27T00:00:00Z,2,Cappelletti,61358
1958-07-06T00:00:00Z,10017,Cristinel,F,1993-08-03T00:00:00Z,2,Bouloucos,58715
1954-06-19T00:00:00Z,10018,Kazuhide,F,1993-08-03T00:00:00Z,2,Peha,56760
1953-01-23T00:00:00Z,10019,Lillian,M,1993-08-03T00:00:00Z,1,Haddadi,73717
1952-12-24T00:00:00Z,10020,,M,1991-01-26T00:00:00Z,3,Warwick,40031
1960-02-20T00:00:00Z,10021,,M,1989-12-17T00:00:00Z,5,Erde,60408
1952-07-08T00:00:00Z,10022,,M,1995-08-22T00:00:00Z,3,Famili,48233
1953-09-29T00:00:00Z,10023,,F,1989-12-17T00:00:00Z,2,Montemayor,47896
1958-09-05T00:00:00Z,10024,,F,1997-05-19T00:00:00Z,3,Pettey,64675
1958-10-31T00:00:00Z,10025,Prasadram,M,1987-08-17T00:00:00Z,5,Heyers,47411
1953-04-03T00:00:00Z,10026,Yongqiao,M,1995-03-20T00:00:00Z,3,Berztiss,28336
1962-07-10T00:00:00Z,10027,Divier,F,1989-07-07T00:00:00Z,5,Reistad,73851
1963-11-26T00:00:00Z,10028,Domenick,M,1991-10-22T00:00:00Z,1,Tempesti,39356
1956-12-13T00:00:00Z,10029,Otmar,M,1985-11-20T00:00:00Z,,Herbst,74999
1958-07-14T00:00:00Z,10030,Elvis,M,1994-02-17T00:00:00Z,,Demeyer,67492
1959-01-27T00:00:00Z,10031,Karsten,M,1994-02-17T00:00:00Z,,Joslin,37716
1960-08-09T00:00:00Z,10032,Jeong,F,1990-06-20T00:00:00Z,,Reistad,62233
1956-11-14T00:00:00Z,10033,Arif,M,1987-03-18T00:00:00Z,,Merlo,70011
1962-12-29T00:00:00Z,10034,Bader,M,1988-09-05T00:00:00Z,,Swan,39878
1953-02-08T00:00:00Z,10035,Alain,M,1988-09-05T00:00:00Z,,Chappelet,25945
1959-08-10T00:00:00Z,10036,Adamantios,M,1992-01-03T00:00:00Z,,Portugali,60781
1963-07-22T00:00:00Z,10037,Pradeep,M,1990-12-05T00:00:00Z,,Makrucki,37691
1960-07-20T00:00:00Z,10038,Huan,M,1989-09-20T00:00:00Z,,Lortz,35222
1959-10-01T00:00:00Z,10039,Alejandro,M,1988-01-19T00:00:00Z,,Brender,36051
1959-09-13T00:00:00Z,10040,Weiyi,F,1993-02-14T00:00:00Z,,Meriste,37112
1959-08-27T00:00:00Z,10041,Uri,F,1989-11-12T00:00:00Z,1,Lenart,56415
1956-02-26T00:00:00Z,10042,Magy,F,1993-03-21T00:00:00Z,3,Stamatiou,30404
1960-09-19T00:00:00Z,10043,Yishay,M,1990-10-20T00:00:00Z,1,Tzvieli,34341
1961-09-21T00:00:00Z,10044,Mingsen,F,1994-05-21T00:00:00Z,1,Casley,39728
1957-08-14T00:00:00Z,10045,Moss,M,1989-09-02T00:00:00Z,3,Shanbhogue,74970
1960-07-23T00:00:00Z,10046,Lucien,M,1992-06-20T00:00:00Z,4,Rosenbaum,50064
1952-06-29T00:00:00Z,10047,Zvonko,M,1989-03-31T00:00:00Z,4,Nyanchama,42716
1963-07-11T00:00:00Z,10048,Florian,M,1985-02-24T00:00:00Z,3,Syrotiuk,26436
1961-04-24T00:00:00Z,10049,Basil,F,1992-05-04T00:00:00Z,5,Tramer,37853
1958-05-21T00:00:00Z,10050,Yinghua,M,1990-12-25T00:00:00Z,2,Dredge,43026
1953-07-28T00:00:00Z,10051,Hidefumi,M,1992-10-15T00:00:00Z,3,Caine,58121
1961-02-26T00:00:00Z,10052,Heping,M,1988-05-21T00:00:00Z,1,Nitsch,55360
1954-09-13T00:00:00Z,10053,Sanjiv,F,1986-02-04T00:00:00Z,3,Zschoche,54462
1957-04-04T00:00:00Z,10054,Mayumi,M,1995-03-13T00:00:00Z,4,Schueller,65367
1956-06-06T00:00:00Z,10055,Georgy,M,1992-04-27T00:00:00Z,5,Dredge,49281
1961-09-01T00:00:00Z,10056,Brendon,F,1990-02-01T00:00:00Z,2,Bernini,33370
1954-05-30T00:00:00Z,10057,Ebbe,F,1992-01-15T00:00:00Z,4,Callaway,27215
1954-10-01T00:00:00Z,10058,Berhard,M,1987-04-13T00:00:00Z,3,McFarlin,38376
1953-09-19T00:00:00Z,10059,Alejandro,F,1991-06-26T00:00:00Z,2,McAlpine,44307
1961-10-15T00:00:00Z,10060,Breannda,M,1987-11-02T00:00:00Z,2,Billingsley,29175
1962-10-19T00:00:00Z,10061,Tse,M,1985-09-17T00:00:00Z,1,Herber,49095
1961-11-02T00:00:00Z,10062,Anoosh,M,1991-08-30T00:00:00Z,3,Peyn,65030
1952-08-06T00:00:00Z,10063,Gino,F,1989-04-08T00:00:00Z,3,Leonhardt,52121
1959-04-07T00:00:00Z,10064,Udi,M,1985-11-20T00:00:00Z,5,Jansch,33956
1963-04-14T00:00:00Z,10065,Satosi,M,1988-05-18T00:00:00Z,2,Awdeh,50249
1952-11-13T00:00:00Z,10066,Kwee,M,1986-02-26T00:00:00Z,5,Schusler,31897
1953-01-07T00:00:00Z,10067,Claudi,M,1987-03-04T00:00:00Z,2,Stavenow,52044
1962-11-26T00:00:00Z,10068,Charlene,M,1987-08-07T00:00:00Z,3,Brattka,28941
1960-09-06T00:00:00Z,10069,Margareta,F,1989-11-05T00:00:00Z,5,Bierman,41933
1955-08-20T00:00:00Z,10070,Reuven,M,1985-10-14T00:00:00Z,3,Garigliano,54329
1958-01-21T00:00:00Z,10071,Hisao,M,1987-10-01T00:00:00Z,2,Lipner,40612
1952-05-15T00:00:00Z,10072,Hironoby,F,1988-07-21T00:00:00Z,5,Sidou,54518
1954-02-23T00:00:00Z,10073,Shir,M,1991-12-01T00:00:00Z,4,McClurg,32568
1955-08-28T00:00:00Z,10074,Mokhtar,F,1990-08-13T00:00:00Z,5,Bernatsky,38992
1960-03-09T00:00:00Z,10075,Gao,F,1987-03-19T00:00:00Z,5,Dolinsky,51956
1952-06-13T00:00:00Z,10076,Erez,F,1985-07-09T00:00:00Z,3,Ritzmann,62405
1964-04-18T00:00:00Z,10077,Mona,M,1990-03-02T00:00:00Z,5,Azuma,46595
1959-12-25T00:00:00Z,10078,Danel,F,1987-05-26T00:00:00Z,2,Mondadori,69904
1961-10-05T00:00:00Z,10079,Kshitij,F,1986-03-27T00:00:00Z,2,Gils,32263
1957-12-03T00:00:00Z,10080,Premal,M,1985-11-19T00:00:00Z,5,Baek,52833
1960-12-17T00:00:00Z,10081,Zhongwei,M,1986-10-30T00:00:00Z,2,Rosen,50128
1963-09-09T00:00:00Z,10082,Parviz,M,1990-01-03T00:00:00Z,4,Lortz,49818
1959-07-23T00:00:00Z,10083,Vishv,M,1987-03-31T00:00:00Z,1,Zockler,
1960-05-25T00:00:00Z,10084,Tuval,M,1995-12-15T00:00:00Z,1,Kalloufi,
1962-11-07T00:00:00Z,10085,Kenroku,M,1994-04-09T00:00:00Z,5,Malabarba,
1962-11-19T00:00:00Z,10086,Somnath,M,1990-02-16T00:00:00Z,1,Foote,
1959-07-23T00:00:00Z,10087,Xinglin,F,1986-09-08T00:00:00Z,5,Eugenio,
1954-02-25T00:00:00Z,10088,Jungsoon,F,1988-09-02T00:00:00Z,5,Syrzycki,
1963-03-21T00:00:00Z,10089,Sudharsan,F,1986-08-12T00:00:00Z,4,Flasterstein,
1961-05-30T00:00:00Z,10090,Kendra,M,1986-03-14T00:00:00Z,2,Hofting,44956
1955-10-04T00:00:00Z,10091,Amabile,M,1992-11-18T00:00:00Z,3,Gomatam,38645
1964-10-18T00:00:00Z,10092,Valdiodio,F,1989-09-22T00:00:00Z,1,Niizuma,25976
1964-06-11T00:00:00Z,10093,Sailaja,M,1996-11-05T00:00:00Z,3,Desikan,45656
1957-05-25T00:00:00Z,10094,Arumugam,F,1987-04-18T00:00:00Z,5,Ossenbruggen,66817
1965-01-03T00:00:00Z,10095,Hilari,M,1986-07-15T00:00:00Z,4,Morton,37702
1954-09-16T00:00:00Z,10096,Jayson,M,1990-01-14T00:00:00Z,4,Mandell,43889
1952-02-27T00:00:00Z,10097,Remzi,M,1990-09-15T00:00:00Z,3,Waschkowski,71165
1961-09-23T00:00:00Z,10098,Sreekrishna,F,1985-05-13T00:00:00Z,4,Servieres,44817
1956-05-25T00:00:00Z,10099,Valter,F,1988-10-18T00:00:00Z,2,Sullins,73578
1953-04-21T00:00:00Z,10100,Hironobu,F,1987-09-21T00:00:00Z,4,Haraldson,68431
12 changes: 12 additions & 0 deletions x-pack/qa/sql/src/main/resources/setup_test_emp_with_nulls.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
DROP TABLE IF EXISTS "test_emp_with_nulls";
CREATE TABLE "test_emp_with_nulls" (
"birth_date" TIMESTAMP WITH TIME ZONE,
"emp_no" INT,
"first_name" VARCHAR(50),
"gender" VARCHAR(1),
"hire_date" TIMESTAMP WITH TIME ZONE,
"languages" TINYINT,
"last_name" VARCHAR(50),
"salary" INT
)
AS SELECT * FROM CSVREAD('classpath:/employees_with_nulls.csv');

0 comments on commit 3d9ca4b

Please sign in to comment.