Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix for arrays being in separated rows or returned as null #213

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,16 @@

import static org.opensearch.sql.utils.ExpressionUtils.PATH_SEP;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import org.opensearch.sql.data.model.ExprTupleValue;
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.model.ExprValueUtils;
import org.opensearch.sql.data.type.ExprCoreType;
import org.opensearch.sql.data.type.ExprType;
import org.opensearch.sql.expression.env.Environment;

Expand Down Expand Up @@ -100,6 +103,16 @@ public ExprValue resolve(ExprTupleValue value) {
}

private ExprValue resolve(ExprValue value, List<String> paths) {
// This case is to allow returning all values in an array to be in one row
if (value.type().equals(ExprCoreType.ARRAY)){
ExprValue result = ExprValueUtils.collectionValue(new ArrayList<>());

for (ExprValue val: value.collectionValue()){
result.collectionValue().add(resolve(val, paths));
}
return result;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can do this with single-liner like

return new ExprCollectionValue(value.collectionValue().stream()
      .map(val -> resolve(val, paths)).collect(Collectors.toList()))

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! Fixed in a48587d

}

final ExprValue wholePathValue = value.keyValue(String.join(PATH_SEP, paths));
if (!wholePathValue.isMissing() || paths.size() == 1) {
return wholePathValue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,22 +85,6 @@ public void testSelectNestedFieldItself() {
);
}

@Test

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can fix these rather than deleting them.

verifyDataRows(response, rows(new JSONArray(List.of(new JSONObject("{\"id\":1}"), new JSONObject("{\"id\":2}")))));
verifyDataRows(response, rows(new JSONArray(List.of(1,2))));

public void testSelectObjectFieldOfArrayValuesItself() {
JSONObject response = new JSONObject(query("SELECT accounts FROM %s"));

// Only the first element of the list of is returned.
verifyDataRows(response, rows(new JSONObject("{\"id\": 1}")));
}

@Test
public void testSelectObjectFieldOfArrayValuesInnerFields() {
JSONObject response = new JSONObject(query("SELECT accounts.id FROM %s"));

// Only the first element of the list of is returned.
verifyDataRows(response, rows(1));
}

private String query(String sql) {
return executeQuery(
StringUtils.format(sql, TEST_INDEX_DEEP_NESTED),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -587,7 +587,15 @@ public enum Index {
"src/test/resources/calcs.json"),
MULTI_NESTED(TestsConstants.TEST_INDEX_MULTI_NESTED,
"multi_nested",
getMappingFile("indexDefinitions/multi_nested.json"),
getMappingFile("multi_nested.json"),
"src/test/resources/multi_nested_objects.json"),
NESTED_OBJECT(TestsConstants.TEST_INDEX_NESTED_OBJECT,
"nested_object",
null,
"src/test/resources/nested_objects.json"),
MULTI_NESTED_OBJECT(TestsConstants.TEST_INDEX_MULTI_NESTED_OBJECT,
"multi_nested_object",
null,
"src/test/resources/multi_nested_objects.json");

private final String name;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,9 @@ public class TestsConstants {
public final static String TEST_INDEX_BEER = TEST_INDEX + "_beer";
public final static String TEST_INDEX_NULL_MISSING = TEST_INDEX + "_null_missing";
public final static String TEST_INDEX_CALCS = TEST_INDEX + "_calcs";
public final static String TEST_INDEX_MULTI_NESTED= TEST_INDEX + "_multi_nested";
public final static String TEST_INDEX_MULTI_NESTED = TEST_INDEX + "_multi_nested";
public final static String TEST_INDEX_NESTED_OBJECT = TEST_INDEX + "_nested_object";
public final static String TEST_INDEX_MULTI_NESTED_OBJECT = TEST_INDEX + "_multi_nested_object";

public final static String DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";
public final static String TS_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss.SSS";
Expand Down
32 changes: 30 additions & 2 deletions integ-test/src/test/java/org/opensearch/sql/sql/NestedIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,52 @@

package org.opensearch.sql.sql;

import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_MULTI_NESTED;
import static org.opensearch.sql.util.MatcherUtils.rows;
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;

import org.json.JSONArray;
import org.json.JSONObject;
import org.junit.Test;
import org.opensearch.sql.legacy.SQLIntegTestCase;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this space between the imports pass checkstyle?

import java.io.IOException;
import java.util.List;

import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_TYPE;

public class NestedIT extends SQLIntegTestCase {
@Override
public void init() throws IOException {
loadIndex(Index.NESTED);
loadIndex(Index.MULTI_NESTED);
}

@Test
public void nested_function_with_array_of_nested_field_test() {
String query = "SELECT nested(message.info), nested(comment.data) FROM " + TEST_INDEX_NESTED_TYPE;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Plugin response is

{
  "schema": [
    {
      "name": "nested(message.info)",
      "type": "keyword"
    },
    {
      "name": "nested(comment.data)",
      "type": "keyword"
    }
  ],
  "total": 5,
  "datarows": [
    [
      "a",
      "ab"
    ],
    [
      "b",
      "aa"
    ],
    [
      "c",
      "aa"
    ],
    [
      [
        "c",
        "a"
      ],
      "ab"
    ],
    [
      [
        "zz"
      ],
      [
        "aa",
        "bb"
      ]
    ]
  ],
  "size": 5,
  "status": 200
}

It could be confusing when datatype is keyword, but values are arrays and keywords. Imagine a user has a parser for response, what should parser do with such values?
You can try our JDBC driver as an example.

I think we should return all arrays and value type should be array if we work with value which may be an array.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe our solution is to split the nested function for JDBC or other queries that have strict typing (really, any SQL language). For now, we should make sure that the JDBC and ODBC drivers should not output any undesired typing and break.

To ensure that typings aren't broken, we could introduce partiql like syntax for nested that would make sure that only strict typing is followed on output. For example:

SELECT n.name, m1.info AS info_keyword, m2.info AS info_array FROM nested as n, n.message AS m1 IS keyword, n.message as m2 IS array

SELECT n.name, CASE(m.info IS keyword) THEN m.info END AS info_keyword, CASE(m.info IS ARRAY) THEN m.info END AS info_array FROM nested as n, n.message AS m

For now, this can be marked as 'out of scope' as the user should be responsible for proper data setup and typing.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree. It is a new feature reported in #1300.

JSONObject result = executeJdbcRequest(query);

assertEquals(5, result.getInt("total"));
verifyDataRows(result,
rows("a", "ab"),
rows("b", "aa"),
rows("c", "aa"),
rows(new JSONArray(List.of("c","a")), "ab"),
rows(new JSONArray(List.of("zz")), new JSONArray(List.of("aa", "bb"))));
}

@Test
public void nested_string_subfield_test() {
String query = "SELECT nested(message.dayOfWeek) FROM " + TEST_INDEX_NESTED_TYPE;
public void nested_function_with_array_of_multi_nested_field_test() {
String query = "SELECT nested(message.author.name) FROM " + TEST_INDEX_MULTI_NESTED;
JSONObject result = executeJdbcRequest(query);

assertEquals(5, result.getInt("total"));
verifyDataRows(result,
rows("e"),
rows("f"),
rows("g"),
rows(new JSONArray(List.of("h", "p"))),
rows(new JSONArray(List.of("yy"))));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package org.opensearch.sql.sql;

import org.json.JSONArray;
import org.json.JSONObject;
import org.junit.Test;
import org.opensearch.sql.legacy.SQLIntegTestCase;

import java.io.IOException;
import java.util.List;
Comment on lines +3 to +9

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do these imports pass checkstyle with the extra space and them out of alphabetical order?


import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_MULTI_NESTED;
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_TYPE;
import static org.opensearch.sql.util.MatcherUtils.rows;
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;

public class NestedPartiQLIT extends SQLIntegTestCase {
@Override
public void init() throws IOException {
loadIndex(Index.NESTED);
loadIndex(Index.MULTI_NESTED);
}

@Test
public void partiQL_with_array_of_nested_field_test() {
String query = "SELECT message.info, comment.data FROM " + TEST_INDEX_NESTED_TYPE;
JSONObject result = executeJdbcRequest(query);

assertEquals(5, result.getInt("total"));
verifyDataRows(result,
rows("a", "ab"),
rows("b", "aa"),
rows("c", "aa"),
rows(new JSONArray(List.of("c","a")), "ab"),
rows(new JSONArray(List.of("zz")), new JSONArray(List.of("aa", "bb"))));
}

@Test
public void partiQL_with_array_of_multi_nested_field_test() {
String query = "SELECT message.author.name, message.info FROM " + TEST_INDEX_MULTI_NESTED;
JSONObject result = executeJdbcRequest(query);

assertEquals(5, result.getInt("total"));
verifyDataRows(result,
rows("e", "a"),
rows("f", "b"),
rows("g", "c"),
rows(new JSONArray(List.of("h", "p")), new JSONArray(List.of("d","i"))),
rows(new JSONArray(List.of("yy")), new JSONArray(List.of("zz"))));
}
}
50 changes: 50 additions & 0 deletions integ-test/src/test/java/org/opensearch/sql/sql/ObjectIT.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package org.opensearch.sql.sql;

import org.json.JSONArray;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

checkstyle passes?

import org.json.JSONObject;
import org.junit.Test;
import org.opensearch.sql.legacy.SQLIntegTestCase;

import java.io.IOException;
import java.util.List;

import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_MULTI_NESTED_OBJECT;
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_OBJECT;
import static org.opensearch.sql.util.MatcherUtils.rows;
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;

public class ObjectIT extends SQLIntegTestCase {
@Override
public void init() throws IOException {
loadIndex(Index.NESTED_OBJECT);
loadIndex(Index.MULTI_NESTED_OBJECT);
}

@Test
public void object_array_of_objects_test() {
String query = "SELECT message.info, comment.data FROM " + TEST_INDEX_NESTED_OBJECT;
JSONObject result = executeJdbcRequest(query);

assertEquals(5, result.getInt("total"));
verifyDataRows(result,
rows("a", "ab"),
rows("b", "aa"),
rows("c", "aa"),
rows(new JSONArray(List.of("c","a")), "ab"),
rows(new JSONArray(List.of("zz")), new JSONArray(List.of("aa", "bb"))));
}

@Test
public void object_with_array_of_multi_nested_objects_test() {
String query = "SELECT message.author.name, message.info FROM " + TEST_INDEX_MULTI_NESTED_OBJECT;
JSONObject result = executeJdbcRequest(query);

assertEquals(5, result.getInt("total"));
verifyDataRows(result,
rows("e", "a"),
rows("f", "b"),
rows("g", "c"),
rows(new JSONArray(List.of("h", "p")), new JSONArray(List.of("d","i"))),
rows(new JSONArray(List.of("yy")), new JSONArray(List.of("zz"))));
}
}
6 changes: 3 additions & 3 deletions integ-test/src/test/resources/multi_nested_objects.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{"index":{"_id":"1"}}
{"message":[{"info":"a","author":{"name": "e", "address": {"street": "bc", "number": 1}},"dayOfWeek":1}]}

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we are updating the test data, maybe we could update the values to be better then one or two single letters.

{"message":{"info":"a","author":{"name": "e", "address": {"street": "bc", "number": 1}},"dayOfWeek":1}}
{"index":{"_id":"2"}}
{"message":[{"info":"b","author":{"name": "f", "address": {"street": "ab", "number": 2}},"dayOfWeek":2}]}
{"message":{"info":"b","author":{"name": "f", "address": {"street": "ab", "number": 2}},"dayOfWeek":2}}
{"index":{"_id":"3"}}
{"message":[{"info":"c","author":{"name": "g", "address": {"street": "sk", "number": 3}},"dayOfWeek":1}]}
{"message":{"info":"c","author":{"name": "g", "address": {"street": "sk", "number": 3}},"dayOfWeek":1}}
{"index":{"_id":"4"}}
{"message":[{"info":"d","author":{"name": "h", "address": {"street": "mb", "number": 4}},"dayOfWeek":4},{"info":"i","author":{"name": "p", "address": {"street": "on", "number": 5}},"dayOfWeek":5}]}
{"index":{"_id":"5"}}
Expand Down