Skip to content

Commit

Permalink
Fix long value parsing in jsonextractscalar (apache#14337)
Browse files Browse the repository at this point in the history
  • Loading branch information
bziobrowski authored Nov 12, 2024
1 parent a8a0a33 commit 013e80f
Show file tree
Hide file tree
Showing 8 changed files with 485 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
import org.apache.pinot.core.operator.ColumnContext;
import org.apache.pinot.core.operator.blocks.ValueBlock;
import org.apache.pinot.core.operator.transform.TransformResultMetadata;
import org.apache.pinot.core.util.NumberUtils;
import org.apache.pinot.core.util.NumericException;
import org.apache.pinot.spi.data.FieldSpec.DataType;
import org.apache.pinot.spi.utils.JsonUtils;

Expand Down Expand Up @@ -120,6 +122,10 @@ public TransformResultMetadata getResultMetadata() {

@Override
public int[] transformToIntValuesSV(ValueBlock valueBlock) {
if (_resultMetadata.getDataType().getStoredType() != DataType.INT) {
return super.transformToIntValuesSV(valueBlock);
}

initIntValuesSV(valueBlock.getNumDocs());
IntFunction<Object> resultExtractor = getResultExtractor(valueBlock);
int defaultValue = 0;
Expand Down Expand Up @@ -156,6 +162,9 @@ public int[] transformToIntValuesSV(ValueBlock valueBlock) {

@Override
public long[] transformToLongValuesSV(ValueBlock valueBlock) {
if (_resultMetadata.getDataType().getStoredType() != DataType.LONG) {
return super.transformToLongValuesSV(valueBlock);
}
initLongValuesSV(valueBlock.getNumDocs());
IntFunction<Object> resultExtractor = getResultExtractor(valueBlock);
long defaultValue = 0;
Expand Down Expand Up @@ -184,8 +193,11 @@ public long[] transformToLongValuesSV(ValueBlock valueBlock) {
if (result instanceof Number) {
_longValuesSV[i] = ((Number) result).longValue();
} else {
// Handle scientific notation
_longValuesSV[i] = (long) Double.parseDouble(result.toString());
try {
_longValuesSV[i] = NumberUtils.parseJsonLong(result.toString());
} catch (NumericException nfe) {
throw new NumberFormatException("For input string: \"" + result + "\"");
}
}
}
return _longValuesSV;
Expand Down
230 changes: 230 additions & 0 deletions pinot-core/src/main/java/org/apache/pinot/core/util/NumberUtils.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pinot.core.util;

/**
* Utility class with various number related methods.
*/
public class NumberUtils {

public static final NumericException NULL_EXCEPTION = new NumericException("Can't parse null string");
public static final NumericException EXP_EXCEPTION = new NumericException("Wrong exponent");

private static final long[] POWERS_OF_10 = new long[]{
1L,
10L,
100L,
1000L,
10000L,
100000L,
1000000L,
10000000L,
100000000L,
1000000000L,
10000000000L,
100000000000L,
1000000000000L,
10000000000000L,
100000000000000L,
1000000000000000L,
10000000000000000L,
100000000000000000L,
1000000000000000000L,
};

private NumberUtils() {
}

/**
* Parses whole input char sequence.
* Throws static, pre-allocated NumericException.
* If proper stack trace is required, caller has to catch it and throw another exception.
* @param cs char sequence to parse
* @return parsed long value
*/
public static long parseLong(CharSequence cs)
throws NumericException {
if (cs == null) {
throw NULL_EXCEPTION;
}
return parseLong(cs, 0, cs.length());
}

/**
* Parses input char sequence between given indices.
* Throws static, pre-allocated NumericException.
* If proper stack trace is required, caller has to catch it and throw another exception.
* @param cs char sequence to parse
* @param start start index (inclusive)
* @param end end index (exclusive)
* @return parsed long value
*/
public static long parseLong(CharSequence cs, int start, int end)
throws NumericException {
if (cs == null) {
throw NULL_EXCEPTION;
}

boolean negative = false;
int i = start;
long limit = -Long.MAX_VALUE;

if (end > start) {
char firstChar = cs.charAt(start);
if (firstChar < '0') { // Possible leading "+" or "-"
if (firstChar == '-') {
negative = true;
limit = Long.MIN_VALUE;
} else if (firstChar != '+') {
throw NumericException.INSTANCE;
}

if (end == start + 1) { // Cannot have lone "+" or "-"
throw NumericException.INSTANCE;
}
i++;
}
long multmin = limit / 10;
long result = 0;
while (i < end) {
// Accumulating negatively avoids surprises near MAX_VALUE
char c = cs.charAt(i++);
if (c < '0' || c > '9' || result < multmin) {
throw NumericException.INSTANCE;
}

int digit = c - '0';
result *= 10;
if (result < limit + digit) {
throw NumericException.INSTANCE;
}
result -= digit;
}
return negative ? result : -result;
} else {
throw NumericException.INSTANCE;
}
}

/**
* Parses input accepting regular long syntax plus:
* 1E1 -> 10
* 1.234 -> 1
* 1.123E1 -> 11
* @param cs - char sequence to parse
* @return parsed long value
*/
public static long parseJsonLong(CharSequence cs)
throws NumericException {
if (cs == null) {
throw NULL_EXCEPTION;
}

boolean negative = false;
int i = 0;
int len = cs.length();
long limit = -Long.MAX_VALUE;

if (len > 0) {
boolean dotFound = false;
boolean exponentFound = false;

char firstChar = cs.charAt(0);
if (firstChar < '0') { // Possible leading "+" or "-"
if (firstChar == '-') {
negative = true;
limit = Long.MIN_VALUE;
} else if (firstChar != '+') {
throw NumericException.INSTANCE;
}

if (len == 1) { // Cannot have lone "+" or "-"
throw NumericException.INSTANCE;
}
i++;
}
long multmin = limit / 10;
long result = 0;
while (i < len) {
// Accumulating negatively avoids surprises near MAX_VALUE
char c = cs.charAt(i++);
if (c < '0' || c > '9' || result < multmin) {
if (c == '.') {
//ignore the rest of sequence
dotFound = true;
break;
} else if (c == 'e' || c == 'E') {
exponentFound = true;
break;
}
throw NumericException.INSTANCE;
}

int digit = c - '0';
result *= 10;
if (result < limit + digit) {
throw NumericException.INSTANCE;
}
result -= digit;
}

if (dotFound) {
//scan rest of the string to make sure it's only digits
while (i < len) {
char c = cs.charAt(i++);
if (c < '0' || c > '9') {
if ((c | 32) == 'e') {
exponentFound = true;
break;
} else {
throw NumericException.INSTANCE;
}
}
}
}

if (exponentFound) {
if (dotFound) {
try { // TODO: remove toString()
return (long) Double.parseDouble(cs.toString());
} catch (NumberFormatException ne) {
throw NumericException.INSTANCE;
}
}

long exp;
try {
exp = parseLong(cs, i, len);
} catch (NumericException nfe) {
throw EXP_EXCEPTION;
}

if (exp < 0 || exp > POWERS_OF_10.length) {
throw EXP_EXCEPTION;
}

return (negative ? result : -result) * POWERS_OF_10[(int) exp];
}

return negative ? result : -result;
} else {
throw NumericException.INSTANCE;
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pinot.core.util;

public class NumericException extends Exception {

public static final NumericException INSTANCE = new NumericException();

public NumericException() {
}

public NumericException(String message) {
super(message);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,13 @@ public class JsonExtractScalarTransformFunctionTest extends BaseTransformFunctio
public void testJsonPathTransformFunction(String expressionStr, DataType resultsDataType, boolean isSingleValue) {
ExpressionContext expression = RequestContextUtils.getExpression(expressionStr);
TransformFunction transformFunction = TransformFunctionFactory.get(expression, _dataSourceMap);

Assert.assertTrue(transformFunction instanceof JsonExtractScalarTransformFunction);
Assert.assertEquals(transformFunction.getName(), JsonExtractScalarTransformFunction.FUNCTION_NAME);

Assert.assertEquals(transformFunction.getResultMetadata().getDataType(), resultsDataType);
Assert.assertEquals(transformFunction.getResultMetadata().isSingleValue(), isSingleValue);

if (isSingleValue) {
switch (resultsDataType) {
case INT:
Expand Down Expand Up @@ -105,6 +107,49 @@ public void testJsonPathTransformFunction(String expressionStr, DataType results
}
}

@Test
public void testExtractWithScalarTypeMismatch() {
ExpressionContext expression = RequestContextUtils.getExpression("jsonExtractScalar(json,'$.stringSV','DOUBLE')");
TransformFunction transformFunction = TransformFunctionFactory.get(expression, _dataSourceMap);

Assert.assertTrue(transformFunction instanceof JsonExtractScalarTransformFunction);
Assert.assertEquals(transformFunction.getName(), JsonExtractScalarTransformFunction.FUNCTION_NAME);

long[] longValues = transformFunction.transformToLongValuesSV(_projectionBlock);
for (int i = 0; i < NUM_ROWS; i++) {
Assert.assertEquals(longValues[i], (long) Double.parseDouble(_stringSVValues[i]));
}

int[] intValues = transformFunction.transformToIntValuesSV(_projectionBlock);
for (int i = 0; i < NUM_ROWS; i++) {
Assert.assertEquals(intValues[i], (int) Double.parseDouble(_stringSVValues[i]));
}

float[] floatValues = transformFunction.transformToFloatValuesSV(_projectionBlock);
for (int i = 0; i < NUM_ROWS; i++) {
Assert.assertEquals(floatValues[i], Float.parseFloat(_stringSVValues[i]));
}

BigDecimal[] bdValues = transformFunction.transformToBigDecimalValuesSV(_projectionBlock);
for (int i = 0; i < NUM_ROWS; i++) {
Assert.assertEquals(bdValues[i], new BigDecimal(_stringSVValues[i]));
}
}

@Test
public void testExtractWithScalarStringToLong() {
ExpressionContext expression = RequestContextUtils.getExpression("jsonExtractScalar(json,'$.stringSV','LONG')");
TransformFunction transformFunction = TransformFunctionFactory.get(expression, _dataSourceMap);

Assert.assertTrue(transformFunction instanceof JsonExtractScalarTransformFunction);
Assert.assertEquals(transformFunction.getName(), JsonExtractScalarTransformFunction.FUNCTION_NAME);

long[] longValues = transformFunction.transformToLongValuesSV(_projectionBlock);
for (int i = 0; i < NUM_ROWS; i++) {
Assert.assertEquals(longValues[i], (long) Double.parseDouble(_stringSVValues[i]));
}
}

@DataProvider(name = "testJsonPathTransformFunction")
public Object[][] testJsonPathTransformFunctionDataProvider() {
List<Object[]> testArguments = new ArrayList<>();
Expand Down
Loading

0 comments on commit 013e80f

Please sign in to comment.