Skip to content

Commit

Permalink
Add support for node-commons, some udfs and export、import-csv
Browse files Browse the repository at this point in the history
  • Loading branch information
JackieTien97 committed May 28, 2024
1 parent 06c8c09 commit 312e749
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,8 @@ public static void writeCsvFile(
field -> {
String fieldStringValue = field.getStringValue();
if (!"null".equals(field.getStringValue())) {
if (field.getDataType() == TSDataType.TEXT
if ((field.getDataType() == TSDataType.TEXT
|| field.getDataType() == TSDataType.STRING)
&& !fieldStringValue.startsWith("root.")) {
fieldStringValue = "\"" + fieldStringValue + "\"";
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,7 @@
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static org.apache.tsfile.enums.TSDataType.BOOLEAN;
import static org.apache.tsfile.enums.TSDataType.DOUBLE;
import static org.apache.tsfile.enums.TSDataType.FLOAT;
import static org.apache.tsfile.enums.TSDataType.INT32;
import static org.apache.tsfile.enums.TSDataType.INT64;
import static org.apache.tsfile.enums.TSDataType.STRING;
import static org.apache.tsfile.enums.TSDataType.TEXT;

public class ImportData extends AbstractDataTool {
Expand Down Expand Up @@ -126,7 +122,7 @@ public class ImportData extends AbstractDataTool {
private static final String DATATYPE_DOUBLE = "double";
private static final String DATATYPE_TIMESTAMP = "timestamp";
private static final String DATATYPE_DATE = "date";
private static final String DATATYPE_BYTEA = "bytea";
private static final String DATATYPE_BLOB = "blob";
private static final String DATATYPE_NAN = "NaN";
private static final String DATATYPE_TEXT = "text";

Expand All @@ -145,7 +141,7 @@ public class ImportData extends AbstractDataTool {
TYPE_INFER_KEY_DICT.put(DATATYPE_DOUBLE, TSDataType.DOUBLE);
TYPE_INFER_KEY_DICT.put(DATATYPE_TIMESTAMP, TSDataType.TIMESTAMP);
TYPE_INFER_KEY_DICT.put(DATATYPE_DATE, TSDataType.TIMESTAMP);
TYPE_INFER_KEY_DICT.put(DATATYPE_BYTEA, TSDataType.TEXT);
TYPE_INFER_KEY_DICT.put(DATATYPE_BLOB, TSDataType.TEXT);
TYPE_INFER_KEY_DICT.put(DATATYPE_NAN, TSDataType.DOUBLE);
}

Expand All @@ -159,7 +155,7 @@ public class ImportData extends AbstractDataTool {
TYPE_INFER_VALUE_DICT.put(DATATYPE_DOUBLE, TSDataType.DOUBLE);
TYPE_INFER_VALUE_DICT.put(DATATYPE_TIMESTAMP, TSDataType.TIMESTAMP);
TYPE_INFER_VALUE_DICT.put(DATATYPE_DATE, TSDataType.TIMESTAMP);
TYPE_INFER_VALUE_DICT.put(DATATYPE_BYTEA, TSDataType.TEXT);
TYPE_INFER_VALUE_DICT.put(DATATYPE_BLOB, TSDataType.TEXT);
TYPE_INFER_VALUE_DICT.put(DATATYPE_TEXT, TSDataType.TEXT);
}

Expand Down Expand Up @@ -969,7 +965,7 @@ private static TSDataType getType(String typeStr) {
*/
private static TSDataType typeInfer(String strValue) {
if (strValue.contains("\"")) {
return TEXT;
return strValue.length() <= 512 + 2 ? STRING : TEXT;
}
if (isBoolean(strValue)) {
return TYPE_INFER_KEY_DICT.get(DATATYPE_BOOLEAN);
Expand All @@ -987,8 +983,10 @@ private static TSDataType typeInfer(String strValue) {
// "NaN" is returned if the NaN Literal is given in Parser
} else if (DATATYPE_NAN.equals(strValue)) {
return TYPE_INFER_KEY_DICT.get(DATATYPE_NAN);
} else if (strValue.length() <= 512) {
return STRING;
} else {
return TSDataType.TEXT;
return TEXT;
}
}

Expand Down Expand Up @@ -1022,6 +1020,7 @@ private static Object typeTrans(String value, TSDataType type) {
try {
switch (type) {
case TEXT:
case STRING:
if (value.startsWith("\"") && value.endsWith("\"")) {
return value.substring(1, value.length() - 1);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

import org.apache.iotdb.db.exception.sql.SemanticException;

import com.google.common.base.CharMatcher;
import org.apache.tsfile.enums.TSDataType;
import org.apache.tsfile.utils.Binary;
import org.apache.tsfile.utils.ReadWriteIOUtils;
Expand All @@ -30,38 +29,17 @@
import java.nio.ByteBuffer;
import java.util.Arrays;

import static java.util.Locale.ENGLISH;
import static java.util.Objects.requireNonNull;
import static org.apache.iotdb.commons.utils.BlobUtils.parseBlobString;

public class BinaryLiteral extends Literal {
// the grammar could possibly include whitespace in the value it passes to us
private static final CharMatcher WHITESPACE_MATCHER = CharMatcher.whitespace();
private static final CharMatcher HEX_DIGIT_MATCHER =
CharMatcher.inRange('A', 'F').or(CharMatcher.inRange('0', '9')).precomputed();

private final byte[] values;

public BinaryLiteral(String value) {
requireNonNull(value, "value is null");
if (value.length() < 3 || !value.startsWith("X'") || !value.endsWith("'")) {
throw new SemanticException("Binary literal must be in the form X'hexstring'");
}
value = value.substring(2, value.length() - 1);
String hexString = WHITESPACE_MATCHER.removeFrom(value).toUpperCase(ENGLISH);
if (!HEX_DIGIT_MATCHER.matchesAllOf(hexString)) {
throw new SemanticException("Binary literal can only contain hexadecimal digits");
}
if (hexString.length() % 2 != 0) {
throw new SemanticException("Binary literal must contain an even number of digits");
}
int len = hexString.length();
this.values = new byte[len / 2];

for (int i = 0; i < len; i += 2) {
this.values[i / 2] =
(byte)
((Character.digit(hexString.charAt(i), 16) << 4)
+ Character.digit(hexString.charAt(i + 1), 16));
try {
this.values = parseBlobString(value);
} catch (IllegalArgumentException e) {
throw new SemanticException(e.getMessage());
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import org.apache.iotdb.commons.udf.utils.UDFBinaryTransformer;
import org.apache.iotdb.commons.udf.utils.UDFDataTypeTransformer;
import org.apache.iotdb.commons.utils.BlobUtils;
import org.apache.iotdb.udf.api.UDTF;
import org.apache.iotdb.udf.api.access.Row;
import org.apache.iotdb.udf.api.collector.PointCollector;
Expand All @@ -35,6 +36,7 @@
import org.apache.tsfile.enums.TSDataType;
import org.apache.tsfile.utils.Binary;
import org.apache.tsfile.utils.BytesUtils;
import org.apache.tsfile.utils.DateUtils;

import java.io.IOException;
import java.util.HashSet;
Expand All @@ -46,11 +48,15 @@ public class UDTFConst implements UDTF {

static {
VALID_TYPES.add(TSDataType.INT32.name());
VALID_TYPES.add(TSDataType.DATE.name());
VALID_TYPES.add(TSDataType.INT64.name());
VALID_TYPES.add(TSDataType.TIMESTAMP.name());
VALID_TYPES.add(TSDataType.FLOAT.name());
VALID_TYPES.add(TSDataType.DOUBLE.name());
VALID_TYPES.add(TSDataType.BOOLEAN.name());
VALID_TYPES.add(TSDataType.TEXT.name());
VALID_TYPES.add(TSDataType.STRING.name());
VALID_TYPES.add(TSDataType.BLOB.name());
}

private TSDataType dataType;
Expand Down Expand Up @@ -80,7 +86,11 @@ public void beforeStart(UDFParameters parameters, UDTFConfigurations configurati
case INT32:
intValue = Integer.parseInt(parameters.getString("value"));
break;
case DATE:
intValue = DateUtils.parseDateExpressionToInt(parameters.getString("value"));
break;
case INT64:
case TIMESTAMP:
longValue = Long.parseLong(parameters.getString("value"));
break;
case FLOAT:
Expand All @@ -93,8 +103,12 @@ public void beforeStart(UDFParameters parameters, UDTFConfigurations configurati
booleanValue = Boolean.parseBoolean(parameters.getString("value"));
break;
case TEXT:
case STRING:
binaryValue = BytesUtils.valueOf(parameters.getString("value"));
break;
case BLOB:
binaryValue = new Binary(BlobUtils.parseBlobString(parameters.getString("value")));
break;
default:
throw new UnsupportedOperationException();
}
Expand All @@ -108,9 +122,11 @@ public void beforeStart(UDFParameters parameters, UDTFConfigurations configurati
public void transform(Row row, PointCollector collector) throws Exception {
switch (dataType) {
case INT32:
case DATE:
collector.putInt(row.getTime(), intValue);
break;
case INT64:
case TIMESTAMP:
collector.putLong(row.getTime(), longValue);
break;
case FLOAT:
Expand All @@ -123,6 +139,8 @@ public void transform(Row row, PointCollector collector) throws Exception {
collector.putBoolean(row.getTime(), booleanValue);
break;
case TEXT:
case STRING:
case BLOB:
collector.putBinary(row.getTime(), UDFBinaryTransformer.transformToUDFBinary(binaryValue));
break;
default:
Expand All @@ -134,8 +152,10 @@ public void transform(Row row, PointCollector collector) throws Exception {
public Object transform(Row row) throws IOException {
switch (dataType) {
case INT32:
case DATE:
return intValue;
case INT64:
case TIMESTAMP:
return longValue;
case FLOAT:
return floatValue;
Expand All @@ -144,6 +164,8 @@ public Object transform(Row row) throws IOException {
case BOOLEAN:
return booleanValue;
case TEXT:
case STRING:
case BLOB:
return UDFBinaryTransformer.transformToUDFBinary(binaryValue);
default:
throw new UnsupportedOperationException();
Expand All @@ -156,6 +178,7 @@ public void transform(Column[] columns, ColumnBuilder builder) throws Exception

switch (dataType) {
case INT32:
case DATE:
for (int i = 0; i < count; i++) {
boolean hasWritten = false;
for (int j = 0; j < columns.length - 1; j++) {
Expand All @@ -171,6 +194,7 @@ public void transform(Column[] columns, ColumnBuilder builder) throws Exception
}
return;
case INT64:
case TIMESTAMP:
for (int i = 0; i < count; i++) {
boolean hasWritten = false;
for (int j = 0; j < columns.length - 1; j++) {
Expand Down Expand Up @@ -231,6 +255,8 @@ public void transform(Column[] columns, ColumnBuilder builder) throws Exception
}
return;
case TEXT:
case STRING:
case BLOB:
for (int i = 0; i < count; i++) {
boolean hasWritten = false;
for (int j = 0; j < columns.length - 1; j++) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iotdb.commons.utils;

import com.google.common.base.CharMatcher;

import static java.util.Locale.ENGLISH;
import static java.util.Objects.requireNonNull;

public class BlobUtils {

private BlobUtils() {}

// the grammar could possibly include whitespace in the value it passes to us
private static final CharMatcher WHITESPACE_MATCHER = CharMatcher.whitespace();
private static final CharMatcher HEX_DIGIT_MATCHER =
CharMatcher.inRange('A', 'F').or(CharMatcher.inRange('0', '9')).precomputed();

public static byte[] parseBlobString(String value) {
requireNonNull(value, "value is null");
if (value.length() < 3 || !value.startsWith("X'") || !value.endsWith("'")) {
throw new IllegalArgumentException("Binary literal must be in the form X'hexstring'");
}
value = value.substring(2, value.length() - 1);
String hexString = WHITESPACE_MATCHER.removeFrom(value).toUpperCase(ENGLISH);
if (!HEX_DIGIT_MATCHER.matchesAllOf(hexString)) {
throw new IllegalArgumentException("Binary literal can only contain hexadecimal digits");
}
if (hexString.length() % 2 != 0) {
throw new IllegalArgumentException("Binary literal must contain an even number of digits");
}
int len = hexString.length();
byte[] values = new byte[len / 2];

for (int i = 0; i < len; i += 2) {
values[i / 2] =
(byte)
((Character.digit(hexString.charAt(i), 16) << 4)
+ Character.digit(hexString.charAt(i + 1), 16));
}
return values;
}
}

0 comments on commit 312e749

Please sign in to comment.